# Data preprocessing 

The goal of this script is to build the data pipeline for the music generation AI:
- Load the music tracks;
- Split them into sequences of a certain length;
- Convert the splits into mel spectrograms.


In [None]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as display
import tensorflow as tf

# Reference track used by the exploratory cells.
# Forward slashes only: backslash separators such as "\E" and "\A" are invalid
# string escapes in Python and would also make the path Windows-specific.
# NOTE(review): this path uses "Electro" while BASE_FOLDER uses "electro";
# on a case-sensitive filesystem only one of them can match - confirm.
TESTED_SOUND = "data/full_sounds/Electro/Alex Skrindo - Jumbo [NCS Release].mp3"
# Default value of make_dataset's ``save_to`` parameter.
FINAL_IMG_FOLDER = "data/imgs"
# Root folder that make_dataset walks to find the MP3 files.
BASE_FOLDER = "data/full_sounds/electro"
SPETROGRAMS_LENGTH = 45 # Seconds

In [None]:
# Load the reference track: ``y`` holds the audio samples, ``sr`` the sampling rate.
y, sr = librosa.load(TESTED_SOUND)
# Peak-normalise on the absolute amplitude so the signal stays within [-1, 1]
# even when the loudest sample is negative; leave an empty or silent signal
# untouched instead of dividing by zero.
peak = np.max(np.abs(y)) if y.size else 0.0
if peak > 0:
    y = y / peak
f"Sampling Rate: {sr}"

In [None]:
# Time stamp, in seconds, of every sample of the loaded signal.
t = np.arange(len(y)) / sr
# Show both shapes so the cell output confirms they line up.
t.shape, y.shape

In [None]:
# Waveform overview of the whole track, with the time axis expressed in minutes.
wave_fig, wave_ax = plt.subplots(figsize=(18, 10))
wave_ax.plot(t / 60, y)
wave_ax.set_xlabel("Time in minutes")
wave_ax.grid()
plt.show()

In [None]:
# Sample-index bounds of the first SPETROGRAMS_LENGTH seconds of the track.
seqin = 0 * sr
seqout = SPETROGRAMS_LENGTH * sr
# Extract that window from the normalised signal.
sequence = y[seqin:seqout]

In [None]:
# Mel spectrogram of the extracted window.
sequence_mel_spetrogram = librosa.feature.melspectrogram(
    y=sequence,
    sr=sr,
)
# Convert power to decibels, using the spectrogram's own maximum as reference.
S_dB = librosa.power_to_db(
    sequence_mel_spetrogram,
    ref=np.max,
)
f"spetrogram shape: {S_dB.shape}"

In [None]:
# Show the dB-scaled mel spectrogram as an image with a dB colour bar.
# NOTE(review): imshow draws row 0 at the top by default, so the frequency
# axis is presumably displayed upside down - confirm whether origin="lower"
# is wanted here.
fig = plt.figure(figsize=(18, 5))
ax = fig.add_subplot(1, 1, 1)
pos = ax.imshow(S_dB)
fig.colorbar(pos, ax=ax, format='%+2.0f dB')
ax.set(title='Mel-frequency spectrogram')

In [None]:
import os
from PIL import Image

def _chunk_starts(n_samples:int, window:int, hop:int):
    """Return the start index of every full ``window`` that fits in ``n_samples``."""
    # ``+ 1`` keeps the last window when it ends exactly on the final sample;
    # a signal shorter than one window yields an empty range.
    return range(0, n_samples - window + 1, hop)


def make_dataset(
    base_folder:str=BASE_FOLDER, 
    save_to:str=FINAL_IMG_FOLDER, 
    sequence_length:float=SPETROGRAMS_LENGTH,
    overlap:float=0.5
    ):
    """Walk ``base_folder`` and turn MP3 files into dB-scaled mel-spectrogram chunks.

    Each file is loaded, peak-normalised, cut into windows of
    ``sequence_length`` seconds, and every window is converted to a mel
    spectrogram in decibels and serialised as a TensorFlow tensor.

    NOTE: the function is still work in progress. The per-chunk ``features``
    dictionary is built but not written anywhere, ``save_to`` is not used yet,
    and the two ``break`` statements stop after the first chunk of the first
    MP3 found in each visited directory.

    Args:
        base_folder: Directory that is walked recursively for ``.mp3`` files.
        save_to: Destination folder for the dataset (currently unused).
        sequence_length: Length of each chunk, in seconds. Must be positive.
        overlap: Fraction of a chunk shared with the next one, in ``[0, 1)``.
            ``0`` gives back-to-back chunks, ``0.5`` advances by half a chunk.

    Raises:
        ValueError: If ``sequence_length`` is not positive, if ``overlap`` is
            outside ``[0, 1)``, or if a chunk would contain no samples.
    """
    if sequence_length <= 0:
        raise ValueError(f"sequence_length must be positive, got {sequence_length}")
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must be in [0, 1), got {overlap}")

    for root, _, files in os.walk(base_folder):
        # Match on the extension only (case-insensitively) rather than on a
        # '.mp3' substring anywhere in the file name.
        mp3_paths = [os.path.join(root, f) for f in files if f.lower().endswith('.mp3')]

        for path in mp3_paths:
            fname = os.path.splitext(os.path.basename(path))[0]
            loaded_sound, sr = librosa.load(path)

            # Normalise on the absolute peak; skip empty or silent tracks to
            # avoid dividing by zero.
            peak = np.max(np.abs(loaded_sound)) if loaded_sound.size else 0.0
            if peak > 0:
                loaded_sound = loaded_sound / peak

            # Window and hop sizes in samples. They are integers so they can be
            # used as slice bounds and as a range step even when
            # ``sequence_length`` is a float.
            window = int(sequence_length * sr)
            if window < 1:
                raise ValueError(
                    f"sequence_length={sequence_length} is shorter than one sample at sr={sr}"
                )
            hop = max(1, int(window * (1 - overlap)))

            for i, seqin in enumerate(_chunk_starts(loaded_sound.shape[0], window, hop)):
                print(f'\r{i} {fname}', end="")

                selected_sequence = loaded_sound[seqin:seqin + window]

                sequence_mel_spetrogram = librosa.feature.melspectrogram(y=selected_sequence, sr=sr)
                S_dB = librosa.power_to_db(sequence_mel_spetrogram, ref=np.max)
                encoded_S_dB = tf.io.serialize_tensor(tf.convert_to_tensor(S_dB))

                # TODO(review): ``features`` is never persisted; decide on the
                # output format and write it under ``save_to``.
                features = {
                    "max":np.max(S_dB),
                    "min":np.min(S_dB),
                    "filename":fname,
                    "n_chunk":i,
                    "raw_image":encoded_S_dB
                }
                # TODO(review): early exit kept from the original - only the
                # first chunk is processed. Remove once saving is implemented.
                break
            # TODO(review): early exit kept from the original - only the first
            # MP3 of each directory is processed.
            break
                
                
                
        
# Run the pipeline with the default folders, chunk length and overlap.
make_dataset()