# Preprocessing and Spectrogram Generation 

In [1]:
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage.transform import resize

# Sample rate (Hz) shared by every audio file in the dataset.
sr = 48000

# Every clip is shorter than 8 seconds, so the analysis window is 8 s of samples.
length = sr * 8

# ResNet50 expects 224 x 224 x 3 inputs; each spectrogram is resized to the
# first two of those dimensions.
dimensions = (224, 224)

# Root folder for the input audio/CSVs and for all generated output.
data_path = '../Data/'

# Rows whose index is below this value also get their spectrograms saved as images.
num_of_images = 20

# Relative noise levels used to build the augmented copies of each spectrogram.
noises = [0.002, 0.005]

In [2]:
def generate_image(data, image_title, path, y_axis='mel'):
    """Render ``data`` as a spectrogram figure and save it to ``path``.

    Parameters
    ----------
    data : array
        2-D array handed to ``librosa.display.specshow``.
    image_title : str
        Title drawn above the plot.
    path : str or os.PathLike
        Destination image file. Missing parent folders are created.
    y_axis : str, optional
        ``y_axis`` type forwarded to ``specshow``. Defaults to ``'mel'``,
        the value that used to be hard-coded.

    Notes
    -----
    The time axis is scaled with the module-level ``sr``.
    """
    # savefig does not create folders, so make sure the target one exists.
    if isinstance(path, (str, os.PathLike)):
        out_dir = os.path.dirname(path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(data, x_axis='time', y_axis=y_axis, sr=sr, ax=ax)
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set(title=image_title)
        fig.savefig(path)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise figures pile up in pyplot over a long run.
        plt.close(fig)

In [3]:
def generate_noisy(data, noisiness):
    """Return one noise-augmented float64 copy of ``data`` per level in ``noisiness``.

    For each level, Gaussian noise is scaled by
    ``level * U(0, 1) * max(data)`` and added to a copy of ``data``.
    The input array itself is never modified.

    Parameters
    ----------
    data : numpy.ndarray
        Array to augment.
    noisiness : iterable of float
        Relative noise levels; the output list follows the same order.

    Returns
    -------
    list of numpy.ndarray
        Noisy copies of ``data`` with dtype float64.
    """
    variants = []
    for level in noisiness:
        peak = np.amax(data)
        # Draw order (uniform first, then normal) is kept so seeded runs match.
        amplitude = level * np.random.uniform() * peak
        jitter = np.random.normal(size=data.shape)
        variants.append(data.astype('float64', order='C') + amplitude * jitter)
    return variants

In [4]:
# Currently generating a mel spectrogram, a chromagram (STFT-based) and a
# plain STFT spectrogram.
# Possible other ideas: Chirplet
def _clip_window(middle, window, total):
    """Return ``(left, right)`` bounds of a ``window``-sample slice centred on ``middle``.

    A window that overhangs one end of a ``total``-sample signal is shifted
    back inside it so the slice keeps its full length. If the signal is
    shorter than ``window`` the bounds are clamped to ``[0, total]``.
    """
    left = int(middle - (window / 2))
    right = int(middle + (window / 2))
    if left < 0:
        # left is negative here: subtracting it moves the right edge forward
        # by the amount the window overshot the start.
        right -= left
        left = 0
    elif right > total:
        left -= right - total
        right = total
    # Guard against negative starts (which would wrap around when slicing)
    # and ends beyond the signal.
    return max(left, 0), min(right, total)


def _store_variants(df, idx, clean, to_db, base_col, title, folder, file_name):
    """Store the clean and noise-augmented versions of one representation in row ``idx``.

    ``clean`` is augmented with ``generate_noisy``, every variant is converted
    with ``to_db(..., ref=np.max)``, optionally saved as an image, resized to
    ``dimensions`` and written to ``base_col`` (clean) or
    ``base_col + '_n_' + level`` (noisy).
    """
    variants = generate_noisy(clean, noises)
    variants.append(clean)
    # One label per variant: the noise level for augmented copies, None for the clean one.
    levels = list(noises) + [None]

    for level, variant in zip(levels, variants):
        variant_db = to_db(variant, ref=np.max)

        if idx < num_of_images:
            plot_title = title
            plot_folder = folder
            if level is not None:
                plot_title += ' with Noise ' + str(level)
                plot_folder += '_Noise_' + str(level)
            out_dir = data_path + plot_folder + '/' + file_name
            # The image folders are not guaranteed to exist on a fresh checkout.
            os.makedirs(out_dir, exist_ok=True)
            # NOTE(review): generate_image is called with its default axes, so
            # chromagram and STFT plots are drawn with a mel-scaled y axis too.
            generate_image(variant_db, plot_title, out_dir + '/' + str(idx) + '.png')

        col_name = base_col
        if level is not None:
            col_name += '_n_' + str(level)
        df.at[idx, col_name] = resize(variant_db, dimensions)


def generate_spectrograms(df, file_name):
    """Compute spectrogram features for every annotated row of ``df`` and save them.

    For each row the recording ``data_path/train/<recording_id>.flac`` is
    loaded, a ``length``-sample window centred between ``t_min`` and ``t_max``
    is cut out, and three representations are computed from it: mel
    spectrogram, chromagram and STFT magnitude. Each one is stored clean and
    with every noise level in ``noises``, converted to dB and resized to
    ``dimensions``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``recording_id``, ``t_min`` and ``t_max``.
        It is modified in place: the columns ``mspec_db``, ``chroma_db`` and
        ``stft_db`` plus one ``<name>_n_<noise>`` column per noise level are
        added.
    file_name : str
        Used as the sub-folder name for saved images and as the prefix of the
        CSV written to ``data_path/csv/<file_name>_data.csv``.

    Returns
    -------
    None
    """
    # Object-typed columns are required so a whole array can live in one cell.
    for base in ('mspec_db', 'chroma_db', 'stft_db'):
        for col_name in [base] + [base + '_n_' + str(noise) for noise in noises]:
            df[col_name] = np.nan
            df[col_name] = df[col_name].astype(object)

    for idx, row in df.iterrows():
        wav, file_sr = librosa.load(
            data_path + 'train/' + row['recording_id'] + '.flac', sr=None)

        # Slice a fixed-length window centred on the annotated event.
        middle = int((row['t_min'] + row['t_max']) * file_sr / 2)
        left, right = _clip_window(middle, length, len(wav))
        clip = wav[left:right]

        # Mel spectrogram
        mspec = librosa.feature.melspectrogram(y=clip, sr=file_sr, power=2.0, fmax=14000)
        mspec = librosa.decompose.nn_filter(mspec)
        _store_variants(df, idx, mspec, librosa.power_to_db,
                        'mspec_db', 'Melspectrogram', 'Melspectrograms', file_name)

        # Chroma
        chroma = librosa.feature.chroma_stft(y=clip, sr=file_sr)
        chroma = librosa.decompose.nn_filter(chroma, aggregate=np.median, metric='cosine')
        _store_variants(df, idx, chroma, librosa.power_to_db,
                        'chroma_db', 'Chromagram', 'Chromagrams', file_name)

        # Short-time Fourier transform (magnitude, hence amplitude_to_db)
        stft = np.abs(librosa.stft(y=clip))
        stft = librosa.decompose.nn_filter(stft)
        _store_variants(df, idx, stft, librosa.amplitude_to_db,
                        'stft_db', 'Spectrogram', 'Spectrograms', file_name)

    os.makedirs(data_path + 'csv', exist_ok=True)
    # NOTE(review): to_csv writes each array cell as its string form, and NumPy
    # abbreviates arrays above its print threshold with '...', so the 224x224
    # cells are likely not recoverable from this CSV. Consider to_pickle or
    # np.save; the format is left unchanged here so existing readers keep working.
    df.to_csv(data_path + 'csv/' + file_name + '_data.csv')

In [None]:
# Run the spectrogram pipeline over the annotations listed in train_tp.csv.
train_tp_df = pd.read_csv(data_path + 'train_tp.csv')
generate_spectrograms(df=train_tp_df, file_name='train_tp')

In [None]:
# Run the spectrogram pipeline over the annotations listed in train_fp.csv.
train_fp_df = pd.read_csv(data_path + 'train_fp.csv')
generate_spectrograms(df=train_fp_df, file_name='train_fp')