# Preprocessing and Spectrogram Generation 

In [None]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from skimage.transform import resize

# Sample rate (Hz) shared by every audio file in the dataset.
sr = 48_000
# Every labelled clip is shorter than 10 seconds, so use a 10 s window
# (expressed in samples).
length = sr * 10

# ResNet50's input layer is 224 x 224 x 3; spectrograms are resized to match
# the two spatial dimensions.
mel_spec_dimensions = (224, 224)

# Root folder of the dataset and the table of true-positive annotations.
data_path = '../Data/'
df = pd.read_csv(data_path + 'train_tp.csv')

In [None]:
def _centered_window(center, width, total):
    """Return (start, stop) of a `width`-sample window centred on `center`.

    The window is shifted (never shrunk) so that it lies inside [0, total].
    It is shorter than `width` only when the recording itself is shorter.
    """
    start = center - width // 2
    # Shift right when the window would begin before sample 0 and shift left
    # when it would run past the end; the outer max() covers short recordings.
    start = max(0, min(start, total - width))
    return start, min(start + width, total)


# Object-typed column so that each cell can hold a 2-D array.
df['spec'] = np.nan
df['spec'] = df['spec'].astype(object)

for idx, row in df.iterrows():
    # sr=None keeps the file's native sample rate. It is bound to a separate
    # name so the module-level `sr` constant is not overwritten.
    wav, file_sr = librosa.load(
        data_path + 'train/' + row['recording_id'] + '.flac', sr=None
    )

    # Centre the slice on the midpoint of the annotated call (in samples).
    midpoint = int((row['t_min'] + row['t_max']) * file_sr / 2)
    start, stop = _centered_window(midpoint, int(length), len(wav))

    mspec = librosa.feature.melspectrogram(y=wav[start:stop], sr=file_sr, power=2.0)
    mspec = resize(mspec, mel_spec_dimensions)

    # Min-max normalise to [0, 1]; a constant spectrogram (e.g. silence) maps
    # to all zeros instead of dividing by zero.
    spec_min = np.min(mspec)
    spec_range = np.max(mspec) - spec_min
    if spec_range > 0:
        mspec = (mspec - spec_min) / spec_range
    else:
        mspec = np.zeros_like(mspec)

    fig, ax = plt.subplots()
    mspec_db = librosa.power_to_db(mspec, ref=np.max)
    img = librosa.display.specshow(mspec_db, x_axis='time', y_axis='mel', sr=file_sr, ax=ax)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    ax.set(title='Mel-frequency spectrogram')
    plt.show()  # replace with save
    # Release the figure so one figure per row does not accumulate in memory.
    plt.close(fig)

    # Should we save power or db? Probably db?
    df.at[idx, 'spec'] = mspec

# NOTE(review): to_csv writes each array cell as its string repr, which NumPy
# abbreviates with "..." for large arrays, so the 'spec' column cannot be
# recovered from this file. Consider a binary format (np.save / to_pickle)
# once downstream readers can be updated.
df.to_csv(data_path+'train_spectograms.csv')
df.head()