In [None]:
# Download the ESC-50 dataset and prepare it for training.
# ESC-50 dataset download link -> "https://github.com/karolpiczak/ESC-50#download"

In [26]:
import pandas as pd
import os
import numpy as np
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline

In [19]:
# Locations of the ESC-50 audio clips and of the metadata CSV. Both are
# relative paths, so they resolve against the current working directory.
audio_path = "audio/"
metadat_path = "meta/esc50.csv"

In [20]:
# Load the clip metadata table and display it.
mdf = pd.read_csv(metadat_path)
mdf

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,1-100038-A-14.wav,1,14,chirping_birds,False,100038,A
2,1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A
3,1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B
4,1-101296-A-19.wav,1,19,thunderstorm,False,101296,A
...,...,...,...,...,...,...,...
1995,5-263831-B-6.wav,5,6,hen,False,263831,B
1996,5-263902-A-36.wav,5,36,vacuum_cleaner,False,263902,A
1997,5-51149-A-25.wav,5,25,footsteps,False,51149,A
1998,5-61635-A-8.wav,5,8,sheep,False,61635,A


In [21]:
# List every distinct sound category present in the metadata.
mdf["category"].unique()

array(['dog', 'chirping_birds', 'vacuum_cleaner', 'thunderstorm',
       'door_wood_knock', 'can_opening', 'crow', 'clapping', 'fireworks',
       'chainsaw', 'airplane', 'mouse_click', 'pouring_water', 'train',
       'sheep', 'water_drops', 'church_bells', 'clock_alarm',
       'keyboard_typing', 'wind', 'footsteps', 'frog', 'cow',
       'brushing_teeth', 'car_horn', 'crackling_fire', 'helicopter',
       'drinking_sipping', 'rain', 'insects', 'laughing', 'hen', 'engine',
       'breathing', 'crying_baby', 'hand_saw', 'coughing',
       'glass_breaking', 'snoring', 'toilet_flush', 'pig',
       'washing_machine', 'clock_tick', 'sneezing', 'rooster',
       'sea_waves', 'siren', 'cat', 'door_wood_creaks', 'crickets'],
      dtype=object)

In [39]:
# Categories to keep; the metadata is filtered down to these labels.
cat_list = ['wind', 'breathing', 'coughing', 'snoring', 'sneezing']

In [40]:
# Keep only the metadata rows whose category is one of the selected labels.
is_selected_category = mdf["category"].isin(cat_list)
mdf2 = mdf.loc[is_selected_category]

In [41]:
# Display the filtered metadata.
mdf2

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
37,1-137296-A-16.wav,1,16,wind,False,137296,A
76,1-18631-A-23.wav,1,23,breathing,False,18631,A
84,1-19111-A-24.wav,1,24,coughing,False,19111,A
85,1-19118-A-24.wav,1,24,coughing,False,19118,A
97,1-20545-A-28.wav,1,28,snoring,False,20545,A
...,...,...,...,...,...,...,...
1932,5-244459-A-28.wav,5,28,snoring,False,244459,A
1940,5-249748-A-28.wav,5,28,snoring,False,249748,A
1949,5-251489-A-24.wav,5,24,coughing,False,251489,A
1978,5-260164-A-23.wav,5,23,breathing,False,260164,A


In [50]:
def _mean_over_frames(feature):
    """Average a (coefficients, frames) feature matrix over its frame axis."""
    return np.mean(feature.T, axis=0)


def features_extractor(file_name, n_mfcc=16, n_mels=10):
    """Summarise one audio file as time-averaged spectral features.

    The file is decoded with ``librosa.load`` using its default settings, and
    every frame-wise feature matrix is averaged over its frames so that each
    coefficient / band is reduced to a single number.

    Parameters
    ----------
    file_name : str or path-like
        Audio file to analyse.
    n_mfcc : int, default 16
        Number of MFCC coefficients to compute.
    n_mels : int, default 10
        Number of mel bands in the mel spectrogram.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mfcc_mean, mel_mean, zcr_mean, centroid_mean, rolloff_mean)``.
        The first two hold one value per coefficient / mel band; the last
        three are single-row features, so callers read element ``0``.
    """
    audio, sample_rate = librosa.load(file_name)

    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    mel_spectrogram = librosa.feature.melspectrogram(
        y=audio, sr=sample_rate, n_fft=2048, hop_length=512, n_mels=n_mels
    )
    zcr = librosa.feature.zero_crossing_rate(y=audio)
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sample_rate)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sample_rate)

    return (
        _mean_over_frames(mfccs_features),
        _mean_over_frames(mel_spectrogram),
        _mean_over_frames(zcr),
        _mean_over_frames(spectral_centroid),
        _mean_over_frames(spectral_rolloff),
    )

In [51]:
from tqdm import tqdm

# Build one feature row per selected clip: the MFCC means, the mel-band means,
# then the mean zero-crossing rate, spectral centroid and spectral roll-off,
# and finally the clip's category label.
extracted_features = []
clip_records = zip(mdf2["filename"], mdf2["category"])
# A zip has no length, so pass `total` explicitly to get a real progress bar
# (percentage and ETA) instead of a bare iteration counter.
for clip_name, final_class_labels in tqdm(clip_records, total=len(mdf2)):
    file_name = os.path.join(audio_path, str(clip_name))
    mfcc, mels, zcr, sc, rolloff = features_extractor(file_name)
    # zcr / sc / rolloff are single-element arrays; take their only value.
    extracted_features.append(
        [*mfcc, *mels, zcr[0], sc[0], rolloff[0], final_class_labels]
    )

0it [00:00, ?it/s]

200it [00:19, 10.49it/s]


In [52]:
### converting extracted_features to Pandas dataframe
extracted_features_df = pd.DataFrame(extracted_features,columns=['mfcc1','mfcc2','mfcc3','mfcc4','mfcc5',\
                                                                 'mfcc6','mfcc7','mfcc8','mfcc9','mfcc10',\
                                                                    'mfcc11','mfcc12','mfcc13','mfcc14',\
                                                                        'mfcc15','mfcc16',\
                                                                            'mels1','mels2','mels3',\
                                                                                'mels4','mels5','mels6',\
                                                                                    'mels7','mels8','mels9','mels10',\
                                                                                        'zcr','sc','sr','status'])
extracted_features_df.head(10)

Unnamed: 0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,...,mels5,mels6,mels7,mels8,mels9,mels10,zcr,sc,sr,status
0,-220.101135,131.230728,28.395472,78.291901,14.544521,23.645449,18.423018,9.153439,-3.60185,6.003956,...,0.006738,0.008181,0.013148,0.005518,0.001258,0.000289,0.016816,753.409721,1367.956543,wind
1,-317.502777,146.177353,-17.692684,18.609312,-0.379267,13.967452,12.51088,-3.633912,-5.190124,3.865999,...,0.107843,0.015359,0.016034,0.00469,0.000764,3e-05,0.061679,1352.787275,2641.555786,breathing
2,-538.128662,15.930167,-10.820147,2.244197,-0.067081,-3.166114,0.969984,-0.095285,-4.449955,2.391409,...,0.199695,0.260446,0.265386,0.041402,0.001932,0.000939,0.029695,463.249212,763.930257,coughing
3,-823.460388,14.357989,-0.342857,3.046535,-1.855741,-3.136663,-0.314061,1.951042,-0.616935,2.040307,...,0.000168,0.000791,0.000534,4.9e-05,1.3e-05,4e-06,0.011072,245.492107,512.360636,coughing
4,-432.105408,65.02137,19.163795,37.822872,22.149366,19.557327,13.46264,17.397635,11.829626,13.782324,...,0.002882,0.002591,0.001846,0.001211,0.000355,0.000212,0.030859,1349.682907,2966.398112,snoring
5,-549.978638,11.583736,-9.78765,0.466155,0.751456,-1.073498,-5.849607,-5.397849,-7.536588,-3.891459,...,0.500476,0.308084,0.144584,0.046774,0.030742,0.022791,0.088304,1395.553395,2818.407186,sneezing
6,-288.365692,81.316643,20.732376,25.939322,1.720848,2.844645,-2.715636,7.258453,12.125429,10.001252,...,0.012294,0.026702,0.007856,0.009657,0.002362,0.000512,0.094751,2585.188737,6053.023275,snoring
7,-387.379883,70.376114,22.950096,21.253386,2.476728,6.491912,0.266393,1.968915,2.695635,3.104224,...,0.003917,0.002452,0.005237,0.005983,0.001398,0.000718,0.161953,3092.582428,6721.64917,snoring
8,-125.790123,54.253971,-6.652203,12.186908,-23.461964,5.394208,5.389681,-1.297247,-5.410065,26.414375,...,0.151313,0.065735,0.678435,0.576631,0.107582,0.132742,0.339955,3768.89886,7080.186971,wind
9,-564.828003,17.101517,3.977286,2.694859,4.964851,1.245317,2.856899,2.468068,0.055536,3.061555,...,0.062567,0.025159,0.062356,0.011848,0.01504,0.044847,0.152009,2333.436582,4628.641764,sneezing


In [53]:
# Persist the feature table as a Parquet file, without the DataFrame index.
extracted_features_df.to_parquet("ec50_data.parquet", index=False)