In [1]:
import os
import pandas as pd
import librosa
from tqdm import tqdm
import numpy as np

In [2]:
# Load the metadata table; later cells read its absPath, fileName and label columns.
data = pd.read_csv("metadata.csv")

In [3]:
# Preview the first rows of the metadata table.
data.head()

Unnamed: 0,absPath,fileName,label
0,./data/0/,0_george_0.wav,0
1,./data/0/,0_george_1.wav,0
2,./data/0/,0_george_10.wav,0
3,./data/0/,0_george_11.wav,0
4,./data/0/,0_george_12.wav,0


# MFCC

Here we use Mel-Frequency Cepstral Coefficients (MFCCs) computed from the audio samples. MFCCs summarise the frequency distribution within each analysis window, so they capture both the frequency and the time characteristics of the sound. In this notebook each recording's MFCCs are then averaged over time, giving one fixed-length feature vector per recording that can be used for classification.

In [4]:
# Feature extraction
def extract(location,filename,n_mfcc=40):
    """Load one audio file and return its time-averaged MFCC vector.

    Parameters
    ----------
    location : str
        Directory that contains the file (e.g. ``"./data/0/"``).
    filename : str
        Name of the audio file inside ``location``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 40.

    Returns
    -------
    numpy.ndarray
        The MFCC matrix averaged over its frame axis, one value per coefficient.
    """
    # os.path.join only inserts a separator when `location` lacks a trailing
    # one, so "./data/0/" and "./data/0" both resolve to the same file.
    audio_path = os.path.join(location, filename)
    audio, samplerate = librosa.load(audio_path, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=samplerate, n_mfcc=n_mfcc)
    # Transpose so that frames are the rows, then average across frames.
    return np.mean(mfccs_features.T,axis=0)

In [5]:
# Build one [mfcc_vector, label] record per row of the metadata table.
# The per-file result is bound to its own name (`features`) instead of `data`,
# so the metadata DataFrame is still intact once the loop has finished.
extracted_features=[]
# iterrows() has no length, so pass total explicitly to get a percentage/ETA bar.
for _,row in tqdm(data.iterrows(),total=len(data)):
    features=extract(row["absPath"],row["fileName"])
    extracted_features.append([features,row["label"]])

3000it [00:37, 80.97it/s]


In [6]:
# Assemble the [feature, class] records into a DataFrame and preview the first rows
data=pd.DataFrame(
    data=extracted_features,
    columns=['feature','class'],
)
data.head()

Unnamed: 0,feature,class
0,"[-322.69284, 170.57394, -81.615074, 39.977394,...",0
1,"[-384.43948, 171.77277, -53.616756, 13.613436,...",0
2,"[-428.92017, 132.75345, -50.3403, 9.906248, 31...",0
3,"[-395.29694, 172.42221, -75.08815, 15.661075, ...",0
4,"[-394.4992, 175.45775, -73.03517, 13.853026, 4...",0


In [7]:
# Write the feature table to processed.csv, leaving out the row index.
# NOTE(review): each 'feature' cell holds an array, so the CSV stores its text
# representation rather than separate numeric columns — confirm that whatever
# reads processed.csv parses it back accordingly.
data.to_csv("processed.csv",index=False)