In [2]:
import numpy as np
import pandas as pd
import os
import librosa

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read .wav file and extract MFCC futures
def extract_mfcc(path, n_mfcc = 50):
    audio, sample_rate = librosa.load(path)
    mfcc = np.mean(librosa.feature.mfcc(y=audio, sr = sample_rate, n_mfcc = n_mfcc).T, axis = 0)
    
    return mfcc

extract_mfcc(path = "set_b/Bunlabelledtest__101_1305030823364_A.wav")

array([-3.7675204e+02,  1.9509880e+02,  3.9894493e+01, -3.4483456e+01,
       -4.0502377e+00,  3.2519104e+01,  1.4400771e+01, -1.6910505e+01,
       -1.2213233e+01,  9.5317307e+00,  9.5450516e+00, -5.4196734e+00,
       -6.5301614e+00,  4.6682138e+00,  6.4674659e+00, -1.8951681e+00,
       -3.8423703e+00,  2.1433916e+00,  2.9269059e+00, -3.1805477e+00,
       -5.1464300e+00, -7.0016634e-01,  1.2723141e+00, -1.6146678e+00,
       -2.3577247e+00,  9.4719607e-01,  2.3051488e+00, -7.8174151e-02,
       -1.2720234e+00,  2.0813408e-01,  4.0839198e-01, -1.3676862e+00,
       -1.4184954e+00,  3.8655740e-01,  2.1355706e-01, -1.8718290e+00,
       -1.8326700e+00,  3.3732346e-01,  4.7578675e-01, -1.5799507e+00,
       -1.5279752e+00,  1.0546710e+00,  1.6273245e+00, -9.0105891e-01,
       -2.1343641e+00, -2.6191193e-01,  9.2603385e-01, -6.6231692e-01,
       -1.7373650e+00, -4.0971819e-02], dtype=float32)

In [4]:
# Read both the csv files with labels for each file
dfb = pd.read_csv("set_b.csv")
dfb.head()

Unnamed: 0,dataset,fname,label,sublabel
0,b,set_b/extrastole__127_1306764300147_C2.wav,extrastole,
1,b,set_b/extrastole__128_1306344005749_A.wav,extrastole,
2,b,set_b/extrastole__130_1306347376079_D.wav,extrastole,
3,b,set_b/extrastole__134_1306428161797_C1.wav,extrastole,
4,b,set_b/extrastole__138_1306762146980_B.wav,extrastole,


In [5]:
dfa = pd.read_csv("set_a.csv")
dfa.head()

Unnamed: 0,dataset,fname,label,sublabel
0,a,set_a/artifact__201012172012.wav,artifact,
1,a,set_a/artifact__201105040918.wav,artifact,
2,a,set_a/artifact__201105041959.wav,artifact,
3,a,set_a/artifact__201105051017.wav,artifact,
4,a,set_a/artifact__201105060108.wav,artifact,


In [6]:
df = pd.concat([dfa, dfb])
df.info(), dfa.info(), dfb.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 832 entries, 0 to 655
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   dataset   832 non-null    object
 1   fname     832 non-null    object
 2   label     585 non-null    object
 3   sublabel  149 non-null    object
dtypes: object(4)
memory usage: 32.5+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   dataset   176 non-null    object 
 1   fname     176 non-null    object 
 2   label     124 non-null    object 
 3   sublabel  0 non-null      float64
dtypes: float64(1), object(3)
memory usage: 5.6+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 656 entries, 0 to 655
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   dataset   656 non-null    object
 1   fname     656 

(None, None, None)

In [7]:
# We are not focusing on the sub labels as for now
df.drop(["dataset", "sublabel"], axis = 1, inplace = True)
df.head()

Unnamed: 0,fname,label
0,set_a/artifact__201012172012.wav,artifact
1,set_a/artifact__201105040918.wav,artifact
2,set_a/artifact__201105041959.wav,artifact
3,set_a/artifact__201105051017.wav,artifact
4,set_a/artifact__201105060108.wav,artifact


In [8]:
df.dropna(inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 585 entries, 0 to 655
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   fname   585 non-null    object
 1   label   585 non-null    object
dtypes: object(2)
memory usage: 13.7+ KB


In [9]:
# Extracting the mfccs and rearranging them into a DF
mfcc_df = pd.DataFrame(columns=["mfcc"+str(i) for i in range(50)])
for i in range(len(df)):
    mfcc = extract_mfcc(path=df.iloc[i]["fname"], n_mfcc = 50)
    
    data = {"mfcc"+str(i):mfcc[i] for i in range(50)}
    data["fname"] = df.iloc[i]["fname"]
    data["label"] = df.iloc[i]["label"]
    mfcc_df = mfcc_df.append(data, ignore_index=True)
    
mfcc_df

Unnamed: 0,mfcc0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,...,mfcc42,mfcc43,mfcc44,mfcc45,mfcc46,mfcc47,mfcc48,mfcc49,fname,label
0,-594.029053,38.685047,5.528638,5.631155,3.402866,1.321781,-0.979730,-2.110061,-3.675011,-1.318223,...,0.574885,0.314081,0.139944,0.104879,0.105029,0.277018,-0.065550,0.118038,set_a/artifact__201012172012.wav,artifact
1,-750.833496,29.685860,-4.507759,-2.470311,-4.913468,-1.428128,-1.864767,0.225090,-4.211215,0.175512,...,-0.793682,1.592819,-0.751207,1.241795,-1.067852,1.084590,-0.780088,1.135242,set_a/artifact__201105040918.wav,artifact
2,-559.362183,35.351219,-13.119020,-3.854263,-10.170296,-2.961954,-8.500693,0.070704,-6.701177,-0.199900,...,-1.632973,2.054471,-1.348332,1.888599,-1.276106,1.303232,-0.784756,1.153624,set_a/artifact__201105041959.wav,artifact
3,-293.343018,103.457886,-13.003966,19.647190,-14.164202,-3.886931,-15.286445,-4.694386,-8.967902,-7.757905,...,0.853834,3.688840,0.803865,2.527869,-0.573466,1.877491,-1.014469,1.853802,set_a/artifact__201105051017.wav,artifact
4,-291.524170,95.693237,-11.997632,8.995483,-4.294098,2.263997,-8.406081,-6.354175,-8.621305,-3.386567,...,-0.222854,2.129115,-0.277558,1.836322,0.245034,2.129474,-0.898521,0.876153,set_a/artifact__201105060108.wav,artifact
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,-400.129578,195.269211,63.390137,-9.672095,2.324871,27.712965,16.137299,-6.226850,-2.395197,15.018492,...,1.853871,-0.649307,-1.711522,0.178742,1.344141,-0.391416,-1.768523,-0.046267,set_b/normal_noisynormal_284_1311168471850_A.wav,normal
581,-376.719177,204.537247,58.920521,-20.686821,-4.959510,26.717134,17.108431,-6.837900,-4.267258,13.267072,...,2.177969,-0.005022,-1.121046,0.357927,1.321591,-0.184563,-1.366481,0.139941,set_b/normal_noisynormal_284_1311168471850_B.wav,normal
582,-456.591949,173.774475,37.228088,-24.668669,5.883070,38.923752,23.321922,-1.004483,6.525411,23.303383,...,2.479716,-0.259972,-1.971896,-1.140991,-0.348904,-0.917764,-0.754072,0.940308,set_b/normal_noisynormal_285_1311169246969_C.wav,normal
583,-341.854401,194.594055,55.016411,-19.128372,-0.958779,30.275915,19.377094,-5.335845,-3.475631,12.877550,...,0.607510,-0.469917,-1.496821,-0.454646,0.654038,-0.442257,-1.888979,-0.967844,set_b/normal_noisynormal_296_1311682952647_C.wav,normal


In [10]:
mfcc_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 585 entries, 0 to 584
Data columns (total 52 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   mfcc0   585 non-null    float32
 1   mfcc1   585 non-null    float32
 2   mfcc2   585 non-null    float32
 3   mfcc3   585 non-null    float32
 4   mfcc4   585 non-null    float32
 5   mfcc5   585 non-null    float32
 6   mfcc6   585 non-null    float32
 7   mfcc7   585 non-null    float32
 8   mfcc8   585 non-null    float32
 9   mfcc9   585 non-null    float32
 10  mfcc10  585 non-null    float32
 11  mfcc11  585 non-null    float32
 12  mfcc12  585 non-null    float32
 13  mfcc13  585 non-null    float32
 14  mfcc14  585 non-null    float32
 15  mfcc15  585 non-null    float32
 16  mfcc16  585 non-null    float32
 17  mfcc17  585 non-null    float32
 18  mfcc18  585 non-null    float32
 19  mfcc19  585 non-null    float32
 20  mfcc20  585 non-null    float32
 21  mfcc21  585 non-null    float32
 22  mf

In [11]:
mfcc_df.head()

Unnamed: 0,mfcc0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,...,mfcc42,mfcc43,mfcc44,mfcc45,mfcc46,mfcc47,mfcc48,mfcc49,fname,label
0,-594.029053,38.685047,5.528638,5.631155,3.402866,1.321781,-0.97973,-2.110061,-3.675011,-1.318223,...,0.574885,0.314081,0.139944,0.104879,0.105029,0.277018,-0.06555,0.118038,set_a/artifact__201012172012.wav,artifact
1,-750.833496,29.68586,-4.507759,-2.470311,-4.913468,-1.428128,-1.864767,0.22509,-4.211215,0.175512,...,-0.793682,1.592819,-0.751207,1.241795,-1.067852,1.08459,-0.780088,1.135242,set_a/artifact__201105040918.wav,artifact
2,-559.362183,35.351219,-13.11902,-3.854263,-10.170296,-2.961954,-8.500693,0.070704,-6.701177,-0.1999,...,-1.632973,2.054471,-1.348332,1.888599,-1.276106,1.303232,-0.784756,1.153624,set_a/artifact__201105041959.wav,artifact
3,-293.343018,103.457886,-13.003966,19.64719,-14.164202,-3.886931,-15.286445,-4.694386,-8.967902,-7.757905,...,0.853834,3.68884,0.803865,2.527869,-0.573466,1.877491,-1.014469,1.853802,set_a/artifact__201105051017.wav,artifact
4,-291.52417,95.693237,-11.997632,8.995483,-4.294098,2.263997,-8.406081,-6.354175,-8.621305,-3.386567,...,-0.222854,2.129115,-0.277558,1.836322,0.245034,2.129474,-0.898521,0.876153,set_a/artifact__201105060108.wav,artifact


In [12]:
# Saving the mfcc DF as a csv file to be used for traning and testing our ml moodels
mfcc_df.to_csv("mfcc.csv")