In [1]:
import librosa 
from scipy.io import wavfile as wav
import numpy as np
from tqdm import tqdm
import pandas as pd
import os

In [2]:
def extract_features(file_name):
    """Load an audio file and compute its MFCCs.

    Parameters
    ----------
    file_name : str or path-like
        Path of the audio file; passed straight to ``librosa.load``.

    Returns
    -------
    The value returned by ``librosa.feature.mfcc`` (called with
    ``n_mfcc=40``), or ``None`` if loading or feature extraction raised.
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    except Exception as e:
        # Deliberately best-effort: report the failure and return None so a
        # single unreadable file does not abort a whole extraction run.
        # (The previous handler printed the undefined name `file`, which
        # raised NameError and defeated this fallback.)
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return mfccs

In [3]:
# Accumulator that the extraction loop fills with one
# [feature, fold, class_label] entry per metadata row
features = list()

# Metadata table; later cells read its slice_file_name, fold and class_name columns
metadata = pd.read_csv('../UrbanSound Dataset sample/metadata/UrbanSound8K.csv')

# Root folder of the full UrbanSound8K audio; files are looked up under fold<N>/ below it
# NOTE(review): the metadata CSV above lives in a different directory tree than
# this audio root — confirm both refer to the same copy of the dataset.
fulldatasetpath = '../Dataset/UrbanSound8K/audio/'

In [None]:
# Iterate through each sound file listed in the metadata and extract its features.
# Start from an empty list so re-running this cell does not append duplicates.
features = []

# Resolve the dataset root once; it does not change between rows.
audio_root = os.path.abspath(fulldatasetpath)

# iterrows() is a generator with no length, so pass `total` explicitly to give
# tqdm a percentage and ETA.
for index, row in tqdm(metadata.iterrows(), total=len(metadata)):

    fold = row["fold"]
    class_label = row["class_name"]

    # Files live at <audio_root>/fold<N>/<slice_file_name>
    file_name = os.path.join(audio_root, 'fold' + str(fold), str(row["slice_file_name"]))

    # `data` is None when extract_features could not process the file; the row
    # is still recorded so its position matches the metadata.
    data = extract_features(file_name)

    features.append([data, fold, class_label])

In [8]:
# Build a pandas DataFrame from the collected [feature, fold, class_label] records
featuresdf = pd.DataFrame(
    data=features,
    columns=['feature', 'fold', 'class_label'],
)

# Report how many rows (one per processed file) ended up in the table
print('Finished feature extraction from ', featuresdf.shape[0], ' files')

Finished feature extraction from  8732  files


In [9]:
# Display the feature table (columns: feature, fold, class_label) as the cell output
featuresdf

Unnamed: 0,feature,fold,class_label
0,"[[-306.77255, -177.59209, -99.13616, -65.97198...",5,dog_bark
1,"[[-457.69534, -451.0248, -450.68613, -444.9999...",5,children_playing
2,"[[-468.0367, -467.42264, -481.04654, -486.5948...",5,children_playing
3,"[[-422.42215, -411.9085, -409.46243, -409.0892...",5,children_playing
4,"[[-438.10162, -434.47787, -443.3284, -442.6643...",5,children_playing
...,...,...,...
8727,"[[-397.82446, -400.45578, -407.5035, -408.9529...",7,car_horn
8728,"[[-451.81265, -451.41983, -450.67892, -445.635...",7,car_horn
8729,"[[-301.06348, -298.25397, -305.0326, -303.8614...",7,car_horn
8730,"[[-373.6307, -369.44986, -366.48, -364.9094, -...",7,car_horn


In [10]:
# Persist the feature table to ./features.pkl so later work can reload it
# without repeating the extraction. NOTE: only unpickle files you trust —
# loading a pickle can execute arbitrary code.
featuresdf.to_pickle("./features.pkl")