# MMI 702 PROJECT - PART1 : FEATURE EXTRACTION 

In [None]:
!pip install librosa
!pip install praat-parselmouth

In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import pickle
import glob
import parselmouth
import statistics
from parselmouth.praat import call
import math

%matplotlib inline

In [None]:
# Load the dataset metadata; it holds the labels of the audio files in the dataset
meta_data = pd.read_csv("metadata/UrbanSound8K.csv")

meta_data.shape  # (8732, 8)
meta_data.head()

# Distinct class labels, in order of first appearance in the metadata
classes = meta_data['class'].unique().tolist()

In [None]:
# Feature extraction function for MFCCs (13 coefficients by default); the librosa library is used
def feature_extractor_mfcc(file_name, kwargs, scale=True, padding=False, max_pad=None, n=13):
    '''
    Extract MFCC features from an audio file using the librosa library.

    The file is read with ``librosa.load(file_name, res_type='kaiser_fast')``
    (all other loading options are librosa's defaults) and the coefficients
    are computed with ``librosa.feature.mfcc``.

    Parameters:
        file_name : string
                    Path of the file to be processed.
        kwargs    : dict (or None)
                    Extra options forwarded by the caller. If it contains the
                    key 'n', that value overrides the ``n`` argument.
        scale     : bool, default True
                    When True, the coefficients are averaged over the frame
                    axis, giving a 1D array of length n.
        padding   : bool, default False
                    When True, the 2D coefficient array is zero-padded on the
                    right along the frame axis up to ``max_pad`` columns.
        max_pad   : int or None, default None
                    Target number of frames; required when padding is True.
        n         : int, default 13
                    Number of MFCCs to compute when kwargs has no 'n' entry.

    Returns:
        Array of MFCC features (1D when scale is True, otherwise 2D), or None
        when loading or processing fails. A failure includes padding=True with
        max_pad missing or smaller than the number of frames.
    '''
    # An 'n' entry in kwargs wins; otherwise fall back to the keyword argument
    # instead of raising KeyError.
    n = (kwargs or {}).get('n', n)

    try:
        audio, sample_rate = librosa.load(file_name,
                                          res_type='kaiser_fast')

        mfccs = librosa.feature.mfcc(y=audio,
                                     sr=sample_rate,
                                     n_mfcc=n)

        if padding:
            # Only the frame axis (axis 1) is padded, and only on the right.
            pad_width = max_pad - mfccs.shape[1]
            mfccs = np.pad(mfccs,
                           pad_width=((0, 0), (0, pad_width)),
                           mode='constant')

        if scale:
            # Collapse the frame axis: one mean value per coefficient.
            mfccs = np.mean(mfccs.T, axis=0)

    except Exception as e:
        # Best-effort extraction: report the failure (with its cause) and let
        # the caller decide what to do with the missing feature.
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return mfccs

In [None]:
# HNR feature extractor; relies on the parselmouth library
def feature_extractor_hnr(file_name, kwargs):
    """Return the mean harmonicity of the audio file at ``file_name``.

    The sound is opened with parselmouth, converted with the Praat command
    "To Harmonicity (cc)" and summarised with "Get mean".

    ``kwargs`` is accepted for interface compatibility and is not used.

    Returns 0 when the file cannot be read, and 100 when the computed mean
    is NaN.
    """
    try:
        snd = parselmouth.Sound(file_name)  # read the sound
    except Exception:
        print("HNR: Error encountered while parsing file: ", file_name)
        return 0

    harmonicity_obj = call(snd, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
    mean_hnr = call(harmonicity_obj, "Get mean", 0, 0)
    # A NaN mean is mapped to the sentinel value 100.
    return 100 if math.isnan(mean_hnr) else mean_hnr


In [None]:
# Mean-frequency (mean F0) feature extractor; relies on the parselmouth library
def feature_extractor_meanf0(file_name, kwargs):
    """Return the mean pitch, in Hertz, of the audio file at ``file_name``.

    A Praat pitch object is created with "To Pitch" (arguments 0.0, 100,
    10000) and its mean is queried with "Get mean".

    ``kwargs`` is accepted for interface compatibility and is not used.

    Returns 0 when the file cannot be read or when the mean is NaN.
    """
    try:
        snd = parselmouth.Sound(file_name)  # read the sound
    except Exception:
        print("Meanf0: Error encountered while parsing file: ", file_name)
        return 0

    pitch_obj = call(snd, "To Pitch", 0.0, 100, 10000)  # Praat pitch object
    mean_pitch = call(pitch_obj, "Get mean", 0, 0, "Hertz")
    return 0 if math.isnan(mean_pitch) else mean_pitch

In [None]:
# Pitch standard-deviation (stdev F0) feature extractor; relies on the parselmouth library
def feature_extractor_stdevf0(file_name, kwargs):
    """Return the standard deviation of the pitch, in Hertz, of an audio file.

    A Praat pitch object is created with "To Pitch" (arguments 0.0, 100,
    10000) and queried with "Get standard deviation".

    ``kwargs`` is accepted for interface compatibility and is not used.

    Returns 0 when the file cannot be read or when the result is NaN.
    """
    try:
        snd = parselmouth.Sound(file_name)  # read the sound
    except Exception:
        print("stdf0: Error encountered while parsing file: ", file_name)
        return 0

    pitch_obj = call(snd, "To Pitch", 0.0, 100, 10000)  # Praat pitch object
    pitch_stdev = call(pitch_obj, "Get standard deviation", 0, 0, "Hertz")
    return 0 if math.isnan(pitch_stdev) else pitch_stdev

In [None]:
# Zero-crossing-rate feature extractor; relies on the librosa library
def feature_extractor_zcr(file_name, kwargs):
    """Return the zero crossing rate of an audio file, averaged over frames.

    The file is read with ``librosa.load(file_name, res_type='kaiser_fast')``
    and passed to ``librosa.feature.zero_crossing_rate``. The result is the
    mean taken along axis 0 of the transposed rate array, so an array is
    returned rather than a bare scalar.

    ``kwargs`` is accepted for interface compatibility and is not used.
    """
    signal, _ = librosa.load(file_name, res_type='kaiser_fast')
    frame_rates = librosa.feature.zero_crossing_rate(y=signal)
    return np.mean(frame_rates.T, axis=0)

In [None]:
# Spectral-centroid feature extractor; relies on the librosa library
def feature_extractor_sc(file_name, kwargs):
    """Return the mean spectral centroid of an audio file, divided by 5000.

    The file is read with ``librosa.load(file_name, res_type='kaiser_fast')``
    and passed, with its sample rate, to ``librosa.feature.spectral_centroid``.
    The mean over all returned values is then scaled by the fixed constant
    1/5000.

    ``kwargs`` is accepted for interface compatibility and is not used.
    """
    signal, sr = librosa.load(file_name, res_type='kaiser_fast')
    centroids = librosa.feature.spectral_centroid(y=signal, sr=sr)
    return np.mean(centroids) / 5000

In [None]:
# Local-jitter feature extractor; relies on the parselmouth library
def feature_extractor_localJitter(file_name, kwargs):
    """Return the local jitter of the audio file at ``file_name``.

    A point process is created with the Praat command
    "To PointProcess (periodic, cc)" (arguments 50, 10000) and queried with
    "Get jitter (local)".

    ``kwargs`` is accepted for interface compatibility and is not used.

    Returns 0 when the file cannot be read or when the result is NaN.
    """
    try:
        snd = parselmouth.Sound(file_name)  # read the sound
    except Exception:
        print("localjitter: Error encountered while parsing file: ", file_name)
        return 0

    point_process = call(snd, "To PointProcess (periodic, cc)", 50, 10000)
    jitter = call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    return 0 if math.isnan(jitter) else jitter

In [None]:
# Feature extraction function for local shimmer; the parselmouth library is used
def feature_extractor_localShimmer(file_name, kwargs):
    """Return the local shimmer of the audio file at ``file_name``.

    A point process is created with the Praat command
    "To PointProcess (periodic, cc)" (arguments 50, 10000); the sound and the
    point process are then queried together with "Get shimmer (local)".

    ``kwargs`` is accepted for interface compatibility and is not used.

    Returns 0 when the file cannot be read or when the result is NaN.
    """
    try:
        sound = parselmouth.Sound(file_name)  # read the sound
    except Exception:
        # Label the message with this extractor's own name so failures are
        # not mistaken for local-jitter failures in the log.
        print("localShimmer: Error encountered while parsing file: ", file_name)
        return 0

    pointProcess = call(sound, "To PointProcess (periodic, cc)", 50, 10000)
    localShimmer = call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    if math.isnan(localShimmer):
        localShimmer = 0
    return localShimmer

In [None]:
def extract_custom_feature(feature_extractor, **kwargs):
    """Apply ``feature_extractor`` to every audio file listed in the metadata.

    The metadata is read from "metadata/UrbanSound8K.csv". For each row the
    audio path is built as ``<abs path of audio/>/fold<fold>/<slice_file_name>``
    and ``feature_extractor(file_name, kwargs)`` is called, where ``kwargs``
    is the dict of extra keyword arguments given to this function.

    Parameters:
        feature_extractor : callable taking (file_name, kwargs).
        **kwargs          : options forwarded to the extractor as one dict.

    Returns:
        pandas.DataFrame with columns 'feature' (whatever the extractor
        returned, stored unchanged) and 'class_label' (the row's "class"
        value), one row per metadata row, in metadata order.
    """
    meta_data = pd.read_csv("metadata/UrbanSound8K.csv")
    # The audio root does not change between rows, so resolve it once.
    audio_root = os.path.abspath('audio/')

    feat_list = []
    # Iterate the needed columns directly: cheaper than iterrows() and keeps
    # each column's own dtype (iterrows builds a mixed-type Series per row).
    for fold, slice_file_name, class_label in zip(meta_data["fold"],
                                                  meta_data["slice_file_name"],
                                                  meta_data["class"]):
        file_name = os.path.join(audio_root,
                                 'fold' + str(fold) + '/',
                                 str(slice_file_name))
        feat_list.append([feature_extractor(file_name, kwargs), class_label])

    return pd.DataFrame(feat_list, columns=['feature', 'class_label'])

### Functions to extract features from the dataset and save them as pickled pandas DataFrames (the files are written with a `.pk1` extension)

In [None]:
def extract_mfcc_13():
    """Extract 13-coefficient MFCC features and pickle the resulting DataFrame.

    Calls ``extract_custom_feature`` with ``feature_extractor_mfcc`` and
    ``n=13``, prints the row count and the shape of the first feature, and
    writes the DataFrame to "./features/mfcc_13_features.pk1".
    """
    # Make sure the output folder exists before the extraction starts, so the
    # results cannot be lost to a missing directory at save time.
    os.makedirs("./features", exist_ok=True)

    features_1D_mfcc_13 = extract_custom_feature(feature_extractor_mfcc, n=13)

    print('Processed ', len(features_1D_mfcc_13), ' files')
    print('Output Feature shape ',np.shape(features_1D_mfcc_13.iloc[0,0]))
    # NOTE(review): the extension is ".pk1" (digit one); kept unchanged in
    # case other code reads this exact file name.
    features_1D_mfcc_13.to_pickle("./features/mfcc_13_features.pk1")

In [None]:
def extract_mfcc_32():
    """Extract 32-coefficient MFCC features and pickle the resulting DataFrame.

    Calls ``extract_custom_feature`` with ``feature_extractor_mfcc`` and
    ``n=32``, prints the row count and the shape of the first feature, and
    writes the DataFrame to "./features/mfcc_32_features.pk1".
    """
    # Make sure the output folder exists before the extraction starts, so the
    # results cannot be lost to a missing directory at save time.
    os.makedirs("./features", exist_ok=True)

    features_1D_mfcc_32 = extract_custom_feature(feature_extractor_mfcc, n=32)

    print('Processed ', len(features_1D_mfcc_32), ' files')
    print('Output Feature shape ',np.shape(features_1D_mfcc_32.iloc[0,0]))
    # NOTE(review): the extension is ".pk1" (digit one); kept unchanged in
    # case other code reads this exact file name.
    features_1D_mfcc_32.to_pickle("./features/mfcc_32_features.pk1")

In [None]:
def extract_hnr():
    """Extract HNR features and pickle the resulting DataFrame.

    Calls ``extract_custom_feature`` with ``feature_extractor_hnr``, prints
    the row count and the shape of the first feature, and writes the
    DataFrame to "./features/hnr_features.pk1".
    """
    # Make sure the output folder exists before the extraction starts, so the
    # results cannot be lost to a missing directory at save time.
    os.makedirs("./features", exist_ok=True)

    features_1D_hnr = extract_custom_feature(feature_extractor_hnr)

    print('Processed ', len(features_1D_hnr), ' files')
    print('Output Feature shape ',np.shape(features_1D_hnr.iloc[0,0]))
    # NOTE(review): the extension is ".pk1" (digit one); kept unchanged in
    # case other code reads this exact file name.
    features_1D_hnr.to_pickle("./features/hnr_features.pk1")

In [None]:
def extract_localJitter():
    """Extract local-jitter features and pickle the resulting DataFrame.

    Calls ``extract_custom_feature`` with ``feature_extractor_localJitter``,
    prints the row count and the shape of the first feature, and writes the
    DataFrame to "./features/localJitter_features.pk1".
    """
    # Make sure the output folder exists before the extraction starts, so the
    # results cannot be lost to a missing directory at save time.
    os.makedirs("./features", exist_ok=True)

    features_1D_localJitter = extract_custom_feature(feature_extractor_localJitter)

    print('Processed ', len(features_1D_localJitter), ' files')
    print('Output Feature shape ',np.shape(features_1D_localJitter.iloc[0,0]))
    # NOTE(review): the extension is ".pk1" (digit one); kept unchanged in
    # case other code reads this exact file name.
    features_1D_localJitter.to_pickle("./features/localJitter_features.pk1")

In [None]:
def extract_meanf0():
    """Extract mean-F0 features and pickle the resulting DataFrame.

    Calls ``extract_custom_feature`` with ``feature_extractor_meanf0``,
    prints the row count and the shape of the first feature, and writes the
    DataFrame to "./features/meanf0_features.pk1".
    """
    # Make sure the output folder exists before the extraction starts, so the
    # results cannot be lost to a missing directory at save time.
    os.makedirs("./features", exist_ok=True)

    features_1D_meanf0 = extract_custom_feature(feature_extractor_meanf0)

    print('Processed ', len(features_1D_meanf0), ' files')
    print('Output Feature shape ',np.shape(features_1D_meanf0.iloc[0,0]))
    # NOTE(review): the extension is ".pk1" (digit one); kept unchanged in
    # case other code reads this exact file name.
    features_1D_meanf0.to_pickle("./features/meanf0_features.pk1")

In [None]:
def extract_stdevf0():
    """Extract F0 standard-deviation features and pickle the resulting DataFrame.

    Calls ``extract_custom_feature`` with ``feature_extractor_stdevf0``,
    prints the row count and the shape of the first feature, and writes the
    DataFrame to "./features/stdevf0_features.pk1".
    """
    # Make sure the output folder exists before the extraction starts, so the
    # results cannot be lost to a missing directory at save time.
    os.makedirs("./features", exist_ok=True)

    features_1D_stdevf0 = extract_custom_feature(feature_extractor_stdevf0)

    print('Processed ', len(features_1D_stdevf0), ' files')
    print('Output Feature shape ',np.shape(features_1D_stdevf0.iloc[0,0]))
    # NOTE(review): the extension is ".pk1" (digit one); kept unchanged in
    # case other code reads this exact file name.
    features_1D_stdevf0.to_pickle("./features/stdevf0_features.pk1")

In [None]:
def extract_zero_crossing_rate():
    """Extract zero-crossing-rate features and pickle the resulting DataFrame.

    Calls ``extract_custom_feature`` with ``feature_extractor_zcr``, prints
    the row count and the shape of the first feature, and writes the
    DataFrame to "./features/zcr_features.pk1".
    """
    # Make sure the output folder exists before the extraction starts, so the
    # results cannot be lost to a missing directory at save time.
    os.makedirs("./features", exist_ok=True)

    features_1D_zcr = extract_custom_feature(feature_extractor_zcr)

    print('Processed ', len(features_1D_zcr), ' files')
    print('Output Feature shape ',np.shape(features_1D_zcr.iloc[0,0]))
    # NOTE(review): the extension is ".pk1" (digit one); kept unchanged in
    # case other code reads this exact file name.
    features_1D_zcr.to_pickle("./features/zcr_features.pk1")

In [None]:
def extract_spectral_centroid():
    """Extract spectral-centroid features and pickle the resulting DataFrame.

    Calls ``extract_custom_feature`` with ``feature_extractor_sc``, prints
    the row count and the shape of the first feature, and writes the
    DataFrame to "./features/sc_features.pk1".
    """
    # Make sure the output folder exists before the extraction starts, so the
    # results cannot be lost to a missing directory at save time.
    os.makedirs("./features", exist_ok=True)

    features_1D_sc = extract_custom_feature(feature_extractor_sc)

    print('Processed ', len(features_1D_sc), ' files')
    print('Output Feature shape ',np.shape(features_1D_sc.iloc[0,0]))
    # NOTE(review): the extension is ".pk1" (digit one); kept unchanged in
    # case other code reads this exact file name.
    features_1D_sc.to_pickle("./features/sc_features.pk1")

In [None]:
# Run the MFCC (13 and 32 coefficients) and HNR extractions in order,
# printing a confirmation after each one completes
for run_extraction, done_message in (
    (extract_mfcc_13, "13 MFCC extraction DONE"),
    (extract_mfcc_32, "32 MFCC extraction DONE"),
    (extract_hnr, "hnr extraction DONE"),
):
    run_extraction()
    print(done_message)

In [None]:
# Run the zero-crossing-rate and spectral-centroid extractions in order,
# printing a confirmation after each one completes
for run_extraction, done_message in (
    (extract_zero_crossing_rate, "Zero crossing rate extraction DONE"),
    (extract_spectral_centroid, "Spectral Centroid extraction DONE"),
):
    run_extraction()
    print(done_message)

In [None]:
# extract_meanf0()
# print("meanf0 extraction DONE")
# extract_stdevf0()
# print("stdevf0 extraction DONE")
# extract_localJitter()
# print("localJitter extraction DONE")