In [2]:
#************************************************************************************
# Rezwan Matin
# Thesis B
# Filename: ML_CREMAD_3.py
# Date: 2/6/20
#
# Objective:
# 26 MFCCs (mean) and 26 MFCCs (standard deviation), ZCR with background sound (city center noise).
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import glob
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import learning_curve
from sklearn.metrics import confusion_matrix
import itertools

In [3]:
# Save directory path in 'path'
path = r'C:/Books/Texas State Books/Fall 2019/Thesis A/Corpus/Simulated/CREMA-D/All_Background_Noise'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,54])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '/' + file_list[i], sr=None)
    
    # 'rosa.feature.mfcc' extracts n_mfccs from signal and stores it into 'mfcc_feat'
    mfcc_feat = rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the standard deviation of mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.std' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    std_mfcc_feat = np.std(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the average zero crossing rate (utterance-level feature) using 'rosa.feat.zero_crossing_rate()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    zcross_feat = rosa.feature.zero_crossing_rate(sig)
    avg_zcross_feat = np.mean(rosa.feature.zero_crossing_rate(y=sig).T,axis=0)
    
    # Append the three 1D arrays into a single 1D array called 'feat'.
    feat0 = np.append(avg_mfcc_feat, std_mfcc_feat, axis=0)
    
    feat1 = np.append(feat0, avg_zcross_feat, axis=0)
    
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '/' + file_list[i]))[0].split('-')[2]
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(feat1, obj=53, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

# Print out the 1D Numpy array
result_array

array([8.36915584e-316, 2.79430091e-317, 6.01346953e-154, ...,
       4.45799467e+000, 8.88141135e-002, 7.00000000e+000])

In [4]:
result_array.shape

(401922,)

In [5]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(7442, 54)

In [6]:
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({53: "Emotion"}, axis='columns')
df['Emotion'].replace({1.0: "Neutral", 3.0: "Happy", 4.0: "Sad", 5.0: "Angry", 6.0: "Fearful", 7.0: "Disgust"}, inplace=True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,Emotion
0,-257.743452,128.257127,-20.062134,-3.656925,-0.32153,-11.715507,-16.518107,1.373187,-7.757371,-7.262538,...,4.261163,5.043436,3.933841,5.676844,4.227004,3.638411,4.611141,5.622693,0.091658,Neutral
1,-247.063089,132.698874,-13.051016,5.226668,1.173245,-10.330729,-8.862077,4.281481,-3.368292,-1.523319,...,4.181115,3.931223,3.80802,5.294748,4.570782,4.362616,5.393032,4.264077,0.084323,Neutral
2,-250.648794,130.863725,-8.130352,4.462313,3.3679,-11.474364,-7.303592,3.008775,-4.327485,-1.77707,...,4.354691,3.948244,4.280088,3.466519,3.19766,4.12015,4.795012,3.643966,0.084563,Neutral
3,-238.623286,133.869109,-14.213947,3.578632,-1.015762,-7.026388,-10.85012,0.884596,-0.639657,-1.640199,...,4.478362,4.53755,3.943905,5.223925,5.166723,3.513438,6.170609,4.862289,0.083489,Neutral
4,-251.158784,133.508828,-11.83173,1.667155,6.055271,-9.644207,-9.877352,4.103262,-2.686458,-2.255455,...,4.112669,4.164796,3.958947,4.98427,3.850414,3.943836,5.046918,4.131732,0.087938,Neutral
5,-246.665686,133.066719,-14.072409,4.555111,3.544456,-8.250208,-10.373594,3.321216,-3.264335,-3.067006,...,4.718605,4.868713,4.914801,5.354797,4.040029,4.114351,4.637503,3.764958,0.086642,Neutral
6,-246.231917,132.022023,-10.061965,4.909182,-1.244429,-9.276056,-8.368454,1.401375,-1.538015,-1.609722,...,5.313956,3.373341,4.535294,5.002469,3.334614,4.138312,5.477847,3.676735,0.081913,Neutral
7,-242.111154,131.225982,-12.924863,3.586309,3.077017,-8.426216,-10.376227,3.426574,-0.81129,-2.458038,...,3.668968,3.948183,4.201171,5.052123,3.36391,3.084562,5.704372,4.206582,0.085449,Neutral
8,-241.387842,120.667214,-5.410135,0.703188,5.742421,-12.362735,-9.719696,1.699374,-2.704435,-3.113361,...,4.690317,4.116911,4.84944,6.30506,3.333938,4.316075,5.583106,3.846032,0.116965,Neutral
9,-237.655965,124.979911,-9.411852,0.776253,2.459028,-7.738523,-7.465813,-0.338907,-1.62858,-2.714056,...,4.561047,3.809824,4.562403,5.879053,3.756748,3.812555,5.507485,4.412995,0.095568,Neutral


In [7]:
df['Emotion'].value_counts()

Angry      1271
Happy      1271
Disgust    1271
Fearful    1271
Sad        1271
Neutral    1087
Name: Emotion, dtype: int64

In [8]:
# Balance the dataset for equal number of samples for each class.
# Separate majority and minority classes
df_minority = df[df.Emotion=="Neutral"]
df_majority3 = df[df.Emotion=="Happy"]
df_majority4 = df[df.Emotion=="Sad"]
df_majority5 = df[df.Emotion=="Angry"]
df_majority6 = df[df.Emotion=="Fearful"]
df_majority7 = df[df.Emotion=="Disgust"]
df_majority8 = df[df.Emotion=="Surprised"]
 
# Upsample minority class
df_minority_upsampled = resample(df_minority, 
                                 replace=True,     # sample with replacement
                                 n_samples=1271,    # to match majority class
                                 random_state=123) # reproducible results
 
# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled, df_majority3, df_majority4, df_majority5, df_majority6, df_majority7, df_majority8])
 
# Display new class counts
df_upsampled.Emotion.value_counts()

# Reset row (sample) indexing
df_upsampled = df_upsampled.reset_index(drop=True)

df_upsampled['Emotion'].value_counts()

Angry      1271
Happy      1271
Disgust    1271
Neutral    1271
Fearful    1271
Sad        1271
Name: Emotion, dtype: int64

In [9]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df_upsampled.iloc[0:7626, 53].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df_upsampled.iloc[0:7626, list(range(53))].values
print(y)

['Neutral' 'Neutral' 'Neutral' ... 'Disgust' 'Disgust' 'Disgust']


In [10]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1, stratify=y)

# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

In [17]:
# Create SVM model with hyperparameters
#svm = SVC(kernel='sigmoid', C=10.0, random_state=1)

# Best settings from GridSearchCV
svm = SVC(kernel='rbf', C=5.01, gamma=0.035, random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=5.01, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.035, kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [18]:
# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data
y_pred = svm.predict(X_test_std)

In [19]:
# Print out the performance metrics
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#print('Test Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

# Print out the performance metrics in a text file.
#text_file = open("Emotion Classifier Performance Metrics.txt", "w")
#text_file.write('Misclassified samples: %d' % (y_test != y_pred).sum())
#text_file.write('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#text_file.write('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))
#text_file.close()

Misclassified samples: 823
Training Accuracy: 0.88
Test Accuracy: 0.57
