In [504]:
#************************************************************************************
# Rezwan Matin
# Thesis B
# Filename: ML_Project_Final_3.py
# Date: 12/11/19
#
# Objective:
# 13 MFCCs.
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import glob
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler

In [505]:
# Save directory path in 'path'
path = r'C:\Books\Texas State Books\Fall 2019\Thesis A\Corpus\Simulated\RAVDESS\All'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,42])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '\\' + file_list[i], sr=16000)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=41).T,axis=0)
    
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '\\' + file_list[i]))[0].split('-')[2]
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(avg_mfcc_feat, obj=41, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

# Print out the 1D Numpy array
result_array

array([ 7.29290903e-304,  7.74859723e-304,  1.39610347e-309, ...,
        1.04569352e+000, -1.93416854e+000,  8.00000000e+000])

In [506]:
result_array.shape

(60522,)

In [507]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(1440, 42)

In [508]:
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({41: "Emotion"}, axis='columns')
# Delete unnecessary emotion data (calm)
df.drop(df[df['Emotion'] == 2.0].index, inplace = True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.tail(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,Emotion
1236,-573.848588,46.529829,-1.996194,4.366949,0.476567,-2.624179,-9.680937,-6.071887,-6.379992,-1.544885,...,1.311004,2.674462,-1.021965,1.554322,0.725517,3.171725,-0.227515,1.874954,0.668365,8.0
1237,-610.644369,27.354294,-1.804459,3.297733,-13.699217,-6.411533,-15.568777,-3.946175,-9.451201,-6.80814,...,1.576443,2.324319,-1.325325,0.665216,-0.352963,1.442189,-2.0073,-1.561974,-2.994707,8.0
1238,-542.001963,60.38029,-6.188802,23.976652,5.042993,-3.740892,-3.052043,-7.541337,-2.138802,-0.910388,...,-0.239348,2.698978,-0.729658,1.230961,-0.009385,2.523053,0.852207,1.769201,0.181132,8.0
1239,-520.946711,32.748223,-11.879473,0.491982,-14.526002,-17.345743,-14.641264,-8.165047,-8.65251,-10.086634,...,-1.769573,0.265005,-0.541398,1.995636,-0.806177,2.690545,1.089187,1.328396,-2.219862,8.0
1240,-455.584559,31.212537,4.27057,4.504648,-1.298668,3.087968,-1.774286,-1.638877,-4.018846,3.114071,...,1.759265,3.578296,0.89149,5.057628,1.152274,3.972974,1.040239,4.256041,1.070851,8.0
1241,-582.305337,27.951881,-3.292516,-0.987903,-9.778702,5.400651,-5.384935,-7.868084,-9.045888,-4.682148,...,-1.042141,2.340775,-1.174169,-0.356767,-2.566269,0.030031,-0.779082,0.602524,-2.038206,8.0
1242,-656.668135,58.283837,16.194336,14.154455,1.783136,10.170128,-8.850361,-0.65235,-2.893777,3.191281,...,1.522107,1.428916,-0.911478,1.119676,2.061883,2.733019,1.004023,2.117052,0.341444,8.0
1243,-589.544299,27.678968,-18.379676,5.268783,-13.21124,-5.410846,-22.95011,-7.214175,-5.296114,-7.427383,...,-1.695784,-1.330651,-3.806211,-0.500545,-2.277378,0.805128,-1.959628,-1.442201,-3.195362,8.0
1244,-505.248003,40.855623,-3.922663,12.295725,-4.162346,-7.618341,-8.299912,-16.350918,-4.49705,-4.329366,...,-1.781388,-0.60237,-3.128227,1.574501,0.570319,0.346203,-2.535722,0.419041,-1.357481,8.0
1245,-529.0141,20.123333,-17.066619,-4.259952,-6.233545,-13.162866,-14.427995,-13.282015,-4.853356,-3.857434,...,1.719363,1.52429,-3.41962,2.199301,-0.221804,2.997242,-0.226449,1.367185,-0.977477,8.0


In [509]:
df['Emotion'].value_counts()

8.0    192
7.0    192
6.0    192
5.0    192
4.0    192
3.0    192
1.0     96
Name: Emotion, dtype: int64

In [510]:
# Balance the dataset for equal number of samples for each class.
# Separate majority and minority classes
df_minority = df[df.Emotion==1.0]
df_majority3 = df[df.Emotion==3.0]
df_majority4 = df[df.Emotion==4.0]
df_majority5 = df[df.Emotion==5.0]
df_majority6 = df[df.Emotion==6.0]
df_majority7 = df[df.Emotion==7.0]
df_majority8 = df[df.Emotion==8.0]
 
# Upsample minority class
df_minority_upsampled = resample(df_minority, 
                                 replace=True,     # sample with replacement
                                 n_samples=192,    # to match majority class
                                 random_state=123) # reproducible results
 
# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled, df_majority3, df_majority4, df_majority5, df_majority6, df_majority7, df_majority8])
 
# Display new class counts
df_upsampled.Emotion.value_counts()

# Reset row (sample) indexing
df_upsampled = df_upsampled.reset_index(drop=True)

df_upsampled['Emotion'].value_counts()

8.0    192
7.0    192
6.0    192
5.0    192
4.0    192
3.0    192
1.0    192
Name: Emotion, dtype: int64

In [511]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df_upsampled.iloc[0:1344, 41].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df_upsampled.iloc[0:1344, [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]].values
print(y)

[1. 1. 1. ... 8. 8. 8.]


In [512]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1, stratify=y)

# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

In [513]:
# Best settings from GridSearchCV
svm = SVC(random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [514]:
# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data
y_pred = svm.predict(X_test_std)

In [515]:
# Print out the performance metrics
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#print('Test Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

# Print out the performance metrics in a text file.
#text_file = open("Emotion Classifier Performance Metrics.txt", "w")
#text_file.write('Misclassified samples: %d' % (y_test != y_pred).sum())
#text_file.write('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#text_file.write('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))
#text_file.close()

Misclassified samples: 138
Training Accuracy: 0.82
Test Accuracy: 0.59
