In [1]:
#************************************************************************************
# Rezwan Matin
# EE5321 – Project
# Filename: SVM_TESS_3.ipynb
# Date: 01/28/20
#
# Objective:
# 13 MFCCs (mean) and 13 MFCCs (standard deviation), ZCR with background sound (city center noise).
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import glob
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import learning_curve
from sklearn.metrics import confusion_matrix

In [2]:
# Save directory path in 'path'
path = r'C:\Books\Texas State Books\Fall 2019\Thesis A\Corpus\Simulated\TESS\All_Background Noise'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,28])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '\\' + file_list[i], sr=None)
    
    # 'rosa.feature.mfcc' extracts n_mfccs from signal and stores it into 'mfcc_feat'
    mfcc_feat = rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=13)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=13).T,axis=0)
    
    # Calculate the standard deviation of mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.std' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    std_mfcc_feat = np.std(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=13).T,axis=0)
    
    # Calculate the average zero crossing rate (utterance-level feature) using 'rosa.feat.zero_crossing_rate()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    zcross_feat = rosa.feature.zero_crossing_rate(sig)
    avg_zcross_feat = np.mean(rosa.feature.zero_crossing_rate(y=sig).T,axis=0)
    
    # Append the three 1D arrays into a single 1D array called 'feat'.
    feat0 = np.append(avg_mfcc_feat, std_mfcc_feat, axis=0)
    
    feat1 = np.append(feat0, avg_zcross_feat, axis=0)
    
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '\\' + file_list[i]))[0].split('_')[1]
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(feat1, obj=27, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

# Print out the 1D Numpy array
result_array

array([8.84597014e-316, 9.23902758e-322, 0.00000000e+000, ...,
       8.94889134e+000, 1.19628906e-001, 8.00000000e+000])

In [3]:
result_array.shape

(78428,)

In [4]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(2800, 28)

In [5]:
# Transfer Numpy array data to a Pandas dataframe
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({27: "Emotion"}, axis='columns')
# Rename class labels
df['Emotion'].replace({1.0: "Neutral", 3.0: "Happy", 4.0: "Sad", 5.0: "Angry", 6.0: "Fearful", 7.0: "Disgust", 8.0: "Surprised"}, inplace=True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,Emotion
0,-294.816592,150.312527,-9.05212,-3.916243,-3.401448,3.24224,-19.317408,-3.925837,-1.380379,-2.866871,...,6.084997,7.6074,5.347751,5.710822,6.922808,6.725921,6.132534,5.202336,0.062166,Neutral
1,-288.091204,145.271982,-5.288622,-6.255021,-3.657859,2.262401,-19.198261,-3.398103,-3.573883,-2.16475,...,6.239199,6.568302,5.397851,5.633117,7.083575,6.471878,5.844693,5.911292,0.063644,Neutral
2,-291.699426,147.373412,-7.882165,-4.298593,-3.659971,4.539438,-21.546068,-4.097793,-3.392886,-3.160771,...,5.736303,7.272971,5.582402,4.799888,7.071246,6.493861,5.820393,5.481666,0.063808,Neutral
3,-291.544656,144.747928,-8.26436,-2.604379,-4.110115,2.04169,-19.160577,-5.905657,-2.103194,-4.258722,...,6.234078,7.45733,5.596513,5.378232,6.463868,6.34349,5.620995,5.884236,0.063059,Neutral
4,-295.289657,145.684204,-7.542696,-0.377279,-3.937892,2.975318,-20.389949,-4.501793,-2.01089,-3.079032,...,6.763071,8.07231,4.580776,5.553643,6.699931,7.046319,5.921853,4.595884,0.062494,Neutral
5,-294.410773,146.997617,-6.913076,-1.878128,-3.774099,4.446398,-21.280524,-4.048529,-2.401243,-3.084246,...,5.837386,6.730824,5.312318,4.610486,7.288002,5.183214,4.901872,5.239714,0.061428,Neutral
6,-292.026111,147.922252,-8.762098,-2.491947,-4.586831,4.456701,-20.851006,-4.445137,-3.113345,-3.14376,...,5.163166,9.120895,5.614463,5.43657,6.961529,6.14948,5.64684,4.912149,0.063738,Neutral
7,-295.350903,149.33611,-7.667542,-3.810652,-3.175914,3.547584,-20.809067,-4.425257,-3.122551,-3.040127,...,6.057697,6.919838,5.715732,5.362707,7.308826,6.4468,5.051135,5.986403,0.062233,Neutral
8,-296.475896,151.145535,-10.541584,-2.887677,-5.255899,3.866349,-21.18012,-4.844478,-3.060344,-3.504551,...,5.602725,6.663025,5.478602,5.09474,8.057717,5.836123,4.874563,5.772665,0.06094,Neutral
9,-288.698096,142.524684,-6.085739,-2.24896,-3.6594,4.119679,-20.601118,-4.011308,-3.561511,-4.197139,...,5.885934,8.394726,5.817749,5.830238,7.384967,7.630625,5.111994,5.728527,0.065986,Neutral


In [6]:
df['Emotion'].value_counts()

Disgust      400
Angry        400
Neutral      400
Fearful      400
Surprised    400
Happy        400
Sad          400
Name: Emotion, dtype: int64

In [7]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df.iloc[0:2800, 27].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df.iloc[0:2800, list(range(27))].values
print(y)

['Neutral' 'Neutral' 'Neutral' ... 'Surprised' 'Surprised' 'Surprised']


In [8]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1, stratify=y)

# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

In [9]:
# Create SVM model with hyperparameters
#svm = SVC(kernel='sigmoid', C=10.0, random_state=1)

# Best settings from GridSearchCV
svm = SVC(kernel='rbf', C=10.0, gamma=0.1, random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=10.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [10]:
# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data
y_pred = svm.predict(X_test_std)

In [11]:
# Print out the performance metrics
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#print('Test Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

# Print out the performance metrics in a text file.
#text_file = open("Emotion Classifier Performance Metrics.txt", "w")
#text_file.write('Misclassified samples: %d' % (y_test != y_pred).sum())
#text_file.write('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#text_file.write('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))
#text_file.close()

Misclassified samples: 11
Training Accuracy: 1.00
Test Accuracy: 0.98
