In [1]:
#************************************************************************************
# Rezwan Matin
# Thesis B
# Filename: ML_CREMAD_3.py
# Date: 2/6/20
#
# Objective:
# 26 MFCCs (mean) and 26 MFCCs (standard deviation), ZCR with background sound (city center noise).
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import glob
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import learning_curve
from sklearn.metrics import confusion_matrix
import itertools

In [3]:
# Save directory path in 'path'
path = r'C:/Books/Texas State Books/Fall 2019/Thesis A/Corpus/Simulated/CREMA-D/All_Background_Noise'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,54])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '/' + file_list[i], sr=None)
    
    # 'rosa.feature.mfcc' extracts n_mfccs from signal and stores it into 'mfcc_feat'
    mfcc_feat = rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the standard deviation of mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.std' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    std_mfcc_feat = np.std(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the average zero crossing rate (utterance-level feature) using 'rosa.feat.zero_crossing_rate()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    zcross_feat = rosa.feature.zero_crossing_rate(sig)
    avg_zcross_feat = np.mean(rosa.feature.zero_crossing_rate(y=sig).T,axis=0)
    
    # Append the three 1D arrays into a single 1D array called 'feat'.
    feat0 = np.append(avg_mfcc_feat, std_mfcc_feat, axis=0)
    
    feat1 = np.append(feat0, avg_zcross_feat, axis=0)
    
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '/' + file_list[i]))[0].split('_')[2]
    
    if label=='ANG':
        label=5.0
    elif label=='DIS':
        label=7.0
    elif label=='FEA':
        label=6.0
    elif label=='HAP':
        label=3.0
    elif label=='NEU':
        label=1.0
    elif label=='SAD':
        label=4.0
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(feat1, obj=53, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

# Print out the 1D Numpy array
result_array

array([-3.47205556e+02,  8.76884703e+01,  2.21978697e+01, ...,
        4.64314891e+00,  9.98936907e-02,  4.00000000e+00])

In [4]:
result_array.shape

(401922,)

In [5]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(7442, 54)

In [6]:
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({53: "Emotion"}, axis='columns')
df['Emotion'].replace({1.0: "Neutral", 3.0: "Happy", 4.0: "Sad", 5.0: "Angry", 6.0: "Fearful", 7.0: "Disgust"}, inplace=True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,Emotion
0,-200.305125,110.220892,-23.020927,5.564673,-3.43372,-13.933696,-17.513563,-7.055109,-5.801639,-11.929559,...,4.390137,5.704596,4.472694,5.963211,4.373028,4.973743,4.947161,4.61565,0.112644,Angry
1,-229.682659,117.691237,-23.806404,6.538366,-0.158326,-15.915603,-12.788466,-2.142581,-7.555061,-9.119011,...,4.532966,4.494573,4.390627,7.761214,3.80197,4.200039,5.875713,4.653402,0.109776,Disgust
2,-205.162285,115.02734,-25.505022,0.720303,-2.20534,-16.737664,-15.969883,-3.050651,-6.651684,-10.313271,...,4.664122,4.589419,4.357374,6.871053,4.673671,4.706475,5.423555,3.96227,0.107767,Fearful
3,-203.792435,110.446452,-20.932468,6.322168,-3.331733,-18.865721,-16.768705,-5.257298,-7.980065,-12.81137,...,4.814175,4.813741,6.061014,4.15785,3.770427,3.49618,5.326838,5.222182,0.106611,Happy
4,-231.29092,120.028535,-19.339869,6.865862,-2.101007,-13.31197,-15.556803,-3.412847,-5.938769,-9.790837,...,4.815232,3.888295,5.234906,4.617558,3.052873,4.135261,4.831568,3.761967,0.095619,Neutral
5,-240.366328,119.708737,-19.615643,6.490847,0.859614,-15.948502,-14.352046,0.215887,-5.750592,-7.641182,...,4.752702,4.040109,5.275019,4.541378,4.11712,4.226829,5.005871,3.890417,0.099315,Sad
6,-189.998975,109.625164,-18.440135,-2.575906,-15.193275,-7.732998,-14.82195,-8.828946,-5.55377,-14.327032,...,4.562264,5.71909,4.55381,4.954879,3.413063,3.878454,4.438109,3.034028,0.119805,Angry
7,-237.249116,126.187339,-19.853929,-0.159254,-8.325117,-8.37979,-13.365806,-1.79935,-6.65051,-8.018625,...,5.596206,4.561648,4.295121,4.147351,3.711096,3.408989,4.525216,3.749907,0.095853,Angry
8,-226.507189,121.143703,-19.38685,-0.146278,-7.22601,-9.152376,-15.827101,-4.806415,-6.146024,-10.578018,...,4.860474,5.557221,4.132427,6.431753,4.698516,4.268464,4.830722,4.297995,0.111287,Angry
9,-213.835203,115.532726,-21.220967,-0.962731,-12.764152,-9.622759,-13.804877,-6.222422,-5.024013,-8.448908,...,5.141141,5.650036,4.530034,3.682277,3.884114,3.675142,4.928433,4.202703,0.112413,Disgust


In [7]:
df['Emotion'].value_counts()

Sad        1271
Fearful    1271
Angry      1271
Happy      1271
Disgust    1271
Neutral    1087
Name: Emotion, dtype: int64

In [8]:
# Balance the dataset for equal number of samples for each class.
# Separate majority and minority classes
df_minority = df[df.Emotion=="Neutral"]
df_majority3 = df[df.Emotion=="Happy"]
df_majority4 = df[df.Emotion=="Sad"]
df_majority5 = df[df.Emotion=="Angry"]
df_majority6 = df[df.Emotion=="Fearful"]
df_majority7 = df[df.Emotion=="Disgust"]
df_majority8 = df[df.Emotion=="Surprised"]
 
# Upsample minority class
df_minority_upsampled = resample(df_minority, 
                                 replace=True,     # sample with replacement
                                 n_samples=1271,    # to match majority class
                                 random_state=123) # reproducible results
 
# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled, df_majority3, df_majority4, df_majority5, df_majority6, df_majority7, df_majority8])
 
# Display new class counts
df_upsampled.Emotion.value_counts()

# Reset row (sample) indexing
df_upsampled = df_upsampled.reset_index(drop=True)

df_upsampled['Emotion'].value_counts()

Sad        1271
Neutral    1271
Fearful    1271
Angry      1271
Happy      1271
Disgust    1271
Name: Emotion, dtype: int64

In [9]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df_upsampled.iloc[0:7626, 53].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df_upsampled.iloc[0:7626, list(range(53))].values
print(y)

['Neutral' 'Neutral' 'Neutral' ... 'Disgust' 'Disgust' 'Disgust']


In [10]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1, stratify=y)

# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

In [11]:
# Create SVM model with hyperparameters
#svm = SVC(kernel='sigmoid', C=10.0, random_state=1)

# Best settings from GridSearchCV
svm = SVC(kernel='rbf', C=4.88, gamma=0.048, random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=4.88, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.048, kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [12]:
# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data
y_pred = svm.predict(X_test_std)

In [13]:
# Print out the performance metrics
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#print('Test Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

# Print out the performance metrics in a text file.
#text_file = open("Emotion Classifier Performance Metrics.txt", "w")
#text_file.write('Misclassified samples: %d' % (y_test != y_pred).sum())
#text_file.write('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#text_file.write('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))
#text_file.close()

Misclassified samples: 827
Training Accuracy: 0.91
Test Accuracy: 0.57
