In [8]:
#************************************************************************************
# Rezwan Matin
# Thesis B
# Filename: ML_CREMAD_1.py
# Date: 2/6/20
#
# Objective:
# 26 MFCCs (mean) and 26 MFCCs (standard deviation), ZCR
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import glob
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import learning_curve
from sklearn.metrics import confusion_matrix
import itertools

In [9]:
# Save directory path in 'path'
path = r'C:/Books/Texas State Books/Fall 2019/Thesis A/Corpus/Simulated/CREMA-D/AudioWAV'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,54])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

# Define a Switch/Case type function to change labels to float values. Python does not have switch/case method! Also, if/else is slower since it goes thorugh each test case. 
def change_label(argument):
    switcher = {
        "ANG":5.0,
        "DIS":7.0,
        "FEA":6.0,
        "HAP":3.0,
        "NEU":1.0,
        "SAD":4.0,
    }
    return switcher.get(argument, "Nothing")

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '/' + file_list[i], sr=None)
    
    # 'rosa.feature.mfcc' extracts n_mfccs from signal and stores it into 'mfcc_feat'
    mfcc_feat = rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the standard deviation of mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.std' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    std_mfcc_feat = np.std(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the average zero crossing rate (utterance-level feature) using 'rosa.feat.zero_crossing_rate()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    zcross_feat = rosa.feature.zero_crossing_rate(sig)
    avg_zcross_feat = np.mean(rosa.feature.zero_crossing_rate(y=sig).T,axis=0)
    
    # Append the three 1D arrays into a single 1D array called 'feat'.
    feat0 = np.append(avg_mfcc_feat, std_mfcc_feat, axis=0)
    
    feat1 = np.append(feat0, avg_zcross_feat, axis=0)
    
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '/' + file_list[i]))[0].split('_')[2]
    
    label = change_label(label)
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(feat1, obj=53, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

# Print out the 1D Numpy array
result_array

array([8.37540478e-316, 2.85905908e-317, 6.01346953e-154, ...,
       5.21085333e+000, 9.93559632e-002, 4.00000000e+000])

In [10]:
result_array.shape

(401922,)

In [11]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(7442, 54)

In [12]:
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({53: "Emotion"}, axis='columns')
df['Emotion'].replace({1.0: "Neutral", 3.0: "Happy", 4.0: "Sad", 5.0: "Angry", 6.0: "Fearful", 7.0: "Disgust"}, inplace=True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,Emotion
0,-305.513536,92.898502,8.516959,23.96159,7.464701,-5.752541,-11.893587,-9.657942,-4.007898,-13.330255,...,4.414721,5.528326,5.195753,5.307916,5.345335,4.888166,4.340222,4.59082,0.101868,Angry
1,-345.907395,95.820238,10.499119,31.618329,15.872472,-6.850031,-6.63118,-4.977547,-5.325229,-10.31018,...,5.772431,4.62241,4.378201,6.172889,5.291739,3.952297,6.09156,4.302587,0.093061,Disgust
2,-321.359573,94.798783,8.361474,23.435232,11.838125,-7.089389,-8.56127,-4.930308,-4.971758,-13.737217,...,4.692587,4.655821,4.081135,4.976462,5.778749,5.058721,5.338252,3.976718,0.084286,Fearful
3,-302.581993,92.462972,4.271082,28.020993,10.94718,-11.818868,-9.983442,-7.08476,-7.572051,-17.069607,...,5.853565,4.973709,5.776979,4.276045,5.119423,4.194873,7.402447,6.198938,0.084878,Happy
4,-334.895981,100.424973,9.473641,30.222457,11.459778,-3.331541,-8.357068,-9.735279,-6.035137,-12.111937,...,3.683387,4.591428,4.819043,4.521762,3.254442,3.987645,4.452091,4.017309,0.082031,Neutral
5,-364.626144,97.106995,15.754379,34.18939,18.356225,-4.537027,-6.385001,-4.838215,-4.753981,-10.991579,...,5.763942,5.029095,5.916576,5.586083,4.964254,4.093002,3.512989,4.858378,0.087023,Sad
6,-269.561322,92.729528,4.063934,10.854961,-8.344595,0.571943,-7.588304,-11.462844,-4.971021,-15.303206,...,4.856809,6.446196,5.315902,4.498581,3.604377,4.162016,5.950122,4.473912,0.116883,Angry
7,-359.484488,109.216239,12.352592,22.740965,6.94511,1.847854,-4.851697,-5.719257,-3.910132,-12.466139,...,4.497827,3.846127,3.050987,3.984318,3.470277,3.031272,3.938588,2.863776,0.077945,Angry
8,-333.021689,107.173722,10.303787,18.254047,0.821337,2.836936,-5.964388,-13.530434,-3.903504,-10.984605,...,4.964893,4.824891,4.731701,4.889751,5.297374,3.969922,4.305148,5.105868,0.100239,Angry
9,-314.020653,94.318556,3.926645,22.092602,-0.432584,-1.079293,-8.305307,-8.842047,-1.492963,-11.373153,...,4.934561,5.457799,4.600452,4.090309,4.063168,4.584091,3.778662,4.581795,0.107577,Disgust


In [13]:
df['Emotion'].value_counts()

Angry      1271
Fearful    1271
Sad        1271
Disgust    1271
Happy      1271
Neutral    1087
Name: Emotion, dtype: int64

In [14]:
# Balance the dataset for equal number of samples for each class.
# Separate majority and minority classes
df_minority = df[df.Emotion=="Neutral"]
df_majority3 = df[df.Emotion=="Happy"]
df_majority4 = df[df.Emotion=="Sad"]
df_majority5 = df[df.Emotion=="Angry"]
df_majority6 = df[df.Emotion=="Fearful"]
df_majority7 = df[df.Emotion=="Disgust"]
df_majority8 = df[df.Emotion=="Surprised"]
 
# Upsample minority class
df_minority_upsampled = resample(df_minority, 
                                 replace=True,     # sample with replacement
                                 n_samples=1271,    # to match majority class
                                 random_state=123) # reproducible results
 
# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled, df_majority3, df_majority4, df_majority5, df_majority6, df_majority7, df_majority8])
 
# Display new class counts
df_upsampled.Emotion.value_counts()

# Reset row (sample) indexing
df_upsampled = df_upsampled.reset_index(drop=True)

df_upsampled['Emotion'].value_counts()

Angry      1271
Fearful    1271
Neutral    1271
Sad        1271
Disgust    1271
Happy      1271
Name: Emotion, dtype: int64

In [15]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df_upsampled.iloc[0:7626, 53].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df_upsampled.iloc[0:7626, list(range(53))].values
print(y)

['Neutral' 'Neutral' 'Neutral' ... 'Disgust' 'Disgust' 'Disgust']


In [16]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1, stratify=y)

# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

In [17]:
# Create SVM model with hyperparameters
#svm = SVC(kernel='sigmoid', C=10.0, random_state=1)

# Best settings from GridSearchCV
svm = SVC(kernel='rbf', C=4.88, gamma=0.048, random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=4.88, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.048, kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [18]:
# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data
y_pred = svm.predict(X_test_std)

In [19]:
# Print out the performance metrics
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#print('Test Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

# Print out the performance metrics in a text file.
#text_file = open("Emotion Classifier Performance Metrics.txt", "w")
#text_file.write('Misclassified samples: %d' % (y_test != y_pred).sum())
#text_file.write('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#text_file.write('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))
#text_file.close()

Misclassified samples: 790
Training Accuracy: 0.99
Test Accuracy: 0.59


In [3]:
fs

16000

In [4]:
def change_label(argument):
    switcher = {
        "ANG":5.0,
        "DIS":7.0,
        "FEA":6.0,
        "HAP":3.0,
        "NEU":1.0,
        "SAD":4.0,
    }
    return switcher.get(argument, "Nothing")

In [7]:
label = "SAD"
label = change_label(label)
label

4.0