In [1]:
#************************************************************************************
# Rezwan Matin
# Thesis B
# Filename: ML_CREMAD_1.py
# Date: 2/6/20
#
# Objective:
# 26 MFCCs (mean) and 26 MFCCs (standard deviation), ZCR
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import glob
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import learning_curve
from sklearn.metrics import confusion_matrix
import itertools

In [3]:
# Save directory path in 'path'
path = r'C:/Books/Texas State Books/Fall 2019/Thesis A/Corpus/Simulated/CREMA-D/AudioWAV'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,54])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '/' + file_list[i], sr=None)
    
    # 'rosa.feature.mfcc' extracts n_mfccs from signal and stores it into 'mfcc_feat'
    mfcc_feat = rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the standard deviation of mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.std' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    std_mfcc_feat = np.std(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the average zero crossing rate (utterance-level feature) using 'rosa.feat.zero_crossing_rate()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    zcross_feat = rosa.feature.zero_crossing_rate(sig)
    avg_zcross_feat = np.mean(rosa.feature.zero_crossing_rate(y=sig).T,axis=0)
    
    # Append the three 1D arrays into a single 1D array called 'feat'.
    feat0 = np.append(avg_mfcc_feat, std_mfcc_feat, axis=0)
    
    feat1 = np.append(feat0, avg_zcross_feat, axis=0)
    
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '/' + file_list[i]))[0].split('-')[2]
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(feat1, obj=53, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

# Print out the 1D Numpy array
result_array

array([8.47914513e-316, 3.34474537e-317, 5.98142246e-154, ...,
       5.25193664e+000, 5.75588060e-002, 7.00000000e+000])

In [4]:
result_array.shape

(401922,)

In [5]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(7442, 54)

In [6]:
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({53: "Emotion"}, axis='columns')
df['Emotion'].replace({1.0: "Neutral", 3.0: "Happy", 4.0: "Sad", 5.0: "Angry", 6.0: "Fearful", 7.0: "Disgust"}, inplace=True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,Emotion
0,-409.580971,103.64549,27.161515,18.093021,12.811474,5.402862,-5.392274,-3.800348,-3.544491,-6.523918,...,4.909617,5.224191,3.860434,4.772426,8.709305,7.591669,7.18976,9.671666,0.073212,Neutral
1,-373.770672,109.596019,38.734953,33.685422,7.479949,-6.319713,-3.844802,-0.673088,1.868879,1.55858,...,4.683779,5.037258,4.927024,4.653484,5.907128,5.293187,5.448358,7.169724,0.061239,Neutral
2,-385.847196,103.957394,49.327729,34.439667,10.1989,-5.113278,-0.401824,-4.047927,-2.114799,1.580962,...,4.23771,5.035474,2.904025,5.119793,4.102875,3.889148,5.18262,4.664451,0.069689,Neutral
3,-349.317253,116.763537,30.604149,20.281985,1.139636,3.501241,-5.960475,-7.493484,6.001803,3.297082,...,6.127637,5.458201,5.348188,4.18667,4.745682,4.777939,5.503477,7.142215,0.057289,Neutral
4,-393.262517,108.570857,46.502331,26.103168,12.889328,4.999521,-2.269362,-5.210427,2.550735,2.220535,...,4.04822,4.164893,3.31075,3.624401,4.413972,5.661447,3.76082,4.56124,0.062318,Neutral
5,-386.417754,107.03647,45.484288,31.376613,7.632382,4.481237,-0.600802,-4.760566,-0.568491,-0.818742,...,4.953704,5.154451,5.262378,4.487846,5.44822,5.748837,5.559225,5.997929,0.062483,Neutral
6,-371.362978,105.915817,43.068056,29.801091,2.214677,1.612234,-0.017777,-7.735834,1.272579,1.660309,...,4.706145,4.568333,4.987484,6.348377,4.83902,7.380442,6.041855,7.385808,0.057388,Neutral
7,-365.33433,107.50001,33.281928,27.622375,11.566356,-0.347679,-3.919243,-1.591541,5.255337,0.317914,...,3.919802,4.519577,4.400312,3.954656,6.527457,4.011414,5.107423,4.285636,0.065416,Neutral
8,-376.614158,85.992639,52.140134,34.382854,17.647919,-2.932433,-3.954962,-4.946166,-1.230316,-0.686492,...,4.462806,4.620988,5.338256,4.986356,5.332276,4.909621,4.074335,4.856918,0.118523,Neutral
9,-355.005952,100.322709,40.837729,23.594552,4.512362,1.550186,0.399004,-8.121458,3.946627,2.130804,...,5.798846,4.442366,5.888093,3.772484,5.190704,5.149193,5.26547,5.420003,0.100734,Neutral


In [7]:
df['Emotion'].value_counts()

Happy      1271
Disgust    1271
Sad        1271
Fearful    1271
Angry      1271
Neutral    1087
Name: Emotion, dtype: int64

In [8]:
# Balance the dataset for equal number of samples for each class.
# Separate majority and minority classes
df_minority = df[df.Emotion=="Neutral"]
df_majority3 = df[df.Emotion=="Happy"]
df_majority4 = df[df.Emotion=="Sad"]
df_majority5 = df[df.Emotion=="Angry"]
df_majority6 = df[df.Emotion=="Fearful"]
df_majority7 = df[df.Emotion=="Disgust"]
df_majority8 = df[df.Emotion=="Surprised"]
 
# Upsample minority class
df_minority_upsampled = resample(df_minority, 
                                 replace=True,     # sample with replacement
                                 n_samples=1271,    # to match majority class
                                 random_state=123) # reproducible results
 
# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled, df_majority3, df_majority4, df_majority5, df_majority6, df_majority7, df_majority8])
 
# Display new class counts
df_upsampled.Emotion.value_counts()

# Reset row (sample) indexing
df_upsampled = df_upsampled.reset_index(drop=True)

df_upsampled['Emotion'].value_counts()

Neutral    1271
Happy      1271
Disgust    1271
Sad        1271
Fearful    1271
Angry      1271
Name: Emotion, dtype: int64

In [15]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df_upsampled.iloc[0:7626, 53].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df_upsampled.iloc[0:7626, list(range(53))].values
print(y)

['Neutral' 'Neutral' 'Neutral' ... 'Disgust' 'Disgust' 'Disgust']


In [10]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1, stratify=y)

# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

In [131]:
# Create SVM model with hyperparameters
#svm = SVC(kernel='sigmoid', C=10.0, random_state=1)

# Best settings from GridSearchCV
svm = SVC(kernel='rbf', C=5.01, gamma=0.035, random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=5.01, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.035, kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [132]:
# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data
y_pred = svm.predict(X_test_std)

In [133]:
# Print out the performance metrics
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#print('Test Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

# Print out the performance metrics in a text file.
#text_file = open("Emotion Classifier Performance Metrics.txt", "w")
#text_file.write('Misclassified samples: %d' % (y_test != y_pred).sum())
#text_file.write('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#text_file.write('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))
#text_file.close()

Misclassified samples: 712
Training Accuracy: 0.97
Test Accuracy: 0.63


In [3]:
fs

16000

In [4]:
def change_label(argument):
    switcher = {
        "ANG":5.0,
        "DIS":7.0,
        "FEA":6.0,
        "HAP":3.0,
        "NEU":1.0,
        "SAD":4.0,
    }
    return switcher.get(argument, "Nothing")

In [7]:
label = "SAD"
label = change_label(label)
label

4.0