In [1]:
#************************************************************************************
# Rezwan Matin
# Thesis B
# Filename: ML_RAVDESS_CREMAD_2.py
# Date: 02/29/20
#
# Objective:
# Testing RAVDESS+CREMA-D data with background citycenter noise.
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import glob
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import learning_curve
from sklearn.metrics import confusion_matrix

In [2]:
# Save directory path in 'path'
path = r'C:/Books/Texas State Books/Fall 2019/Thesis A/Corpus/Simulated/RAVDESS+CREMA-D/All_Background_Noise'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,54])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '/' + file_list[i], sr=None)
    
    # 'rosa.feature.mfcc' extracts n_mfccs from signal and stores it into 'mfcc_feat'
    mfcc_feat = rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the standard deviation of mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.std' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    std_mfcc_feat = np.std(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the average zero crossing rate (utterance-level feature) using 'rosa.feat.zero_crossing_rate()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    zcross_feat = rosa.feature.zero_crossing_rate(sig)
    avg_zcross_feat = np.mean(rosa.feature.zero_crossing_rate(y=sig).T,axis=0)
    
    # Append the three 1D arrays into a single 1D array called 'feat'.
    feat0 = np.append(avg_mfcc_feat, std_mfcc_feat, axis=0)
    
    feat1 = np.append(feat0, avg_zcross_feat, axis=0)
    
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '/' + file_list[i]))[0].split('-')[2]
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(feat1, obj=53, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

# Print out the 1D Numpy array
result_array

array([8.49940972e-316, 2.04958193e-317, 6.01346953e-154, ...,
       4.45799467e+000, 8.88141135e-002, 7.00000000e+000])

In [3]:
result_array.shape

(479682,)

In [4]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(8882, 54)

In [5]:
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({53: "Emotion"}, axis='columns')
# Delete unnecessary emotion data (calm)
df.drop(df[df['Emotion'] == 2.0].index, inplace = True)
df['Emotion'].replace({1.0: "Neutral", 3.0: "Happy", 4.0: "Sad", 5.0: "Angry", 6.0: "Fearful", 7.0: "Disgust", 8.0: "Surprised"}, inplace=True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,Emotion
0,-370.811277,200.945609,-25.601894,12.781816,-23.361012,12.48095,-7.339299,-13.182599,-9.380556,-4.284814,...,5.38472,4.222119,4.56676,4.612926,4.793089,4.647,5.119929,4.119411,0.034162,Neutral
1,-366.069746,199.358091,-26.372811,12.00766,-23.430119,12.148651,-8.856314,-11.990725,-10.876269,-3.940869,...,5.253634,4.672116,4.409851,5.220854,4.638651,4.641891,4.555992,4.670715,0.03507,Neutral
2,-371.803196,200.366548,-24.397291,14.295511,-24.206423,12.721038,-6.820065,-13.396596,-8.79066,-4.445858,...,5.15294,3.97812,4.557066,4.751056,4.582706,4.638075,4.598379,3.972958,0.033693,Neutral
3,-371.427536,201.537947,-24.865862,12.422414,-23.916673,12.643198,-7.354886,-13.721731,-9.521451,-4.66822,...,5.043958,4.461439,4.518882,4.675751,4.518056,4.727016,4.72746,4.1555,0.03373,Neutral
4,-373.642376,202.827342,-25.553881,12.782621,-23.733961,11.745437,-6.510785,-14.144631,-8.510501,-5.069466,...,4.926882,4.109352,4.650759,4.858926,4.639135,4.482118,4.668996,3.875404,0.033649,Neutral
5,-371.129772,198.760479,-24.414977,12.606131,-24.439266,12.853604,-8.206087,-12.676846,-10.240667,-3.625793,...,5.272938,3.965703,4.33003,4.571316,4.455153,4.716155,4.868451,4.398352,0.034611,Neutral
6,-367.217488,201.85731,-26.308989,12.84815,-23.175489,13.035778,-8.457988,-13.026163,-8.166834,-4.056443,...,5.277703,4.265305,4.275447,5.143325,4.634569,4.704514,5.123454,4.05518,0.033943,Neutral
7,-368.952229,199.899911,-24.563479,12.439998,-24.512029,12.515186,-8.014015,-14.416847,-9.995882,-3.750753,...,5.026957,4.19475,4.391366,4.718514,4.514515,4.600495,4.672192,4.377275,0.036392,Neutral
8,-367.990198,201.492306,-25.436294,13.502739,-24.122912,13.747697,-8.576226,-13.346668,-8.828258,-4.495377,...,5.229996,4.696323,4.361298,4.898236,4.389856,4.877307,4.76925,4.202704,0.033971,Neutral
9,-373.110028,201.057245,-24.4143,12.865499,-24.695461,13.433978,-7.34326,-13.721217,-9.161911,-4.392434,...,4.605601,4.32402,4.458427,4.702443,4.457803,4.898994,4.860985,4.462224,0.033467,Neutral


In [6]:
df['Emotion'].value_counts()

Happy        1463
Fearful      1463
Disgust      1463
Angry        1463
Sad          1463
Neutral      1183
Surprised     192
Name: Emotion, dtype: int64

In [7]:
# Balance the dataset for equal number of samples for each class.
# Separate majority and minority classes
df_minority_1 = df[df.Emotion=="Neutral"]
df_majority3 = df[df.Emotion=="Happy"]
df_majority4 = df[df.Emotion=="Sad"]
df_majority5 = df[df.Emotion=="Angry"]
df_majority6 = df[df.Emotion=="Fearful"]
df_majority7 = df[df.Emotion=="Disgust"]
df_majority8 = df[df.Emotion=="Surprised"]
 
# Upsample minority class
df_minority_upsampled = resample(df_minority_1, 
                                 replace=True,     # sample with replacement
                                 n_samples=1463,    # to match majority class
                                 random_state=123) # reproducible results


# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled, df_majority3, df_majority4, df_majority5, df_majority6, df_majority7, df_majority8])
 
# Display new class counts
df_upsampled.Emotion.value_counts()

# Reset row (sample) indexing
df_upsampled = df_upsampled.reset_index(drop=True)

df_upsampled['Emotion'].value_counts()

Happy        1463
Neutral      1463
Fearful      1463
Disgust      1463
Angry        1463
Sad          1463
Surprised     192
Name: Emotion, dtype: int64

In [8]:
df_minority_2 = df[df.Emotion=="Surprised"]

# Upsample minority class
df_minority_upsampled_2 = resample(df_minority_2, 
                                 replace=True,     # sample with replacement
                                 n_samples=1271,    # to match majority class
                                 random_state=123) # reproducible results

# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled_2, df_upsampled])
 
# Display new class counts
df_upsampled.Emotion.value_counts()

# Reset row (sample) indexing
df_upsampled = df_upsampled.reset_index(drop=True)

df_upsampled['Emotion'].value_counts()

Happy        1463
Neutral      1463
Surprised    1463
Fearful      1463
Disgust      1463
Angry        1463
Sad          1463
Name: Emotion, dtype: int64

In [9]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df_upsampled.iloc[0:4144, 53].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df_upsampled.iloc[0:4144, list(range(53))].values
print(y)

['Surprised' 'Surprised' 'Surprised' ... 'Happy' 'Happy' 'Happy']


In [10]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1, stratify=y)

# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

In [38]:
# Create SVM model with hyperparameters
#svm = SVC(kernel='sigmoid', C=10.0, random_state=1)

# Best settings from GridSearchCV
svm = SVC(kernel='rbf', C=7.0, gamma=0.06, random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=7.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.06, kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [39]:
# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data
y_pred = svm.predict(X_test_std)

In [40]:
# Print out the performance metrics
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#print('Test Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

# Print out the performance metrics in a text file.
#text_file = open("Emotion Classifier Performance Metrics.txt", "w")
#text_file.write('Misclassified samples: %d' % (y_test != y_pred).sum())
#text_file.write('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#text_file.write('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))
#text_file.close()

Misclassified samples: 108
Training Accuracy: 0.99
Test Accuracy: 0.90
