In [28]:
#************************************************************************************
# Rezwan Matin
# Thesis B
# Filename: ML_RAVDESS_TESS_CREMAD.py
# Date: 7/30/20
#
# Objective:
# 26 MFCCs (mean) and 26 MFCCs (standard deviation), 7 spectral contrast, 2 poly features, and 1 RMS.
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [29]:
# Save directory path in 'path'
path = r'C:/Books/Texas State Books/Fall 2019/Thesis A/Corpus/Simulated/RAVDESS+TESS+CREMA-D/All'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,63])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '/' + file_list[i], sr=16000)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26, n_fft=512, hop_length=256, htk=True).T,axis=0)
    
    # Calculate the standard deviation of mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.std' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    std_mfcc_feat = np.std(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26, n_fft=512, hop_length=256, htk=True).T,axis=0)
	
    spec_feat = np.mean(rosa.feature.spectral_contrast(y=sig, sr=fs, n_fft=512, hop_length=256).T, axis=0)
    
    poly_feat = np.mean(rosa.feature.poly_features(y=sig, sr=fs, n_fft=512, hop_length=256).T, axis=0)
	
    rms_feat = np.mean(rosa.feature.rms(y=sig, frame_length=512, hop_length=256).T, axis=0)
	
    # Append the three 1D arrays into a single 1D array called 'feat'.
    feat0 = np.append(avg_mfcc_feat, std_mfcc_feat, axis=0)
    
    feat1 = np.append(feat0, spec_feat, axis=0)
	
    feat2 = np.append(feat1, poly_feat, axis=0)
    
    feat3 = np.append(feat2, rms_feat, axis=0)
	
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '/' + file_list[i]))[0].split('-')[2]
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(feat3, obj=62, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

In [30]:
# Print out the 1D Numpy array
result_array

result_array.shape

(736029,)

In [31]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(11682, 63)

In [32]:
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({62: "Emotion"}, axis='columns')
# Delete unnecessary emotion data (calm)
df.drop(df[df['Emotion'] == 2.0].index, inplace = True)
df['Emotion'].replace({1.0: "Neutral", 3.0: "Happy", 4.0: "Sad", 5.0: "Angry", 6.0: "Fearful", 7.0: "Disgust", 8.0: "Surprised"}, inplace=True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,53,54,55,56,57,58,59,60,61,Emotion
0,-830.114645,56.844514,1.063355,9.075227,-1.360923,-8.758056,-7.253917,-7.637495,-7.334508,3.625505,...,6.471118,10.273737,11.739732,13.551929,15.788384,33.695234,-5e-06,0.029334,0.002079,Neutral
1,-787.113164,49.6766,-14.044976,1.930695,-15.995757,-6.209225,-12.428246,-6.174949,-5.679886,-4.64375,...,11.275636,12.838959,13.63991,15.742769,16.659574,28.343145,-7e-06,0.041733,0.003267,Neutral
2,-758.165048,57.807476,-5.704782,3.823573,-8.083287,-11.835824,-19.1044,-0.927468,-10.62954,-4.253959,...,9.114557,12.418553,12.968859,16.141742,15.360649,29.712372,-8e-06,0.048585,0.003678,Neutral
3,-824.480633,39.282392,-11.602462,0.578429,-17.562098,-8.076441,-15.299257,-9.30184,-7.62783,-1.617285,...,11.428805,12.757471,13.563244,15.151307,15.448206,28.982783,-4e-06,0.026792,0.002273,Neutral
4,-853.749128,74.107106,5.808923,13.900567,-0.950126,-2.691714,-5.746037,-2.145177,-2.390365,1.186394,...,6.930316,11.020638,13.242097,15.867247,15.729942,32.192617,-3e-06,0.01884,0.00136,Neutral
5,-874.194519,47.018062,-22.183894,4.009703,-14.337658,-13.995042,-13.372817,-9.405745,-6.20421,-7.341729,...,9.876278,11.964148,13.575127,16.220316,16.808751,31.775267,-3e-06,0.018804,0.001477,Neutral
6,-785.424871,79.212409,3.11773,14.635421,-7.772962,-9.591693,-16.742949,-2.486822,-4.799001,-5.743328,...,7.845423,12.254316,13.220987,16.245753,16.026517,33.124394,-6e-06,0.033861,0.002507,Neutral
7,-827.245897,69.46078,6.52744,18.977231,-9.324311,2.180431,-10.774387,-3.863849,-3.753699,0.369639,...,10.209306,13.490199,13.619469,16.133888,16.47894,34.455534,-4e-06,0.027066,0.00228,Neutral
8,-869.134056,100.791734,25.940873,31.231493,9.853266,2.331208,-1.843542,3.484732,3.871166,-0.305956,...,6.584999,10.843281,12.663215,14.648453,15.429564,34.138689,-3e-06,0.015884,0.001286,Neutral
9,-706.615777,55.905646,-3.480534,-0.635271,-7.120186,-11.84003,-11.553561,-9.12615,-4.139831,-6.819041,...,11.857853,14.403591,14.489168,16.700555,17.477709,35.10396,-1.6e-05,0.098882,0.009096,Neutral


In [33]:
df['Emotion'].value_counts()

Sad          1863
Disgust      1863
Angry        1863
Fearful      1863
Happy        1863
Neutral      1583
Surprised     592
Name: Emotion, dtype: int64

In [34]:
# Balance the dataset for equal number of samples for each class.
# Separate majority and minority classes
df_minority_1 = df[df.Emotion=="Neutral"]
df_majority3 = df[df.Emotion=="Happy"]
df_majority4 = df[df.Emotion=="Sad"]
df_majority5 = df[df.Emotion=="Angry"]
df_majority6 = df[df.Emotion=="Fearful"]
df_majority7 = df[df.Emotion=="Disgust"]
df_majority8 = df[df.Emotion=="Surprised"]
 
# Upsample minority class
df_minority_upsampled = resample(df_minority_1, 
                                 replace=True,     # sample with replacement
                                 n_samples=1863,    # to match majority class
                                 random_state=123) # reproducible results


# Combine majority class with upsampled minority class
df_upsampled_1 = pd.concat([df_minority_upsampled, df_majority3, df_majority4, df_majority5, df_majority6, df_majority7, df_majority8])
 
# Display new class counts
df_upsampled_1.Emotion.value_counts()

Sad          1863
Disgust      1863
Neutral      1863
Angry        1863
Fearful      1863
Happy        1863
Surprised     592
Name: Emotion, dtype: int64

In [35]:
# Reset row (sample) indexing
df_upsampled_1 = df_upsampled_1.reset_index(drop=True)

df_upsampled_1['Emotion'].value_counts()

df_minority_2 = df[df.Emotion=="Surprised"]

# Upsample minority class
df_minority_upsampled_2 = resample(df_minority_2, 
                                 replace=True,     # sample with replacement
                                 n_samples=1271,    # to match majority class
                                 random_state=123) # reproducible results

# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled_2, df_upsampled_1])
 
# Display new class counts
df_upsampled.Emotion.value_counts()

Sad          1863
Disgust      1863
Neutral      1863
Surprised    1863
Angry        1863
Fearful      1863
Happy        1863
Name: Emotion, dtype: int64

In [22]:
# Reset row (sample) indexing
df_upsampled = df_upsampled.reset_index(drop=True)

df_upsampled['Emotion'].value_counts()

Sad          1863
Disgust      1863
Neutral      1863
Surprised    1863
Angry        1863
Fearful      1863
Happy        1863
Name: Emotion, dtype: int64

In [36]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df_upsampled.iloc[0:13041, 62].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df_upsampled.iloc[0:13041, list(range(62))].values
print(y)

['Surprised' 'Surprised' 'Surprised' ... 'Surprised' 'Surprised'
 'Surprised']


In [37]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=None, stratify=y) # training split = 80%, test split = 20%

# Further split training data for training and validating
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=None, stratify=y_train) # training split = 60%, validation split = 20%

In [38]:
# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

# Best settings from GridSearchCV
svm = SVC(kernel='rbf', C=5.2, gamma=0.0302, random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=5.2, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.0302, kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [39]:
# Scale validation data
sc.fit(X_val)
X_val_std = sc.transform(X_val)

# Test the model data using validation data
y_pred_val = svm.predict(X_val_std)

# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data using test data
y_pred = svm.predict(X_test_std)

In [40]:
# Print out the performance metrics
print('Misclassified validation samples: %d' % (y_val != y_pred_val).sum())
print('Misclassified test samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
print('Validation Accuracy: %.2f' % svm.score(X_val_std, y_val))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

Misclassified validation samples: 575
Misclassified test samples: 688
Training Accuracy: 0.88
Validation Accuracy: 0.72
Test Accuracy: 0.74
