In [1]:
#************************************************************************************
# Rezwan Matin
# Thesis B
# Filename: ML_TESS_1.py
# Date: 01/26/20
#
# Objective:
# 26 MFCCs (mean) and 26 MFCCs (standard deviation), ZCR.
#
#*************************************************************************************

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa as rosa
import glob
import os
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import learning_curve
from sklearn.metrics import confusion_matrix

In [2]:
# Save directory path in 'path'
path = r'C:\Books\Texas State Books\Fall 2019\Thesis A\Corpus\Simulated\TESS\All'

# Declare a dummy Numpy array (row vector)
result_array = np.empty([1,54])

# Create a list of audio file names 'file_list'
file_list = os.listdir(path)

i=0

for filename in file_list:
    
    # Read WAV file. 'rosa.core.load' returns sampling frequency in 'fs' and audio signal in 'sig'
    sig, fs = rosa.core.load(path + '\\' + file_list[i], sr=None)
    
    # 'rosa.feature.mfcc' extracts n_mfccs from signal and stores it into 'mfcc_feat'
    mfcc_feat = rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26)
    
    # Calculate the average mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    avg_mfcc_feat = np.mean(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the standard deviation of mfcc (utterance-level features) using 'rosa.feat.mfcc()' and 'np.std' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    std_mfcc_feat = np.std(rosa.feature.mfcc(y=sig, sr=fs, n_mfcc=26).T,axis=0)
    
    # Calculate the average zero crossing rate (utterance-level feature) using 'rosa.feat.zero_crossing_rate()' and 'np.mean' method. '.T' transposes the rows and columns. 'axis=0' indicates average is calculated column-wise
    zcross_feat = rosa.feature.zero_crossing_rate(sig)
    avg_zcross_feat = np.mean(rosa.feature.zero_crossing_rate(y=sig).T,axis=0)
    
    # Append the three 1D arrays into a single 1D array called 'feat'.
    feat0 = np.append(avg_mfcc_feat, std_mfcc_feat, axis=0)
    
    feat1 = np.append(feat0, avg_zcross_feat, axis=0)
    
    # Save emotion label from file name. 'path' contains directory's address, 'file_list' contains file name, and '\\' joins the two to form file's address
    label = os.path.splitext(os.path.basename(path + '\\' + file_list[i]))[0].split('_')[1]
    
    # Create a new Numpy array 'sample' to store features along with label
    sample = np.insert(feat1, obj=53, values=label)
    
    result_array = np.append(result_array, sample)
    
    i+=1

# Print out the 1D Numpy array
result_array

array([8.38941806e-316, 2.56764730e-317, 5.30355226e+180, ...,
       1.08413490e+001, 1.92256627e-001, 8.00000000e+000])

In [3]:
result_array.shape

(151254,)

In [4]:
# Convert 1D Numpy array to 2D array
result_array = np.reshape(result_array, (i+1,-1))

# Delete first dummy row from 2D array
result_array = np.delete(result_array, 0, 0)

# Print final 2D Numpy array 
result_array.shape

(2800, 54)

In [5]:
# Transfer Numpy array data to a Pandas dataframe
df = pd.DataFrame(data=result_array)
# Label only the last (target) column
df = df.rename({53: "Emotion"}, axis='columns')
# Rename class labels
df['Emotion'].replace({1.0: "Neutral", 3.0: "Happy", 4.0: "Sad", 5.0: "Angry", 6.0: "Fearful", 7.0: "Disgust", 8.0: "Surprised"}, inplace=True)
# Reset row (sample) indexing
df = df.reset_index(drop=True)
df.head(12)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,Emotion
0,-528.499228,86.100738,17.797453,1.335589,14.801382,12.804018,-4.62441,-4.640589,-10.816754,2.238214,...,8.242066,7.482735,6.65166,7.027101,5.36507,5.041682,5.683295,6.459222,0.068957,Neutral
1,-497.019387,77.665822,25.75344,-4.328508,2.269791,7.581418,-0.116068,-3.125772,-15.357324,1.173361,...,6.566372,9.158605,6.761943,7.417804,7.13386,6.025158,6.76512,7.817093,0.06899,Neutral
2,-500.522992,84.506182,20.194522,2.381852,14.780523,13.248763,-15.45083,-10.223414,-14.47863,-2.73291,...,6.483032,8.213134,7.57953,5.944428,6.385273,5.115848,8.00021,7.44057,0.082913,Neutral
3,-509.476734,73.102157,17.194391,12.129123,10.979809,3.319019,-1.237639,-7.961328,-14.416996,-5.655711,...,6.285488,8.070099,6.992521,6.982331,8.478513,5.81808,7.742507,9.053821,0.095975,Neutral
4,-535.158399,64.858644,18.889478,18.277332,18.269866,9.174235,-3.113041,-0.781078,-7.781502,2.09421,...,5.980122,7.810217,8.202136,5.588915,6.953944,6.630883,6.138386,5.847282,0.092968,Neutral
5,-511.694298,86.179209,21.385005,-0.183428,10.904503,12.704529,-8.055137,-8.335903,-16.956759,-1.713734,...,6.030428,9.324065,6.606596,5.767823,6.661085,7.183514,7.855713,7.457704,0.080454,Neutral
6,-515.520162,86.41997,23.48051,5.978081,11.823975,14.172115,-10.100084,-8.877399,-15.015842,-3.209784,...,6.833197,7.36062,6.622589,7.558665,5.372733,5.382554,6.7843,7.069851,0.082897,Neutral
7,-526.924232,86.073802,28.716866,2.332954,9.453578,13.863026,-5.956343,-9.1592,-15.00914,-1.574089,...,5.673211,8.366245,8.157908,6.736358,6.130984,5.267177,6.634837,7.179037,0.082504,Neutral
8,-536.702535,95.352169,26.064563,-4.630669,4.179155,15.128372,-5.0227,-9.922548,-15.720885,0.523983,...,6.124525,7.79773,7.745395,6.927627,7.277797,6.862597,7.111186,7.310421,0.070104,Neutral
9,-504.185201,69.966657,24.419953,11.739514,10.443279,7.342729,-5.722229,-2.698441,-15.555375,-4.689976,...,7.657893,9.078432,7.326183,6.032855,6.268549,5.244469,6.467981,7.90449,0.09428,Neutral


In [6]:
df['Emotion'].value_counts()

Disgust      400
Fearful      400
Surprised    400
Angry        400
Happy        400
Sad          400
Neutral      400
Name: Emotion, dtype: int64

In [7]:
# Extract target feature 'Emotion' in a vector y. Indexing from 0
y = df.iloc[0:2800, 53].values
# Extract features 'buying' and 'safety' in a vector X. Indexing from 0
X = df.iloc[0:2800, list(range(53))].values
print(y)

['Neutral' 'Neutral' 'Neutral' ... 'Surprised' 'Surprised' 'Surprised']


In [8]:
# Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1, stratify=y)

# Create an object 'sc'
sc = StandardScaler()

# Scale training data
sc.fit(X_train)
X_train_std = sc.transform(X_train)

In [12]:
# Create SVM model with hyperparameters
#svm = SVC(kernel='sigmoid', C=10.0, random_state=1)

# Best settings from GridSearchCV
svm = SVC(kernel='rbf', C=2.0, gamma=0.035, random_state=1)

# This is training the model
svm.fit(X_train_std, y_train)

SVC(C=2.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.035, kernel='rbf',
    max_iter=-1, probability=False, random_state=1, shrinking=True, tol=0.001,
    verbose=False)

In [13]:
# Scale test data
sc.fit(X_test)
X_test_std = sc.transform(X_test)

# Test the model data
y_pred = svm.predict(X_test_std)

In [14]:
# Print out the performance metrics
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#print('Test Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))

# Print out the performance metrics in a text file.
#text_file = open("Emotion Classifier Performance Metrics.txt", "w")
#text_file.write('Misclassified samples: %d' % (y_test != y_pred).sum())
#text_file.write('Training Accuracy: %.2f' % svm.score(X_train_std, y_train))
#text_file.write('Test Accuracy: %.2f' % svm.score(X_test_std, y_test))
#text_file.close()

Misclassified samples: 0
Training Accuracy: 1.00
Test Accuracy: 1.00


In [13]:
fs

24414