In [None]:
# Load libraries
import time
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, recall_score, precision_score
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.metrics import plot_confusion_matrix

In [None]:
# Load the data

# Column names of each speech-feature family in the feature CSV. Pass a family
# name in `exclude_feature_groups` to leave that family out of an experiment.
_FEATURE_GROUPS = {
    'Prosodic 1': ['Pitch mean', 'Pitch stdev', 'Pitch max', 'Pitch min', 'Pitch range',
                   'Intensity mean', 'Intensity stdev', 'Intensity max', 'Intensity min',
                   'Intensity range', 'Jitter PCA', 'shimmer PCA'],
    'Formants': ['Formant 1 mean', 'Formant 1 stdev', 'Formant 2 mean', 'Formant 2 stdev',
                 'Formant 3 mean', 'Formant 3 stdev'],
    'MFCC': ['MFCC 1', 'MFCC 2', 'MFCC 3', 'MFCC 4', 'MFCC 5', 'MFCC 6', 'MFCC 7',
             'MFCC 8', 'MFCC 9', 'MFCC 10', 'MFCC 11', 'MFCC 12', 'MFCC 13'],
    'LTAS': ['LTAS mean', 'LTAS stdev', 'LTAS max', 'LTAS min', 'LTAS range', 'LTAS slope'],
    'Wavelet': ['Ed5', 'Ed4', 'Ed3', 'Ed2', 'Ed1', 'Ea', 'wentropy'],
    'Prosodic 2': ['HNR', 'Duration', 'ZCR'],
    'LPC': ['LPC'],
}


def _as_list(values):
    """Return `values` as a list; None becomes empty and a lone string one item."""
    if values is None:
        return []
    if isinstance(values, str):
        return [values]
    return list(values)


def load_data(path='SAASE features.csv', exclude_emotions=('sad',),
              exclude_genders=('female',), exclude_feature_groups=()):
    """Read the feature CSV and return the feature matrix and emotion labels.

    The defaults reproduce the experiment that used to be hard-coded here:
    rows labelled "sad" and rows of female speakers are removed, and every
    feature family is kept.

    Parameters
    ----------
    path : str
        Location of the feature CSV file.
    exclude_emotions : iterable of str, str or None
        Values of the 'Emotion' column whose rows are removed.
    exclude_genders : iterable of str, str or None
        Values of the 'gender' column whose rows are removed.
    exclude_feature_groups : iterable of str, str or None
        Names of feature families (keys of `_FEATURE_GROUPS`) whose columns
        are removed from the feature matrix.

    Returns
    -------
    x : pandas.DataFrame
        Remaining feature columns. Row labels are the original CSV row
        numbers (the index is not reset after filtering).
    y : pandas.Series
        The 'Emotion' column for the same rows.

    Raises
    ------
    ValueError
        If `exclude_feature_groups` names an unknown feature family.
    KeyError
        If an expected column is missing from the CSV.
    """
    target_name = 'Emotion'

    # Validate the requested families before touching the file so that a typo
    # fails fast with a readable message.
    excluded_groups = _as_list(exclude_feature_groups)
    unknown_groups = [name for name in excluded_groups if name not in _FEATURE_GROUPS]
    if unknown_groups:
        raise ValueError(
            f"Unknown feature group(s) {unknown_groups}; "
            f"expected any of {list(_FEATURE_GROUPS)}"
        )

    df = pd.read_csv(path)

    # Row filters: drop the excluded emotions and genders. This must happen
    # before the 'gender' column itself is removed below.
    keep_rows = (
        ~df[target_name].isin(_as_list(exclude_emotions))
        & ~df['gender'].isin(_as_list(exclude_genders))
    )
    df = df[keep_rows]

    # Remove unrelated columns (always dropped; they are not speech features).
    df = df.drop(columns=['speaker', 'gender', 'voiceID'])

    # Remove the feature families left out of this experiment.
    for group_name in excluded_groups:
        df = df.drop(columns=_FEATURE_GROUPS[group_name])

    y = df[target_name]
    x = df.drop(columns=target_name)

    return x, y

In [None]:
# keep the k best-scoring features according to the f_classif score function
def select_features(x, y, k):
    """Fit a SelectKBest(f_classif) selector on (x, y) and reduce x to k features.

    Parameters
    ----------
    x : feature matrix passed to the selector's fit and transform.
    y : class labels passed to the selector's fit.
    k : number of features to keep (forwarded to SelectKBest).

    Returns
    -------
    (x_fs, fs) : the transformed feature matrix and the fitted selector.
    """
    selector = SelectKBest(k=k, score_func=f_classif)
    # Scores are learned from the very data that is transformed afterwards.
    selector.fit(x, y)
    return selector.transform(x), selector


# Draw a confusion matrix as an annotated heatmap.
# Note: this definition shadows the `plot_confusion_matrix` name imported from
# sklearn.metrics at the top of the notebook.
def plot_confusion_matrix(matrix, sorted_labels: list):
    """Show `matrix` as a seaborn heatmap with one tick label per class.

    Parameters
    ----------
    matrix : 2-D confusion matrix; rows are labelled 'Actual' and columns
        'Predicted'.
    sorted_labels : tick labels used for both axes, in matrix order.
    """
    plt.figure(figsize=(12.8, 6))
    heatmap_axes = sns.heatmap(
        matrix,
        annot=True,
        fmt="g",
        cmap="Blues",
        xticklabels=sorted_labels,
        yticklabels=sorted_labels,
    )
    heatmap_axes.set_xlabel('Predicted')
    heatmap_axes.set_ylabel('Actual')
    heatmap_axes.set_title('Confusion Matrix')
    plt.show()

In [None]:
def cross_val_predict(model, x, y, n_splits=10, random_state=1):
    """Collect out-of-fold predictions from a shuffled stratified k-fold run.

    For every fold the model is re-fitted on the training part and asked to
    predict the held-out part; the held-out labels and the predictions of all
    folds are concatenated in fold order (not in the original row order).

    Parameters
    ----------
    model : estimator exposing `fit(x, y)` and `predict(x)`.
    x : feature matrix, indexable by an integer index array. pandas objects
        are converted to NumPy so that indexing is positional.
    y : 1-D labels; pandas objects are converted the same way.
    n_splits : number of folds (default 10).
    random_state : seed for the fold shuffling (default 1).

    Returns
    -------
    (actual_targets, predicted_targets) : two 1-D NumPy arrays of equal
        length that keep the dtype of the labels / predictions.
    """
    # Fold indices are positional; a pandas object with a non-default index
    # would otherwise be indexed by label.
    if hasattr(x, "to_numpy"):
        x = x.to_numpy()
    if hasattr(y, "to_numpy"):
        y = y.to_numpy()

    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Gather per-fold arrays and concatenate once at the end: this avoids
    # re-allocating on every fold and, unlike appending to an empty float
    # array, does not coerce integer labels to float.
    actual_folds = []
    predicted_folds = []
    for train_index, test_index in kf.split(x, y):
        model.fit(x[train_index], y[train_index])
        predicted_folds.append(np.asarray(model.predict(x[test_index])))
        actual_folds.append(np.asarray(y[test_index]))

    return np.concatenate(actual_folds), np.concatenate(predicted_folds)

In [None]:
# Experiment driver: load the data, select the k best features, evaluate a
# linear SVM with cross-validation, then report metrics and a confusion matrix.
tic = time.perf_counter()
#loading_data
x,y = load_data()

# Labels as a NumPy array so that fold indices are positional; hoisted out of
# the loop because it does not depend on k.
y1 = y.to_numpy()
# Classes actually present after load_data's filtering, in sorted order.
# Using them for both the metrics and the plot keeps tick labels aligned with
# the matrix rows/columns whichever emotions were filtered out.
class_labels = np.unique(y1)

# define number of features to evaluate
num_features = [i+1 for i in range(x.shape[1])] # every k from 1 to the number of features (full sweep)
num_features_n = [42] #define specific number of features

for k in num_features_n:
    ti = time.perf_counter()
    # feature selection
    # NOTE(review): the selector is fitted on all rows before cross-validation,
    # so held-out folds influence which features are kept — confirm this is intended.
    x_fs, fs = select_features(x, y, k)

    # SVM with linear kernel
    model = SVC(kernel = 'linear', C = 1)

    # evaluate the model
    y_test, predicted = cross_val_predict(model, x_fs, y1)

    # calculate evaluation parameters
    accuracy = accuracy_score(y_test, predicted)
    conf_matrix = confusion_matrix(y_test, predicted, labels=class_labels)
    # average=None yields one value per class, ordered like class_labels
    precision = precision_score(y_test, predicted, labels=class_labels, average=None)
    recall = recall_score(y_test, predicted, labels=class_labels, average=None)

    # print results
    print("Both")
    print("Avg accuracy: ", accuracy *100)
    print("Classes: ", list(class_labels))
    print("Precision per class: ", precision)
    print("Recall per class: ", recall)

    plot_confusion_matrix(conf_matrix, list(class_labels))

    to = time.perf_counter()
    print(f"Running time: {(to - ti)/60:0.4f} minutes")
toc = time.perf_counter()
print(f"Running time: {(toc - tic)/60:0.4f} minutes")