# Imports

In [6]:
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

# Test Models on MLS

## Load test data

In [7]:
features = 'F0_MFCCs'
dataset = 'CETUC'

# Column groups present in the MLS split CSV. Every supported feature set is a
# concatenation of one or more of these groups, in the order listed below.
STAT_NAMES = ['nobs', 'mean', 'skew', 'kurtosis', 'median', 'mode', 'std', 'low', 'peak', 'q25', 'q75', 'iqr']
FEATURE_COLUMNS = list(STAT_NAMES)
F0_COLUMNS = [f'{name}_pitch' for name in STAT_NAMES]
MFCC_COLUMNS = [f'MFCC_{i}' for i in range(1, 21)]

# Maps each allowed value of `features` to the ordered list of input columns.
FEATURE_SETS = {
    'Features': FEATURE_COLUMNS,
    'MFCCs': MFCC_COLUMNS,
    'Features_MFCCs': FEATURE_COLUMNS + MFCC_COLUMNS,
    'F0': F0_COLUMNS,
    'F0_MFCCs': F0_COLUMNS + MFCC_COLUMNS,
    'Features_F0_MFCCs': FEATURE_COLUMNS + F0_COLUMNS + MFCC_COLUMNS,
}

# Fail early with a clear message instead of a NameError on X_test further down.
if features not in FEATURE_SETS:
    raise ValueError(
        f"Unknown feature set {features!r}; expected one of {sorted(FEATURE_SETS)}"
    )

test_MLS = pd.read_csv(f'data/MLS_split_{features}_data.csv')

# X_test is an independent copy of the selected columns; Y_test is always a
# 1-D array of the 'Gender' column, regardless of which feature set is chosen.
X_test = test_MLS[FEATURE_SETS[features]].copy()
Y_test = test_MLS['Gender'].to_numpy()

In [8]:
# Load the scaler pickled under the same dataset/feature-set prefix as the
# models (presumably the one fitted at training time -- confirm against the
# training notebook) and apply it to the test features.
# NOTE: pickle.load can execute arbitrary code; only load trusted artifacts.
with open(f'models/{dataset}_{features}_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# NOTE: this rebinds X_test, so re-running this cell alone would scale data
# that is already scaled; re-run the data-loading cell first.
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

## Load and Evaluate Models

In [9]:


def evaluate_saved_model(path, title, X, y):
    """Load a pickled classifier and print its accuracy and confusion matrix.

    Parameters
    ----------
    path : str
        Location of the pickled model file.
    title : str
        Heading printed above the metrics.
    X : feature matrix passed to the model's ``score`` and ``predict``.
    y : true labels passed to ``score`` and ``confusion_matrix``.

    Returns
    -------
    The unpickled model, so the caller can keep using it.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only load trusted artifacts.
    The confusion matrix is printed with label order ``[1, 0]``.
    """
    # The context manager closes the file handle even if unpickling fails.
    with open(path, 'rb') as model_file:
        model = pickle.load(model_file)
    print(f"\n{title}")
    print("Accuracy on test set: {:.3f}".format(model.score(X, y)))
    print(f"Confusion Matrix:\n {confusion_matrix(y, model.predict(X), labels=[1, 0])}")
    return model


filename = f'models/{dataset}_{features}_DecisionTree.sav'
tree = evaluate_saved_model(filename, "Decision Tree", X_test, Y_test)

filename = f'models/{dataset}_{features}_RandomForest.sav'
forest = evaluate_saved_model(filename, "Random Forests", X_test, Y_test)

filename = f'models/{dataset}_{features}_GradientBoosting.sav'
gbrt = evaluate_saved_model(filename, "Gradient Boosting", X_test, Y_test)


Decision Tree
Accuracy on test set: 0.786
Confusion Matrix:
 [[2207  294]
 [ 754 1653]]

Random Forests
Accuracy on test set: 0.838
Confusion Matrix:
 [[2290  211]
 [ 582 1825]]

Gradient Boosting
Accuracy on test set: 0.849
Confusion Matrix:
 [[2264  237]
 [ 502 1905]]


In [10]:
# Evaluate the remaining saved classifiers on the scaled test set.
# Each entry is (heading printed above the metrics, model file suffix).
# NOTE: pickle.load can execute arbitrary code; only load trusted artifacts.
loaded_models = {}
for title, suffix in (
    ("LogisticRegression", "LogisticRegression"),
    ("Support Vector Machine", "SVM"),
    ("Multilayer Perceptron", "MLP"),
):
    filename = f'models/{dataset}_{features}_{suffix}.sav'
    # The context manager closes the file handle even if unpickling fails.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)
    print(f"\n{title}")
    print("Accuracy on test set: {:.3f}".format(model.score(X_test, Y_test)))
    # Confusion matrix is printed with label order [1, 0].
    print(f"Confusion Matrix:\n {confusion_matrix(Y_test, model.predict(X_test), labels=[1, 0])}")
    loaded_models[suffix] = model

# Keep the individual names bound for any later cells that use them.
lgr = loaded_models["LogisticRegression"]
svm = loaded_models["SVM"]
mlp = loaded_models["MLP"]



LogisticRegression
Accuracy on test set: 0.791
Confusion Matrix:
 [[2080  421]
 [ 604 1803]]

Support Vector Machine
Accuracy on test set: 0.812
Confusion Matrix:
 [[2345  156]
 [ 766 1641]]

Multilayer Perceptron
Accuracy on test set: 0.798
Confusion Matrix:
 [[2222  279]
 [ 711 1696]]
