# Imports

In [20]:
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

# Test Models on MLS

## Load test data

In [21]:
# Evaluation configuration.
# `features` selects the feature set ('Features' or 'MFCCs') and thereby the MLS CSV to read.
# `dataset` is used by later cells to build the file names of the saved scaler and models.
features = 'MFCCs'
dataset = 'CETUC'

# Resolve the input columns up front so an unsupported feature set fails here with a
# clear message instead of a NameError on X_test in a later cell.
if features == 'Features':
    feature_columns = ['nobs', 'mean', 'skew', 'kurtosis', 'median', 'mode',
                       'std', 'low', 'peak', 'q25', 'q75', 'iqr']
elif features == 'MFCCs':
    # MFCC_1 ... MFCC_20
    feature_columns = [f'MFCC_{i}' for i in range(1, 21)]
else:
    raise ValueError(f"Unsupported feature set {features!r}; expected 'Features' or 'MFCCs'")

test_MLS = pd.read_csv(f'data/MLS_split_{features}_data.csv')

# Copy the inputs so later changes to X_test never touch test_MLS.
X_test = test_MLS[feature_columns].copy()
# Labels are always a 1-D array, for both feature sets (previously the 'Features'
# branch produced an (n, 1) DataFrame while the 'MFCCs' branch produced a flat array).
Y_test = test_MLS['Gender'].to_numpy(copy=True)

In [22]:
# Load the scaler saved for this dataset/feature combination and apply it to the test inputs.
# NOTE: pickle can execute arbitrary code while loading; only load files you trust.
with open(f'models/{dataset}_{features}_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Transform from the untouched columns of test_MLS rather than from X_test itself, so
# re-running this cell does not scale already-scaled data. On the first run the values
# are identical, because X_test is a plain copy of these columns.
X_test = pd.DataFrame(scaler.transform(test_MLS[X_test.columns]), columns=X_test.columns)

## Load and Evaluate Models

In [23]:


# Load each saved tree-based classifier and report its accuracy and confusion matrix on
# the test set. confusion_matrix puts true labels on rows and predictions on columns,
# both ordered as labels=[1, 0].
# NOTE: pickle can execute arbitrary code while loading; only load files you trust.
tree_models = {}
for model_key, model_title in (
    ('DecisionTree', 'Decision Tree'),
    ('RandomForest', 'Random Forests'),
    ('GradientBoosting', 'Gradient Boosting'),
):
    filename = f'models/{dataset}_{features}_{model_key}.sav'
    # Context manager closes the file handle as soon as the model is loaded.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)
    print(f"\n{model_title}")
    print("Accuracy on test set: {:.3f}".format(model.score(X_test, Y_test)))
    print(f"Confusion Matrix:\n {confusion_matrix(Y_test, model.predict(X_test), labels=[1, 0])}")
    tree_models[model_key] = model

# Keep the individual names available for any later cells that refer to them.
tree = tree_models['DecisionTree']
forest = tree_models['RandomForest']
gbrt = tree_models['GradientBoosting']


Decision Tree
Accuracy on test set: 0.709
Confusion Matrix:
 [[1553  951]
 [ 476 1932]]

Random Forests
Accuracy on test set: 0.735
Confusion Matrix:
 [[1436 1068]
 [ 235 2173]]

Gradient Boosting
Accuracy on test set: 0.802
Confusion Matrix:
 [[1595  909]
 [  65 2343]]


In [24]:
# Load the remaining saved classifiers and report accuracy and confusion matrix on the
# test set. confusion_matrix puts true labels on rows and predictions on columns,
# both ordered as labels=[1, 0].
# NOTE: pickle can execute arbitrary code while loading; only load files you trust.
other_models = {}
for model_key, model_title in (
    ('LogisticRegression', 'LogisticRegression'),
    ('SVM', 'Support Vector Machine'),
    ('MLP', 'Multilayer Perceptron'),
):
    filename = f'models/{dataset}_{features}_{model_key}.sav'
    # Context manager closes the file handle as soon as the model is loaded.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)
    print(f"\n{model_title}")
    print("Accuracy on test set: {:.3f}".format(model.score(X_test, Y_test)))
    print(f"Confusion Matrix:\n {confusion_matrix(Y_test, model.predict(X_test), labels=[1, 0])}")
    other_models[model_key] = model

# Keep the individual names available for any later cells that refer to them.
lgr = other_models['LogisticRegression']
svm = other_models['SVM']
mlp = other_models['MLP']



LogisticRegression
Accuracy on test set: 0.783
Confusion Matrix:
 [[1490 1014]
 [  51 2357]]

Support Vector Machine
Accuracy on test set: 0.776
Confusion Matrix:
 [[1471 1033]
 [  69 2339]]

Multilayer Perceptron
Accuracy on test set: 0.755
Confusion Matrix:
 [[1324 1180]
 [  21 2387]]
