# Imports

In [1]:
import os
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

# Test Models on MLS

## Load test data

In [28]:
# Configuration: which feature set to evaluate and which dataset's models/scaler to load.
features = 'Features_F0_MFCCs'
dataset = 'CETUC'

# Two directory levels above the current working directory.
project_root = os.path.dirname(os.path.dirname(os.getcwd()))

# Column groups available in the feature CSVs.
stat_columns = ['nobs', 'mean', 'skew', 'kurtosis', 'median', 'mode', 'std', 'low', 'peak', 'q25', 'q75', 'iqr']
pitch_columns = [f'{stat}_pitch' for stat in stat_columns]
mfcc_columns = [f'MFCC_{i}' for i in range(1, 21)]

# Feature-set name -> ordered list of input columns.
# The column order is kept exactly as the scaler/models are fed downstream.
feature_set_columns = {
    'Features': stat_columns,
    'MFCCs': mfcc_columns,
    'Features_MFCCs': stat_columns + mfcc_columns,
    'F0': pitch_columns,
    'F0_MFCCs': pitch_columns + mfcc_columns,
    'Features_F0': stat_columns + pitch_columns,
    'Features_F0_MFCCs': stat_columns + pitch_columns + mfcc_columns,
}

# Fail early on an unknown feature set instead of silently leaving
# X_test / Y_test undefined (or stale from a previous run of this cell).
if features not in feature_set_columns:
    raise ValueError(
        f"Unknown feature set {features!r}; expected one of {sorted(feature_set_columns)}"
    )

test_MLS = pd.read_csv(os.path.join(project_root, 'data', 'MLS', f'{features}_data.csv'))

X_test = test_MLS[feature_set_columns[features]].copy()
# Always a 1-D label array, for every feature set (the 'Features' case
# previously kept a one-column DataFrame, unlike all other cases).
Y_test = test_MLS['Gender'].to_numpy(copy=True)

In [29]:
# Load the pickled scaler stored next to the models and only *apply* it
# (transform, no re-fitting on the test data).
# Presumably it was fitted on the training split of `dataset` -- verify against the training notebook.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
scaler_path = os.path.join(project_root, 'models', dataset, features, 'scaler.pkl')
with open(scaler_path, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# NOTE: this overwrites X_test, so re-running this cell without re-running the
# "Load test data" cell would scale the data a second time.
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

## Load and Evaluate Models

In [30]:
# Evaluate the pickled tree-based classifiers on the scaled test set.
# Each entry is (name printed in the report, pickle file name).
tree_based_models = {}
for display_name, model_file in (
    ("Decision Tree", 'DecisionTree.sav'),
    ("Random Forests", 'RandomForest.sav'),
    ("Gradient Boosting", 'GradientBoosting.sav'),
):
    filename = os.path.join(project_root, 'models', dataset, features, model_file)
    # NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    with open(filename, 'rb') as model_handle:
        model = pickle.load(model_handle)
    tree_based_models[display_name] = model

    print(f"\n{display_name}")
    print("Accuracy on test set: {:.3f}".format(model.score(X_test, Y_test)))

    # With labels=[1, 0], row/column 0 refer to label 1 (rows = true, columns = predicted),
    # so cm[0][0] counts true positives for label 1, cm[1][0] false positives
    # and cm[0][1] false negatives.
    cm = confusion_matrix(Y_test, model.predict(X_test), labels=[1, 0])
    precision = cm[0][0] / (cm[0][0] + cm[1][0])
    recall = cm[0][0] / (cm[0][0] + cm[0][1])
    print(f"Precision on test set:  {precision:.3f}")
    print(f"Recall on test set: {recall:.3f}")
    print(f"F1-score on test set: {(2 * (precision * recall) / (precision + recall)):.3f}")

# Keep the individual model names available for later cells.
tree = tree_based_models["Decision Tree"]
forest = tree_based_models["Random Forests"]
gbrt = tree_based_models["Gradient Boosting"]


Decision Tree
Accuracy on test set: 0.817
Precision on test set:  0.780
Recall on test set: 0.897
F1-score on test set: 0.835

Random Forests
Accuracy on test set: 0.856
Precision on test set:  0.876
Recall on test set: 0.839
F1-score on test set: 0.857

Gradient Boosting
Accuracy on test set: 0.915
Precision on test set:  0.889
Recall on test set: 0.953
F1-score on test set: 0.920


In [31]:
# Evaluate the remaining pickled classifiers on the scaled test set.
# Each entry is (name printed in the report, pickle file name).
other_models = {}
for display_name, model_file in (
    ("LogisticRegression", 'LogisticRegression.sav'),
    ("Support Vector Machine", 'SVM.sav'),
    ("Multilayer Perceptron", 'MLP.sav'),
):
    filename = os.path.join(project_root, 'models', dataset, features, model_file)
    # NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    with open(filename, 'rb') as model_handle:
        model = pickle.load(model_handle)
    other_models[display_name] = model

    print(f"\n{display_name}")
    print("Accuracy on test set: {:.3f}".format(model.score(X_test, Y_test)))

    # With labels=[1, 0], row/column 0 refer to label 1 (rows = true, columns = predicted),
    # so cm[0][0] counts true positives for label 1, cm[1][0] false positives
    # and cm[0][1] false negatives.
    cm = confusion_matrix(Y_test, model.predict(X_test), labels=[1, 0])
    precision = cm[0][0] / (cm[0][0] + cm[1][0])
    recall = cm[0][0] / (cm[0][0] + cm[0][1])
    print(f"Precision on test set:  {precision:.3f}")
    print(f"Recall on test set: {recall:.3f}")
    print(f"F1-score on test set: {(2 * (precision * recall) / (precision + recall)):.3f}")

# Keep the individual model names available for later cells.
lgr = other_models["LogisticRegression"]
svm = other_models["Support Vector Machine"]
mlp = other_models["Multilayer Perceptron"]



LogisticRegression
Accuracy on test set: 0.908
Precision on test set:  0.896
Recall on test set: 0.928
F1-score on test set: 0.912

Support Vector Machine
Accuracy on test set: 0.571
Precision on test set:  0.546
Recall on test set: 0.966
F1-score on test set: 0.698

Multilayer Perceptron
Accuracy on test set: 0.784
Precision on test set:  0.968
Recall on test set: 0.600
F1-score on test set: 0.740


In [20]:
# NOTE(review): scratch arithmetic that references no notebook variable; its
# execution count (In [20]) is lower than the cells above, so it was run out of
# order. Purpose is not evident from the notebook -- document it or delete the cell.
1*6*6*3+6*3+1

127

In [19]:
# NOTE(review): scratch arithmetic, equal to (5/6)**3; it references no notebook
# variable and its execution count (In [19]) is out of order with the cells above.
# Purpose is not evident from the notebook -- document it or delete the cell.
(5*5*5)/(6*6*6)

0.5787037037037037