Models presented in notebook 5, with added mapping of emotions into a two dimensional plane, while still using discrete (classification) models, separate for each axis.

# Training models

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.svm import SVC
import pickle
from sklearn.mixture import GaussianMixture
import seaborn as sns
from reusable.emotion_mapping import SklearnTwoModelWrapper
MODELS_PATH = r'..\models\emotion_mapping_discrete'
PROCESSED_DATASET_PATH = r"..\dataset_processed"

## Load dataset

In [2]:
with open(rf"{PROCESSED_DATASET_PATH}\functional_features.pkl", "rb") as input_file:
    X = pickle.load(input_file)

In [3]:
Y = X['Emotion']
Y.unique()

array(['Anger', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'],
      dtype=object)

In [4]:
X = X.drop(['Emotion','filepath'], axis=1)
X = StandardScaler().fit_transform(X)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42) 

In [6]:
np.array([x for x in y_train])

array(['Anger', 'Fear', 'Fear', ..., 'Fear', 'Happy', 'Surprise'],
      dtype='<U8')

## SVM

In [7]:
svm_model= SVC(random_state=42)
param_grid = {'C': [0.1, 1, 10,100], 'gamma': [1, 0.1, 0.01, 0.001,'auto'], 'kernel': ['linear', 'rbf']}
grid = RandomizedSearchCV(svm_model, param_grid, refit=True,verbose=1,cv=3,n_iter=10,random_state=42)
grid = SklearnTwoModelWrapper(grid).discrete()

grid.fit(X_train, y_train)

print(grid.best_params_)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[{'kernel': 'rbf', 'gamma': 'auto', 'C': 100}, {'kernel': 'rbf', 'gamma': 'auto', 'C': 100}]


In [8]:
with open(rf"{MODELS_PATH}\svm_model.pkl", "wb") as output_file:
    pickle.dump(grid, output_file)

In [9]:
y_pred = grid.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.7554070473876063


## Random Forest

In [10]:
rdf = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [2, 4, 6,8],
}
grid = GridSearchCV(rdf, param_grid, refit=True, verbose=1,cv=3)
grid = SklearnTwoModelWrapper(grid).discrete()
grid.fit(X_train, y_train)
print(grid.best_params_)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
Fitting 3 folds for each of 12 candidates, totalling 36 fits
[{'max_depth': 8, 'n_estimators': 200}, {'max_depth': 8, 'n_estimators': 300}]


In [11]:
with open(rf"{MODELS_PATH}\rdf_model.pkl", "wb") as output_file:
    pickle.dump(grid, output_file)

In [12]:
y_pred = grid.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.6266099635479951


## Ensemble method

In [13]:
rbm1_params = {
    'rbm1__n_components': [50, 100, 150,200],
    'rbm1__learning_rate': [0.01, 0.05, 0.1],
    'rbm1__n_iter': [10, 20, 30,50],
    'rbm1__batch_size': [32, 64, 128]
}
rbm2_params = {
    'rbm2__n_components': [50, 100, 150,200],
    'rbm2__learning_rate': [0.01, 0.05, 0.1],
    'rbm2__n_iter': [10, 20, 30,50],
    'rbm2__batch_size': [32, 64, 128]
}
svm_model = SVC(random_state=101)
svm_params = {'svm__C': [0.1, 1, 10,100], 'svm__gamma': [1, 0.1, 0.01, 0.001], 'svm__kernel': ['linear','rbf']}

rbm1= BernoulliRBM(random_state=42)
rbm2= BernoulliRBM(random_state=42)

grid= RandomizedSearchCV(Pipeline(steps=[('rbm1', rbm1),('rbm2',rbm2),('svm', svm_model)]), [rbm1_params,rbm2_params,svm_params], n_iter=1 ,cv=3,verbose=20,refit=True)
grid = SklearnTwoModelWrapper(grid).discrete()

grid.fit(X_train, y_train)
print(grid.best_params_)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV 1/3; 1/1] START rbm1__batch_size=128, rbm1__learning_rate=0.1, rbm1__n_components=100, rbm1__n_iter=30
[CV 1/3; 1/1] END rbm1__batch_size=128, rbm1__learning_rate=0.1, rbm1__n_components=100, rbm1__n_iter=30;, score=0.312 total time=  36.7s
[CV 2/3; 1/1] START rbm1__batch_size=128, rbm1__learning_rate=0.1, rbm1__n_components=100, rbm1__n_iter=30
[CV 2/3; 1/1] END rbm1__batch_size=128, rbm1__learning_rate=0.1, rbm1__n_components=100, rbm1__n_iter=30;, score=0.317 total time=  37.7s
[CV 3/3; 1/1] START rbm1__batch_size=128, rbm1__learning_rate=0.1, rbm1__n_components=100, rbm1__n_iter=30
[CV 3/3; 1/1] END rbm1__batch_size=128, rbm1__learning_rate=0.1, rbm1__n_components=100, rbm1__n_iter=30;, score=0.319 total time=  37.4s
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV 1/3; 1/1] START svm__C=1, svm__gamma=1, svm__kernel=linear..................
[CV 1/3; 1/1] END svm__C=1, svm__gamma=1, svm__kernel=linear;, sco

In [14]:
with open(rf"{MODELS_PATH}\ensemble_model_svm.pkl", "wb") as output_file:
    pickle.dump(grid, output_file)

In [15]:
y_pred = grid.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.1997569866342649


## MLP classifier

In [16]:
mlp_classifier = MLPClassifier(random_state=42)

param_grid = {
    'hidden_layer_sizes': [(100,), (50, 50), (100, 50),(50,50,50)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.01,0.05, 0.1],
    'max_iter': [ 300,500]
}

grid= RandomizedSearchCV(mlp_classifier, param_distributions=param_grid, n_iter=15, cv=3, verbose=1, random_state=42,refit=True)
grid = SklearnTwoModelWrapper(grid).discrete()


grid.fit(X_train, y_train)
print(grid.best_params_)

Fitting 3 folds for each of 15 candidates, totalling 45 fits




Fitting 3 folds for each of 15 candidates, totalling 45 fits
[{'solver': 'sgd', 'max_iter': 500, 'learning_rate_init': 0.05, 'hidden_layer_sizes': (100, 50), 'alpha': 0.01, 'activation': 'relu'}, {'solver': 'sgd', 'max_iter': 500, 'learning_rate_init': 0.1, 'hidden_layer_sizes': (50, 50), 'alpha': 0.001, 'activation': 'logistic'}]


In [18]:
with open(rf"{MODELS_PATH}\mlp_model.pkl", "wb") as output_file:
    pickle.dump(grid, output_file)

In [19]:
y_pred = grid.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.7377885783718104


# Summary

In [20]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
from sklearn.metrics import confusion_matrix
import pandas as pd

In [21]:
models = {}
with open(rf"{MODELS_PATH}\svm_model.pkl", "rb") as input_file:
    models['SVM'] = pickle.load(input_file)
with open(rf"{MODELS_PATH}\rdf_model.pkl", "rb") as input_file:
    models['RDF'] = pickle.load(input_file)
with open(rf"{MODELS_PATH}\ensemble_model_svm.pkl", "rb") as input_file:
    models['ensemble'] = pickle.load(input_file)
with open(rf"{MODELS_PATH}\mlp_model.pkl", "rb") as input_file:
    models['MLP'] = pickle.load(input_file)

In [22]:
def evaluate(model, X_test, y_test):
    y_pred = model.predict(X_test)
    return_dict =  {
        'Accuracy': accuracy_score(y_test, y_pred),
        'F1 Score': f1_score(y_test, y_pred, average='weighted'),
        'Recall': recall_score(y_test, y_pred, average='weighted'),
        'Precision': precision_score(y_test, y_pred, average='weighted',zero_division=0),
    }
    # if isinstance(model, dict):
    #     return_dict['ROC AUC'] = roc_auc_score(y_test, gmm_predict_proba(X_test, model), multi_class='ovr')
    # else:
    #     return_dict['ROC AUC'] = roc_auc_score(y_test, model.decision_function(X_test), multi_class='ovr')
    return return_dict

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [24]:
results_df = pd.DataFrame()
for model_name, model in models.items():
    result = evaluate(model, X_test,y_test)
    df = pd.DataFrame(result, index=[model_name])
    results_df = pd.concat([results_df, df])
print(results_df)

          Accuracy  F1 Score    Recall  Precision
SVM       0.755407  0.755522  0.755407   0.758707
RDF       0.626610  0.626676  0.626610   0.684386
ensemble  0.199757  0.100393  0.199757   0.296688
MLP       0.737789  0.743536  0.737789   0.758024
