# Training models

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.svm import SVC
import pickle
from sklearn.mixture import GaussianMixture
import seaborn as sns
from sklearn.svm import SVR
from reusable.emotion_mapping import SklearnTwoModelWrapper
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
# Directory the model pickles are written to and later read back from.
# Windows-style relative path; it resolves against the kernel's working directory.
MODELS_PATH = r'..\models\emotion_mapping_continuous'
# Directory containing the pickled feature table loaded in the "Load dataset" section.
PROCESSED_DATASET_PATH = r"..\dataset_processed"

## Load dataset

In [2]:
# Read the pickled feature table. At this point X still carries the
# 'Emotion' and 'filepath' columns that later cells split off.
# NOTE(review): pickle.load can execute code embedded in the file — only load
# files produced by a trusted pipeline.
features_pickle_path = rf"{PROCESSED_DATASET_PATH}\functional_features.pkl"
with open(features_pickle_path, "rb") as input_file:
    X = pickle.load(input_file)

In [3]:
# Target labels: the 'Emotion' column of the loaded table.
Y = X['Emotion']
# Last expression of the cell, so the distinct label values are displayed.
Y.unique()

array(['Anger', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'],
      dtype=object)

In [4]:
# Keep only the feature columns, then standardise them.
# NOTE(review): the scaler is fitted on every row before the train/test split
# made in a later cell, so test-set statistics influence the scaling. Consider
# fitting it on the training rows only.
# NOTE(review): X is rebound from the loaded table to the scaler's output, so
# this cell cannot be re-run without reloading the dataset first.
feature_columns = X.drop(columns=['Emotion', 'filepath'])
scaler = StandardScaler()
X = scaler.fit_transform(feature_columns)

In [5]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

## SVM

In [6]:
# Candidate hyper-parameters sampled by the randomized search.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001, 'auto'],
    'kernel': ['linear', 'rbf'],
}
svm_model = SVR()
svm_search = RandomizedSearchCV(
    estimator=svm_model,
    param_distributions=param_grid,
    n_iter=10,
    cv=3,
    refit=True,
    verbose=1,
    random_state=42,
)
# SklearnTwoModelWrapper is project code. The recorded output shows the search
# running twice and best_params_ listing two parameter sets.
grid = SklearnTwoModelWrapper(svm_search)
grid.fit(X_train, y_train)

print(grid.best_params_)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[{'kernel': 'rbf', 'gamma': 'auto', 'C': 1}, {'kernel': 'rbf', 'gamma': 'auto', 'C': 1}]


In [7]:
# Persist the wrapper fitted in the SVM cell so the summary section can reload it.
svm_pickle_path = rf"{MODELS_PATH}\svm_model.pkl"
with open(svm_pickle_path, "wb") as output_file:
    pickle.dump(grid, output_file)

In [8]:
# Predict on the held-out rows and report the fraction of exact label matches.
# NOTE(review): presumably the wrapper maps the regressors' outputs back to
# emotion labels before returning — confirm in reusable.emotion_mapping.
y_pred = grid.predict(X_test)
svm_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", svm_accuracy)

Accuracy: 0.5625759416767923


## XGBoost

In [9]:
# Candidate hyper-parameters sampled by the randomized search.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [2, 4, 6, 8],
    'learning_rate': [0.01, 0.05, 0.1, 0.5],
}
xgb_model = xgb.XGBRegressor(random_state=42)
xgb_search = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=param_grid,
    n_iter=15,
    cv=3,
    refit=True,
    verbose=1,
    random_state=42,
)
# The project wrapper takes the configured search; `grid` is what later cells
# pickle and evaluate.
grid = SklearnTwoModelWrapper(xgb_search)
grid.fit(X_train, y_train)
print(grid.best_params_)

Fitting 3 folds for each of 15 candidates, totalling 45 fits
Fitting 3 folds for each of 15 candidates, totalling 45 fits
[{'n_estimators': 100, 'max_depth': 8, 'learning_rate': 0.1}, {'n_estimators': 100, 'max_depth': 8, 'learning_rate': 0.1}]


In [10]:
# Persist the wrapper fitted in the XGBoost cell so the summary section can reload it.
xgb_pickle_path = rf"{MODELS_PATH}\xgb_model.pkl"
with open(xgb_pickle_path, "wb") as output_file:
    pickle.dump(grid, output_file)

In [11]:
# Predict on the held-out rows and report the fraction of exact label matches.
y_pred = grid.predict(X_test)
xgb_accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", xgb_accuracy)

Accuracy: 0.5605103280680438


## Random Forest

In [20]:
# Exhaustive grid: 3 x 4 = 12 parameter combinations, each cross-validated on 3 folds.
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt, so
# the run shown in the notebook did not finish.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [2, 4, 6, 8],
}
rdf = RandomForestRegressor(random_state=42)
rdf_search = GridSearchCV(
    estimator=rdf,
    param_grid=param_grid,
    cv=3,
    refit=True,
    verbose=1,
)
grid = SklearnTwoModelWrapper(rdf_search)
grid.fit(X_train, y_train)
print(grid.best_params_)

Fitting 3 folds for each of 12 candidates, totalling 36 fits


KeyboardInterrupt: 

In [None]:
# Persist the Random Forest wrapper for the summary section.
# NOTE(review): the training cell's recorded output ends in KeyboardInterrupt.
# If fit() did not complete, `grid` is not a finished model and should not be
# saved — confirm before running this cell.
rdf_pickle_path = rf"{MODELS_PATH}\rdf_model.pkl"
with open(rdf_pickle_path, "wb") as output_file:
    pickle.dump(grid, output_file)

In [None]:
# Predict on the held-out rows and report the fraction of exact label matches.
# NOTE(review): this cell has no execution count in the saved notebook, so the
# accuracy shown beneath it may come from an earlier run — re-run to confirm.
y_pred = grid.predict(X_test)
rdf_accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", rdf_accuracy)

Accuracy: 0.670595382746051


## MLP regressor

In [14]:
# Candidate hyper-parameters sampled by the randomized search.
# NOTE(review): the recorded output reports failed fits (12 of 45 and 5 of 45)
# alongside runtime warnings from the MLP internals. Some sampled settings
# appear to diverge; re-run with error_score='raise' to see the full error.
param_grid = {
    'hidden_layer_sizes': [(100,), (50, 50), (100, 50), (50, 50, 50)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.01, 0.05, 0.1],
    'max_iter': [300, 500],
}

# The variable keeps its historical name although the estimator is a regressor.
mlp_classifier = MLPRegressor(random_state=42)
mlp_search = RandomizedSearchCV(
    estimator=mlp_classifier,
    param_distributions=param_grid,
    n_iter=15,
    cv=3,
    refit=True,
    verbose=1,
    random_state=42,
)
grid = SklearnTwoModelWrapper(mlp_search)

grid.fit(X_train, y_train)
print(grid.best_params_)

Fitting 3 folds for each of 15 candidates, totalling 45 fits


  param += update
  ret = a @ b
  ret = a @ b
  ret = a @ b
  param += update
  ret = a @ b
  ret = a @ b
  ret = a @ b
  param += update
  ret = a @ b
  ret = a @ b
  activations[i + 1] += self.intercepts_[i]
  activations[i + 1] += self.intercepts_[i]
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  param += update
  activations[i + 1] += self.intercepts_[i]
  param += update
  activations[i + 1] += self.intercepts_[i]
  ret = a @ b
  ret = a @ b
  activations[i + 1] += self.intercepts_[i]
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
12 fits failed out of a total of 45.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
12 fits failed with the following error:
Traceback (most recent call las

Fitting 3 folds for each of 15 candidates, totalling 45 fits


  ret = a @ b
  ret = a @ b
  param += update
  ret = a @ b
  ret = a @ b
  activations[i + 1] += self.intercepts_[i]
  activations[i + 1] += self.intercepts_[i]
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  param += update
5 fits failed out of a total of 45.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "d:\PW\SER_ACT\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\PW\SER_ACT\venv\lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "d:\PW\SER_ACT\venv\lib\site-pac

[{'solver': 'sgd', 'max_iter': 300, 'learning_rate_init': 0.01, 'hidden_layer_sizes': (50, 50, 50), 'alpha': 0.0001, 'activation': 'tanh'}, {'solver': 'adam', 'max_iter': 500, 'learning_rate_init': 0.01, 'hidden_layer_sizes': (100, 50), 'alpha': 0.001, 'activation': 'logistic'}]


In [16]:
# Persist the wrapper fitted in the MLP cell so the summary section can reload it.
mlp_pickle_path = rf"{MODELS_PATH}\mlp_model.pkl"
with open(mlp_pickle_path, "wb") as output_file:
    pickle.dump(grid, output_file)

In [17]:
# Predict on the held-out rows and report the fraction of exact label matches.
y_pred = grid.predict(X_test)
mlp_accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", mlp_accuracy)

Accuracy: 0.6091130012150668


# Summary

In [21]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
from sklearn.metrics import confusion_matrix
import pandas as pd

In [22]:
models = {}
with open(rf"{MODELS_PATH}\svm_model.pkl", "rb") as input_file:
    models['SVM'] = pickle.load(input_file)
with open(rf"{MODELS_PATH}\xgb_model.pkl", "rb") as input_file:
    models['XGB'] = pickle.load(input_file)
with open(rf"{MODELS_PATH}\rdf_model.pkl", "rb") as input_file:
    models['RDF'] = pickle.load(input_file)
with open(rf"{MODELS_PATH}\mlp_model.pkl", "rb") as input_file:
    models['MLP'] = pickle.load(input_file)

In [23]:
def evaluate(model, X_test, y_test):
    """Score a fitted model on held-out data with support-weighted metrics.

    Parameters
    ----------
    model : object or dict
        An object exposing ``predict(X_test)``. A ``dict`` is routed to
        ``gmm_predict`` instead. That helper is not defined in the visible part
        of this notebook, so it must already exist in the kernel for that
        branch to work.
    X_test : array-like
        Feature rows to predict on.
    y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    dict
        Metric values under the keys ``'Accuracy'``, ``'F1 Score'``,
        ``'Recall'`` and ``'Precision'``.
    """
    if isinstance(model, dict):
        y_pred = gmm_predict(X_test, model)
    else:
        y_pred = model.predict(X_test)

    # Use the same averaging and zero-division policy for every per-class
    # metric. zero_division=0 yields the same value as the default "warn"
    # setting but keeps undefined-metric warnings out of the summary output.
    weighted = {'average': 'weighted', 'zero_division': 0}
    # ROC AUC is intentionally not reported here.
    return {
        'Accuracy': accuracy_score(y_test, y_pred),
        'F1 Score': f1_score(y_test, y_pred, **weighted),
        'Recall': recall_score(y_test, y_pred, **weighted),
        'Precision': precision_score(y_test, y_pred, **weighted),
    }

In [24]:
# Rebuild the split with the same test_size and seed used for training, so the
# summary scores the reloaded models on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [25]:
# Score every reloaded model on the same held-out split, one table row per model.
# The table is built once from the collected records rather than grown with
# pd.concat inside the loop, which re-copies the frame on every iteration.
metric_rows = {
    model_name: evaluate(model, X_test, y_test)
    for model_name, model in models.items()
}
results_df = pd.DataFrame(list(metric_rows.values()), index=list(metric_rows))
print(results_df)

     Accuracy  F1 Score    Recall  Precision
SVM  0.562576  0.572520  0.562576   0.665513
XGB  0.560510  0.567684  0.560510   0.667720
MLP  0.609113  0.612610  0.609113   0.638910
