In [1]:
from itertools import product

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier


  from pandas import MultiIndex, Int64Index


In [2]:
# Load the raw table; index_col=False stops pandas from using the first column as the index.
dataset = pd.read_csv('dataset.csv', index_col=False)

# Columns that are removed before modelling.
not_features = ["Path", "Emotional_intensity", "Statement", "Gender"]

# Remove those columns, then discard every row that still has a missing value.
features_data = dataset.drop(columns=not_features).dropna()

# Rows recorded by these actors form the held-out test set;
# every other row goes into the training set.
actor_list = [1, 12, 7, 24]
is_test_actor = features_data['Actor'].isin(actor_list)
test_set = features_data.loc[is_test_actor]
training_set = features_data.loc[~is_test_actor]

In [3]:
# Distinct actor IDs present in the training set, in order of first appearance.
training_actors = training_set['Actor'].unique()

print(training_actors)

# Partition the IDs by parity: odd IDs go to training_actors_m and
# even IDs go to training_actors_f.
# NOTE(review): presumably odd = male and even = female actors — confirm
# against the dataset's documentation.
training_actors_m = [actor_id for actor_id in training_actors if actor_id % 2 != 0]
training_actors_f = [actor_id for actor_id in training_actors if actor_id % 2 == 0]

[ 2  3  4  5  6  8  9 10 11 13 14 15 16 17 18 19 20 21 22 23]


In [4]:
 # Base estimators to tune: random forest, XGBoost, gradient boosting and an MLP.
model1 = RandomForestClassifier(
    max_samples=0.9,
    n_estimators=200,
    max_depth=None,
    n_jobs=-1,
    random_state=42,
)
model2 = XGBClassifier(
    objective='binary:logistic',
    reg_alpha=0.5,
    reg_lambda=1.0,
    n_estimators=200,
    random_state=42,
)
model3 = GradientBoostingClassifier(
    n_estimators=200,
    max_features=2,
    max_depth=None,
    random_state=42,
    subsample=0.9,
)
model4 = MLPClassifier(random_state=42, max_iter=300)

# Search grid whose keys match the random forest's parameters.
# NOTE(review): 0.11 breaks the otherwise ascending max_samples values —
# confirm it is intended and not a typo.
param_grid = dict(
    max_samples=[0.3, 0.6, 0.7, 0.9, 0.11],
    n_estimators=[200, 400, 600, 700, 800],
)

# Search grid whose keys match the XGBoost parameters set on model2.
param_grid2 = dict(
    n_estimators=[200, 400, 600],
    reg_alpha=[0.5, 1, 1.5],
    reg_lambda=[0.5, 1, 1.5],
)

# Seeded, shuffled 5-fold stratified splitter.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [5]:



def GridSearch(model, param_grid):
    """Grid-search ``param_grid`` for ``model`` using actor-held-out validation.

    Every combination of the values in ``param_grid`` is evaluated over a
    series of folds. Each fold holds out one actor from ``training_actors_m``
    and one from ``training_actors_f`` (paired by position) as the validation
    rows and trains on the remaining rows of ``training_set``. Accuracy and
    macro-averaged F1 are averaged over the folds and printed for each
    combination; the combination with the highest average accuracy is printed
    at the end.

    The function reads the notebook-level names ``training_set``, ``dataset``,
    ``training_actors_m`` and ``training_actors_f``.

    Parameters
    ----------
    model : estimator
        A configured scikit-learn-compatible classifier. It is never fitted
        itself: each fit uses a fresh clone, so the hyperparameters already
        set on ``model`` (for example ``random_state`` or ``n_jobs``) are kept
        and only the searched ones are overridden.
    param_grid : dict
        Maps a hyperparameter name to the candidate values to try.

    Returns
    -------
    None
        Results are reported through ``print`` only.

    Raises
    ------
    ValueError
        If there is no actor pair to hold out, i.e. either actor list is empty.
    """
    # Pair the actors by position instead of assuming exactly ten of each.
    actor_pairs = list(zip(training_actors_m, training_actors_f))
    if not actor_pairs:
        raise ValueError(
            "GridSearch needs at least one actor in both training_actors_m "
            "and training_actors_f"
        )

    # Feature columns are selected by position from the raw table.
    # NOTE(review): assumes the first six columns of `dataset` are all
    # non-feature columns and every later column exists in training_set — confirm.
    feature_columns = dataset.columns[6:]

    best_score = 0.0
    best_params = {}

    param_names = list(param_grid.keys())

    for params in product(*param_grid.values()):
        param_combination = dict(zip(param_names, params))

        model_accuracy = []
        model_f1 = []

        for m, f in actor_pairs:
            cross_test_set = training_set.loc[training_set['Actor'].isin([m, f])]
            cross_training_set = training_set.drop(cross_test_set.index)

            y_train = cross_training_set["Emotions"]
            X2d_train = cross_training_set[feature_columns]
            y_test = cross_test_set["Emotions"]
            X2d_test = cross_test_set[feature_columns]

            # Clone the configured estimator so its preset hyperparameters
            # survive; model.__class__(**params) would silently reset them
            # all to the library defaults (including the random seed).
            model_instance = clone(model).set_params(**param_combination)

            # Fit the classifier
            model_instance.fit(X2d_train, y_train)

            # Predict once and derive both metrics from the same predictions.
            y_pred = model_instance.predict(X2d_test)
            model_accuracy.append(np.mean(np.asarray(y_pred) == y_test.to_numpy()))
            model_f1.append(f1_score(y_test, y_pred, average="macro"))

        avg_accuracy = sum(model_accuracy) / len(model_accuracy)
        avg_f1 = sum(model_f1) / len(model_f1)

        # Model selection is driven by average accuracy; F1 is reported only.
        if avg_accuracy > best_score:
            best_score = avg_accuracy
            best_params = param_combination

        print(f"Parameters: {param_combination}")
        print(f"Average Accuracy: {avg_accuracy}")
        print(f"Average F1 Score: {avg_f1}")
        print("--------------------")

    print("Best parameters:")
    print(best_params)
    print(f"Best Accuracy: {best_score}")


In [None]:
# Call the function to print the best parameter combinations
GridSearch(model1, param_grid)

Parameters: {'max_samples': 0.3, 'n_estimators': 200}
Average Accuracy: 0.3725
Average F1 Score: 0.32949839240317413
--------------------
Parameters: {'max_samples': 0.3, 'n_estimators': 400}
Average Accuracy: 0.3883333333333333
Average F1 Score: 0.3498511073749596
--------------------
Parameters: {'max_samples': 0.3, 'n_estimators': 600}
Average Accuracy: 0.38583333333333336
Average F1 Score: 0.34128830238347996
--------------------
Parameters: {'max_samples': 0.3, 'n_estimators': 700}
Average Accuracy: 0.38416666666666666
Average F1 Score: 0.34516357946580845
--------------------
Parameters: {'max_samples': 0.3, 'n_estimators': 800}
Average Accuracy: 0.38249999999999995
Average F1 Score: 0.3420915590909796
--------------------
Parameters: {'max_samples': 0.6, 'n_estimators': 200}
Average Accuracy: 0.38166666666666665
Average F1 Score: 0.3489109583710061
--------------------
Parameters: {'max_samples': 0.6, 'n_estimators': 400}
Average Accuracy: 0.395
Average F1 Score: 0.3529839780853

In [None]:
# Run the same search for model2 over the grid in param_grid2.
GridSearch(model2, param_grid2)

In [None]:
# model3 is a GradientBoostingClassifier, which has no `max_samples`
# parameter, so passing param_grid unchanged fails when the estimator is
# built. Its row-subsampling parameter is `subsample` (already set on
# model3), so the same candidate values are searched under that name.
GridSearch(model3, {
    "subsample": param_grid["max_samples"],
    "n_estimators": param_grid["n_estimators"],
})