In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Columns removed before modelling (metadata rather than model inputs).
not_features = ["Path", "Emotional_intensity", "Statement", "Gender"]
# Actors whose rows are held out as the final test set.
actor_list = [1, 12, 7, 24]

dataset = pd.read_csv('dataset.csv', index_col=False)

# Drop the metadata columns, then discard every row that still has a missing value.
features_data = dataset.drop(columns=not_features).dropna()

# Rows of the held-out actors form the test set; everything else is training data.
test_set = features_data[features_data['Actor'].isin(actor_list)]
training_set = features_data.drop(index=test_set.index)

In [3]:
# Distinct actor ids that are still available in the training data.
training_actors = training_set['Actor'].unique()

print(training_actors)

# Partition the actor ids by parity: even ids go to the `_f` list, odd ids to
# the `_m` list (presumably female / male actors; the parity convention is not
# shown in this notebook, so confirm against the dataset description).
training_actors_f = [actor_id for actor_id in training_actors if actor_id % 2 == 0]
training_actors_m = [actor_id for actor_id in training_actors if actor_id % 2 != 0]

[ 2  3  4  5  6  8  9 10 11 13 14 15 16 17 18 19 20 21 22 23]


In [4]:
 # Define the random forest model
model = RandomForestClassifier(
    random_state=42,
    n_jobs=-1,
    bootstrap=True,
    max_depth=None,
)

# Hyperparameter values explored by the manual grid search.
# NOTE(review): 0.11 breaks the otherwise ascending max_samples sequence and
# looks like a typo (perhaps 1.0 or 0.1 was intended) - confirm before
# relying on the results for that value.
param_grid = dict(
    max_samples=[0.3, 0.6, 0.7, 0.9, 0.11],
    n_estimators=[200, 400, 600, 700, 800],
)

# Shuffled, seeded stratified 5-fold splitter. It is not referenced by the
# GridSearch function defined in this notebook, which builds its folds per actor.
skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

In [5]:
def _build_actor_folds(frame, male_actors, female_actors, feature_columns):
    """Build one (X_train, y_train, X_test, y_test) fold per male/female actor pair.

    Each fold holds out every row of `frame` whose 'Actor' is one of the two
    paired actors and trains on all remaining rows. Actors are paired
    positionally; surplus actors in the longer list are ignored.
    """
    folds = []
    for m, f in zip(male_actors, female_actors):
        held_out = frame['Actor'].isin([m, f])
        cross_test_set = frame.loc[held_out]
        cross_training_set = frame.loc[~held_out]
        folds.append((
            cross_training_set[feature_columns],
            cross_training_set["Emotions"],
            cross_test_set[feature_columns],
            cross_test_set["Emotions"],
        ))
    return folds


def GridSearch(model, param_grid):
    """Manual grid search with leave-one-actor-pair-out cross-validation.

    For every (max_samples, n_estimators) combination the model is fitted once
    per actor pair and evaluated on the held-out pair. The average accuracy and
    macro F1 over the folds are printed, and the combination with the highest
    average accuracy is reported at the end.

    Relies on the notebook-level names `training_set`, `training_actors_m`,
    `training_actors_f`, `dataset` and `f1_score`. Feature columns are taken as
    `dataset.columns[6:]` (assumes the first six columns of `dataset` are
    non-feature columns - TODO confirm against the CSV layout).

    Parameters
    ----------
    model : estimator
        Object exposing `set_params`, `fit` and `predict`. It is modified in
        place: after the call it holds the last parameter combination and is
        fitted on the last fold.
    param_grid : dict
        Must contain the keys 'max_samples' and 'n_estimators', each mapping
        to an iterable of candidate values.

    Returns
    -------
    tuple
        `(best_params, best_score)`; `best_params` is a dict with the winning
        'max_samples' and 'n_estimators', `best_score` the corresponding
        average accuracy. `({}, None)` when the grid is empty.

    Raises
    ------
    ValueError
        If no male/female actor pair is available to build a fold.
    """
    # The folds do not depend on the hyperparameters, so build them only once.
    folds = _build_actor_folds(
        training_set, training_actors_m, training_actors_f, dataset.columns[6:]
    )
    if not folds:
        raise ValueError(
            "GridSearch needs at least one male/female actor pair to build a validation fold"
        )

    # None (rather than 0.0) so that a combination scoring exactly 0 is still recorded.
    best_score = None
    best_params = {}

    for max_samples in param_grid['max_samples']:
        for n_estimators in param_grid['n_estimators']:
            model_accuracy = []
            model_f1 = []

            # Set the model parameters
            model.set_params(max_samples=max_samples, n_estimators=n_estimators)

            for X2d_train, y_train, X2d_test, y_test in folds:
                # Fit the classifier
                model.fit(X2d_train, y_train)

                # Predict once and derive both metrics from the same predictions.
                y_pred = model.predict(X2d_test)
                matches = np.asarray(y_pred) == np.asarray(y_test)
                model_accuracy.append(float(np.mean(matches)))
                model_f1.append(f1_score(y_test, y_pred, average="macro"))

            # Average the metrics over all folds.
            avg_accuracy = sum(model_accuracy) / len(model_accuracy)
            avg_f1 = sum(model_f1) / len(model_f1)

            # Keep the first combination that reaches the highest average accuracy.
            if best_score is None or avg_accuracy > best_score:
                best_score = avg_accuracy
                best_params['max_samples'] = max_samples
                best_params['n_estimators'] = n_estimators

            print(f"max_samples: {max_samples}, n_estimators: {n_estimators}")
            print(f"Average Accuracy: {avg_accuracy}")
            print(f"Average F1 Score: {avg_f1}")
            print("--------------------")

    if best_score is None:
        # Empty grid: nothing was evaluated, so there is nothing to report.
        print("No parameter combination was evaluated.")
        return best_params, best_score

    print("Best parameters:")
    print(f"max_samples: {best_params['max_samples']}, n_estimators: {best_params['n_estimators']}")
    print(f"Best Accuracy: {best_score}")
    return best_params, best_score




In [None]:
# Run the manual grid search; it prints the scores of every parameter
# combination followed by the best one.
GridSearch(model, param_grid)

max_samples: 0.3, n_estimators: 200
Average Accuracy: 0.3625
Average F1 Score: 0.32265357067992656
--------------------
max_samples: 0.3, n_estimators: 400
Average Accuracy: 0.38
Average F1 Score: 0.3390711168177664
--------------------
max_samples: 0.3, n_estimators: 600
Average Accuracy: 0.38416666666666666
Average F1 Score: 0.34453123548014236
--------------------
max_samples: 0.3, n_estimators: 700
Average Accuracy: 0.38333333333333336
Average F1 Score: 0.3435280683792147
--------------------
max_samples: 0.3, n_estimators: 800
Average Accuracy: 0.3833333333333333
Average F1 Score: 0.3436383597457204
--------------------
max_samples: 0.6, n_estimators: 200
Average Accuracy: 0.38249999999999995
Average F1 Score: 0.3423551503715516
--------------------
max_samples: 0.6, n_estimators: 400
Average Accuracy: 0.3899999999999999
Average F1 Score: 0.3525449786416241
--------------------
max_samples: 0.6, n_estimators: 600
Average Accuracy: 0.39583333333333326
Average F1 Score: 0.3554615482