In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

In [2]:
# Score histories shared across the notebook: the cross-validation code appends
# one list of per-fold scores to each of these for every model it evaluates.
accuracy, f1 = [], []

In [3]:
# Random-forest search space: the per-tree sample fraction and the number of trees.
grid = dict(
    max_samples=[0.3, 0.6, 0.9],
    n_estimators=[200, 400, 600],
)

In [4]:
def cross_validation(model, n_folds=10):
    """Run actor-wise cross-validation of ``model`` on ``training_set``.

    For fold ``i`` every row whose ``Actor`` is ``training_actors_m[i]`` or
    ``training_actors_f[i]`` is held out, the model is fitted on the remaining
    rows and then scored on the held-out rows (accuracy and macro-averaged F1).

    Reads the notebook globals ``training_set``, ``dataset``,
    ``training_actors_m`` and ``training_actors_f``; the last two are not
    defined in the visible part of the notebook and must exist before calling.
    Presumably they hold one male and one female actor id per fold -- TODO confirm.

    Side effects: ``model`` is refitted in place on every fold, and the lists of
    per-fold accuracies and F1 scores are appended (as one entry each) to the
    global ``accuracy`` and ``f1`` lists.

    Parameters
    ----------
    model : estimator exposing ``fit``, ``predict`` and ``score``.
    n_folds : int, default 10
        Number of actor pairs (folds) to iterate over.
    """
    # Same feature selection as the rest of the notebook; hoisted out of the loop.
    feature_columns = dataset.columns[6:]

    model_accuracy = []
    model_f1 = []
    for i in range(n_folds):
        m = training_actors_m[i]
        f = training_actors_f[i]

        # Hold out all recordings of this fold's two actors.
        cross_test_set = training_set.loc[training_set['Actor'].isin([m, f])]
        cross_training_set = training_set.drop(cross_test_set.index)

        y_train = cross_training_set["Emotions"]
        X2d_train = cross_training_set[feature_columns]
        y_test = cross_test_set["Emotions"]
        X2d_test = cross_test_set[feature_columns]

        # Fit on the remaining actors, then record both metrics for this fold.
        model.fit(X2d_train, y_train)
        model_accuracy.append(model.score(X2d_test, y_test))
        y_pred = model.predict(X2d_test)
        model_f1.append(f1_score(y_test, y_pred, average="macro"))

    accuracy.append(model_accuracy)
    f1.append(model_f1)


def gridSearchRandomForest(model, grid):
    """Grid-search ``max_samples`` / ``n_estimators`` using ``cross_validation``.

    Every combination of ``grid["max_samples"]`` and ``grid["n_estimators"]``
    is applied to a fresh clone of ``model``, evaluated with
    ``cross_validation``, and ranked by mean fold accuracy. The best
    combination and its accuracy are printed; nothing is returned.

    Each evaluated combination leaves one entry in the global ``accuracy`` and
    ``f1`` lists (appended by ``cross_validation``).

    Parameters
    ----------
    model : estimator or None
        Template whose other settings (e.g. ``random_state``, ``n_jobs``) are
        kept for every candidate. It is cloned, never fitted or modified.
        ``None`` falls back to a default ``RandomForestClassifier``.
    grid : dict
        Must contain the keys ``"max_samples"`` and ``"n_estimators"``, each
        mapping to an iterable of candidate values.
    """
    # Local import: `clone` is not part of the notebook's import cell.
    from sklearn.base import clone

    template = RandomForestClassifier() if model is None else model

    # Best mean accuracy seen so far and the parameters that produced it.
    best_accuracy = 0.0
    best_params = {}

    for max_samples in grid["max_samples"]:
        for n_estimators in grid["n_estimators"]:
            params = {"max_samples": max_samples, "n_estimators": n_estimators}

            # Unfitted copy of the caller's estimator with this combination applied.
            candidate = clone(template).set_params(**params)

            # cross_validation stores this run's fold accuracies as the last
            # entry of the global `accuracy` list.
            cross_validation(candidate)
            accuracy_mean = np.mean(accuracy[-1])

            if accuracy_mean > best_accuracy:
                best_accuracy = accuracy_mean
                best_params = params

    print("Migliori iperparametri:", best_params)
    print("Accuratezza migliore:", best_accuracy)

In [5]:
# Load the full table; index_col=False stops pandas from using the first column as the index.
dataset = pd.read_csv('dataset.csv', index_col=False)

# Metadata columns that are removed before modelling.
not_features = ["Path", "Emotional_intensity", "Statement", "Gender"]
features_data = dataset.drop(columns=not_features)

# Every row of these actors forms the held-out test set; all other rows are training data.
actor_list = [1, 12, 7, 24]
test_set = features_data[features_data['Actor'].isin(actor_list)]
training_set = features_data[~features_data['Actor'].isin(actor_list)]

In [6]:
# Targets are the "Emotions" labels; inputs are the columns of `dataset` from position 6 onward.
X2d_train, y_train = training_set[dataset.columns[6:]], training_set["Emotions"]
X2d_test, y_test = test_set[dataset.columns[6:]], test_set["Emotions"]


In [7]:
# Base forest handed to the grid search; max_samples and n_estimators are the
# two settings that `grid` varies, the rest stay fixed.
rfm = RandomForestClassifier(
    n_estimators=200,
    max_samples=0.9,
    max_depth=None,
    random_state=42,
    n_jobs=-1,
)

In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

In [9]:
# Five shuffled, class-stratified folds with a fixed seed.
# NOTE(review): these folds split by row, not by actor, whereas the test set is
# held out by actor -- confirm that mixing an actor across folds is intended.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [10]:
# Search over `grid` on the `skf` folds, scoring each candidate by macro-averaged F1.
gs = GridSearchCV(
    estimator=rfm,
    param_grid=grid,
    scoring=make_scorer(f1_score, average="macro"),
    cv=skf,
)

In [11]:
# Run the search on the training rows; the trailing semicolon hides the estimator repr.
gs.fit(X=X2d_train, y=y_train);

In [12]:
# Display the parameter combination selected by the grid search.
gs.best_params_

{'max_samples': 0.9, 'n_estimators': 400}

In [13]:
# Keep the estimator the grid search exposes as its best one; it is the model
# evaluated on the held-out test set in the following cells.
model = gs.best_estimator_

In [44]:
# Score of the selected model on the held-out actors.
model.score(X=X2d_test, y=y_test)

0.55

In [45]:
# Predicted labels for the held-out actors, reused by the metric cells below.
y_pred = model.predict(X=X2d_test)

In [46]:
# average=None returns one F1 value per class instead of a single aggregate.
f1_score(y_true=y_test, y_pred=y_pred, average=None)

array([0.71428571, 0.73417722, 0.53164557, 0.5       , 0.4516129 ,
       0.11111111, 0.36363636, 0.71698113])

In [47]:
# Macro-averaged F1 on the test set, the same averaging the grid search scored with.
f1_score(y_true=y_test, y_pred=y_pred, average="macro")

0.5154312511430743

In [48]:
from sklearn.metrics import classification_report

In [49]:
# classification_report is printed so that its text table keeps its line breaks.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

       angry       0.83      0.62      0.71        32
        calm       0.62      0.91      0.73        32
     disgust       0.45      0.66      0.53        32
     fearful       0.58      0.44      0.50        32
       happy       0.47      0.44      0.45        32
     neutral       0.50      0.06      0.11        16
         sad       0.31      0.44      0.36        32
   surprised       0.90      0.59      0.72        32

    accuracy                           0.55       240
   macro avg       0.58      0.52      0.52       240
weighted avg       0.59      0.55      0.54       240



In [50]:
from sklearn.metrics import confusion_matrix
# Pin the label order to model.classes_ so the matrix rows/columns always line
# up with the DataFrame labels, even if a class is missing from y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
pd.DataFrame(cm, index=model.classes_, columns=model.classes_)

Unnamed: 0,angry,calm,disgust,fearful,happy,neutral,sad,surprised
angry,20,0,6,1,5,0,0,0
calm,0,29,0,0,0,0,3,0
disgust,0,4,21,1,3,1,2,0
fearful,3,0,2,14,4,0,7,2
happy,1,1,6,2,14,0,8,0
neutral,0,7,1,0,0,1,7,0
sad,0,6,8,4,0,0,14,0
surprised,0,0,3,2,4,0,4,19


In [51]:
from xgboost import XGBClassifier

  from pandas import MultiIndex, Int64Index


In [52]:
# The "Emotions" target seen above has 8 classes, so declare a multiclass
# objective rather than 'binary:logistic'.
# NOTE(review): assumes xgbm is fitted on that same target -- confirm.
xgbm = XGBClassifier(objective='multi:softprob', reg_alpha=0.5, reg_lambda=1.0, n_estimators=200, random_state=42)

In [53]:
# XGBoost search space: number of boosting rounds plus the two regularisation
# terms passed as reg_alpha and reg_lambda. This rebinds the name `grid` used
# earlier for the random-forest search.
grid = dict(
    n_estimators=[200, 400, 600],
    reg_alpha=[0.5, 1, 1.5],
    reg_lambda=[0.5, 1, 1.5],
)