In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw table, discard the columns that are not used by the rest of the
# notebook, and keep only the rows without missing values.
dataset = pd.read_csv('dataset.csv', index_col=False)
not_features = ["Path", "Emotional_intensity", "Statement", "Gender"]
features_data = dataset.drop(columns=not_features).dropna()

In [3]:
# Data preparation: every row belonging to one of the actors in `actor_list`
# goes to the test set; all remaining rows form the training set.
actor_list = [1, 12, 7, 24]
test_set = features_data.loc[features_data['Actor'].isin(actor_list)]
training_set = features_data.drop(test_set.index)
# `unique` is a method and must be called to obtain the array of distinct
# actor ids present in the training set.
training_actors = training_set['Actor'].unique()

In [4]:
# Separate each partition into its target vector (the "Emotions" column) and
# its feature matrix.
# NOTE(review): the feature columns are picked by position from the raw
# `dataset` (everything from the 7th column onwards) - presumably the first six
# columns hold metadata/labels; confirm against the CSV layout.
feature_columns = dataset.columns[6:]

X2d_train, y_train = training_set[feature_columns], training_set["Emotions"]
X2d_test, y_test = test_set[feature_columns], test_set["Emotions"]


In [5]:
 # Define the random forest model
# Base estimator. `max_samples` and `n_estimators` given here are only starting
# values: both are listed in `grid`, so the search sets them per candidate.
rfm = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    max_samples=0.9,
    n_jobs=-1,
    random_state=42,
)

# Hyper-parameter grid explored by the search.
# NOTE(review): 0.11 breaks the otherwise increasing `max_samples` sequence -
# possibly a typo (e.g. for 0.1 or 1.0); kept unchanged, please confirm.
grid = dict(
    max_samples=[0.3, 0.6, 0.7, 0.9, 0.11],
    n_estimators=[200, 400, 600, 700, 800],
)

# Seeded, shuffled 5-fold stratified splitter used for cross-validation.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [6]:
def grid_search_rf(X2d_train, y_train, rfm, grid, skf, cv=5, X2d_test=None, y_test=None):
    """Tune `rfm` over `grid` with a macro-F1 grid search and report the results.

    Parameters
    ----------
    X2d_train, y_train
        Training features and labels the grid search is fitted on.
    rfm
        Estimator to tune.
    grid
        Parameter grid passed to ``GridSearchCV``.
    skf
        Cross-validation splitter. When it is ``None``, `cv` is used instead.
    cv
        Fallback cross-validation setting, used only when `skf` is ``None``.
    X2d_test, y_test
        Optional held-out features and labels. When both are given, the final
        F1 score is computed on them; when both are omitted, the score is
        computed on the training data and labelled as such.

    Returns
    -------
    The best estimator found by the search (``gs.best_estimator_``).

    Raises
    ------
    ValueError
        If only one of `X2d_test` / `y_test` is provided.
    """
    if (X2d_test is None) != (y_test is None):
        raise ValueError("X2d_test and y_test must be provided together")

    # Honour `cv` when no explicit splitter is supplied.
    splitter = skf if skf is not None else cv

    # Define the grid search
    gs = GridSearchCV(rfm, grid, cv=splitter, scoring=make_scorer(f1_score, average="macro"))
    # Perform the grid search
    gs.fit(X2d_train, y_train)
    # Print the best hyperparameters and corresponding cross-validated F1 score
    print("Best hyperparameters: ", gs.best_params_)
    print("Best F1 score: {:.4f}".format(gs.best_score_))

    best_model = gs.best_estimator_

    # Score the best model on the held-out data when available; otherwise fall
    # back to the training data, and name the evaluated split truthfully in the
    # printed message.
    if X2d_test is not None:
        eval_X, eval_y, split_name = X2d_test, y_test, "test"
    else:
        eval_X, eval_y, split_name = X2d_train, y_train, "training"
    y_pred = best_model.predict(eval_X)
    f1 = f1_score(eval_y, y_pred, average='macro')
    print("F1 score on the {} set: {:.4f}".format(split_name, f1))
    return best_model

# Run the search on the training split and keep the best estimator it returns.
best_model = grid_search_rf(
    X2d_train=X2d_train,
    y_train=y_train,
    rfm=rfm,
    grid=grid,
    skf=skf,
)

Best hyperparameters:  {'max_samples': 0.9, 'n_estimators': 400}
Best F1 score: 0.5668
F1 score on the test set: 1.0000


In [7]:
# Notebook-level grid search over `grid` for `rfm`, scored by macro-averaged F1
# with the folds produced by `skf`.
gs = GridSearchCV(
    estimator=rfm,
    param_grid=grid,
    scoring=make_scorer(f1_score, average="macro"),
    cv=skf,
)

In [9]:
# Fit every grid candidate on the training split; the trailing `;` hides the
# estimator repr in the cell output.
gs.fit(X=X2d_train, y=y_train);

In [10]:
# Keep the estimator selected by the grid search for the evaluation cells below.
model = gs.best_estimator_

In [11]:
# Predict the emotion labels for the held-out test features.
y_pred = model.predict(X2d_test)

In [12]:
# Per-class F1 on the test split: average=None returns one score per label
# instead of a single aggregate value.
f1_score(y_test, y_pred, average=None)

array([0.71428571, 0.73417722, 0.53164557, 0.5       , 0.4516129 ,
       0.11111111, 0.36363636, 0.71698113])

In [13]:
# Macro F1 on the test split: the unweighted mean of the per-class F1 scores.
f1_score(y_test, y_pred, average="macro")

0.5154312511430743

In [14]:
from sklearn.metrics import classification_report

In [15]:
# Text summary of precision, recall, F1 and support per class on the test split.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       angry       0.83      0.62      0.71        32
        calm       0.62      0.91      0.73        32
     disgust       0.45      0.66      0.53        32
     fearful       0.58      0.44      0.50        32
       happy       0.47      0.44      0.45        32
     neutral       0.50      0.06      0.11        16
         sad       0.31      0.44      0.36        32
   surprised       0.90      0.59      0.72        32

    accuracy                           0.55       240
   macro avg       0.58      0.52      0.52       240
weighted avg       0.59      0.55      0.54       240



In [16]:
from sklearn.metrics import confusion_matrix

# Pin the label order to the classifier's classes so the matrix rows/columns
# always line up with the DataFrame labels, even when a class is missing from
# y_test or y_pred.
class_labels = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
# Rows are the true emotions, columns the predicted ones.
pd.DataFrame(cm, index=class_labels, columns=class_labels)

Unnamed: 0,angry,calm,disgust,fearful,happy,neutral,sad,surprised
angry,20,0,6,1,5,0,0,0
calm,0,29,0,0,0,0,3,0
disgust,0,4,21,1,3,1,2,0
fearful,3,0,2,14,4,0,7,2
happy,1,1,6,2,14,0,8,0
neutral,0,7,1,0,0,1,7,0
sad,0,6,8,4,0,0,14,0
surprised,0,0,3,2,4,0,4,19
