In [1]:
import numpy as np
import pandas as pd
import glob
import os, sys
import matplotlib.pyplot as plt
import time
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
VERSION = 4
RANDOM_SEED = 26

## Load data

### Load Data - downsampled

In [2]:
# df_joint_train_org   = pd.read_csv(f'./features/cache_all_features_train_V{VERSION}.csv')
# df_joint_train_org   = df_joint_train_org.drop(columns=['GNE_max_gne','GNE_mean_gne','GNE_stddev_gne','GNE_sum_gne'])
                       
# df_joint_test_org = pd.read_csv(f'./features/cache_all_features_test_V{VERSION}.csv').drop(
#     columns=['GNE_max_gne','GNE_mean_gne','GNE_stddev_gne','GNE_sum_gne'])

# print("shape of train set: ", df_joint_train_org.shape)
# print("shape of test  set: ", df_joint_test_org.shape)


# df_joint_train  = pd.read_csv(f'./features/cache_train_V4_resampled_2500.csv')
df_joint_test = pd.read_csv(f'./cache_all_features_test_V4.csv')

# print("shape of train set: ", df_joint_train.shape)
print("shape of test  set: ", df_joint_test.shape)

df_joint_train_aug  = pd.read_csv(f'./cache_train_V4_augmented.csv')
feature_column_names = [i for i in df_joint_train_aug.columns \
                        if i not in ['file_path','renamed_file_path','split','sentiment_value','emotional_category']]
             
print("shape of train set: ", df_joint_train_aug.shape)
df_joint_train_aug.groupby('sentiment_value')['file_path'].count()

shape of test  set:  (1180, 1550)
shape of train set:  (24885, 1546)


sentiment_value
-1    7999
 0    8560
 1    8326
Name: file_path, dtype: int64

### best guess feature combinations

In [3]:
# generate selected features 
def generate_selected_features_by_type(feature_column_names,input,stats,number=1):
    selected_result = []
    for name in feature_column_names:
        if input+"_"+stats in name:
            selected_result.append(name)
    if number < len(selected_result):
        selected_result = selected_result[:number]
    return selected_result

# example to take mfcc 20 mean & std; mel32; zcr all 5 stats features
feature_MFCC20_mean  = generate_selected_features_by_type(feature_column_names,"mfcc","mean",20)
feature_MFCC20_std   = generate_selected_features_by_type(feature_column_names,"mfcc","std",20)
feature_mel32_median = generate_selected_features_by_type(feature_column_names,"mel32","median",32)
feature_mel32_std    = generate_selected_features_by_type(feature_column_names,"mel32","std",32)
feature_zcr_stats    = generate_selected_features_by_type(feature_column_names,"zcr","",5)
feature_rms_stats    = generate_selected_features_by_type(feature_column_names,"rms","",5)
selected_spect = ['Spectrum_band_energy_difference','Spectrum_band_density_difference','Spectrum_center_of_gravity_spectrum','Spectrum_skewness_spectrum','Spectrum_kurtosis_spectrum', 'Spectrum_stddev_spectrum','Spectrum_band_density', 'Spectrum_band_energy']
selected_formant = ['Formant_f1_mean','Formant_f1_median','Formant_f3_mean','Formant_fitch_vtl','Formant_mff','Formant_formant_dispersion']
selected_pitch = ['Pitch_pitch_slope_without_octave_jumps', 'Pitch_q3_pitch','Pitch_stddev_pitch', 'Pitch_mean_absolute_pitch_slope','Pitch_mean_pitch', 'Pitch_max_pitch', 'Pitch_q1_pitch', 'Pitch_min_pitch']
selected_intensity = ['Intensity_max_intensity', 'Intensity_q3_intensity','Intensity_median_intensity', 'Intensity_mean_intensity', 'Intensity_stddev_intensity','Intensity_relative_max_intensity_time']
selected_HNR = ['HNR_stddev_hnr', 'HNR_mean_hnr','HNR_relative_min_hnr_time','HNR_max_hnr']
selected_prosody = selected_intensity + selected_pitch # + ['Local Jitter','Local Shimmer']
selected_feature_names = feature_MFCC20_mean + feature_MFCC20_std + feature_mel32_median + feature_mel32_std + \
                        feature_zcr_stats + feature_rms_stats + selected_intensity + selected_pitch 

In [4]:
# default use augmented training set and balanced test set
X_train = df_joint_train_aug[selected_feature_names]
y_train_s = df_joint_train_aug['sentiment_value']
y_train_e = df_joint_train_aug['emotional_category']

# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# y_e_num = label_encoder.fit_transform(y_train_e)

X_test = df_joint_test[selected_feature_names]
y_test_s = df_joint_test['sentiment_value']
y_test_e = df_joint_test['emotional_category']

# y_test_e_num = label_encoder.fit_transform(y_test_e)

In [5]:
X_train.shape, X_test.shape

((24885, 128), (1180, 128))

In [6]:
# !pip install lightgbm

## Models

In [7]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score, precision_recall_fscore_support
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier,HistGradientBoostingClassifier
from lightgbm import LGBMClassifier

# Common adjustable parameters
common_params = {
    'RandomForest': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest1': {'n_estimators': 500, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest2': {'n_estimators': 1000, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest3': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 3, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest4': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 5, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest5': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 10, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest6': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 20, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest7': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':50, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest8': {'n_estimators': 200, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':200, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest9': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':500, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest10': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':5, 'random_state': RANDOM_SEED},
    'RandomForest11': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':10, 'random_state': RANDOM_SEED},
    'RandomForest12': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':False, 'n_jobs':3, 'random_state': RANDOM_SEED},

    
    
    'SVM': {'kernel': 'rbf', 'C': 1.0, 'probability': True},
    'KNN': {'n_neighbors': 2},
    'KNN1': {'n_neighbors': 3},
    'KNN2': {'n_neighbors': 4},
    'KNN3': {'n_neighbors': 5},
    'KNN4': {'n_neighbors': 6},
    'KNN5': {'n_neighbors': 8},
    'KNN6': {'n_neighbors': 10},
    'KNN7': {'n_neighbors': 12},
    'KNN8': {'n_neighbors': 15},
    'KNN9': {'n_neighbors': 20},
    'KNN10': {'n_neighbors': 30},
    'GradientBoosting': {'loss': 'log_loss', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                         'criterion': 'friedman_mse', 'min_samples_split': 2, 'max_depth': 3},
    'GradientBoostingFast': {'loss': 'log_loss', 'learning_rate': 0.1, 'max_iter': 50},
    'GradientBoostingFast1': {'loss': 'log_loss', 'learning_rate': 0.1, 'max_iter': 100},
    'GradientBoostingFast2': {'loss': 'log_loss', 'learning_rate': 0.1, 'max_iter': 200},
    'GradientBoostingFast3': {'loss': 'log_loss', 'learning_rate': 0.05, 'max_iter': 50},
    'GradientBoostingFast4': {'loss': 'log_loss', 'learning_rate': 0.05, 'max_iter': 100},
    'GradientBoostingFast5': {'loss': 'log_loss', 'learning_rate': 0.05, 'max_iter': 200},
    'GradientBoostingFast6': {'loss': 'log_loss', 'learning_rate': 0.5, 'max_iter': 50},
    'GradientBoostingFast7': {'loss': 'log_loss', 'learning_rate': 0.5, 'max_iter': 100},
    'GradientBoostingFast8': {'loss': 'log_loss', 'learning_rate': 0.5, 'max_iter': 200},
    'GradientBoostingFast9': {'loss': 'log_loss', 'learning_rate': 0.01, 'max_iter': 50},
    'GradientBoostingFast10': {'loss': 'log_loss', 'learning_rate': 0.01, 'max_iter': 100},
    'GradientBoostingFast11': {'loss': 'log_loss', 'learning_rate': 0.01, 'max_iter': 200},
    'GradientBoostingFast12': {'loss': 'log_loss', 'learning_rate': 1, 'max_iter': 30},
    'GradientBoostingFast13': {'loss': 'log_loss', 'learning_rate': 1, 'max_iter': 50},
    'GradientBoostingFast14': {'loss': 'log_loss', 'learning_rate': 1, 'max_iter': 100},
    'AdaBoost': {'n_estimators': 50, 'learning_rate': 1.0},
    'LightGBM': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}    
}

# Models with common adjustable parameters
dtree   = DecisionTreeClassifier()
rforest  = RandomForestClassifier(**common_params['RandomForest'])
rforest1 = RandomForestClassifier(**common_params['RandomForest1'])
rforest2 = RandomForestClassifier(**common_params['RandomForest2'])
rforest3 = RandomForestClassifier(**common_params['RandomForest3'])
rforest4 = RandomForestClassifier(**common_params['RandomForest4'])
rforest5 = RandomForestClassifier(**common_params['RandomForest5'])
rforest6 = RandomForestClassifier(**common_params['RandomForest6'])
rforest7 = RandomForestClassifier(**common_params['RandomForest7'])
rforest8 = RandomForestClassifier(**common_params['RandomForest8'])
rforest9 = RandomForestClassifier(**common_params['RandomForest9'])
rforest10 = RandomForestClassifier(**common_params['RandomForest10'])
rforest11 = RandomForestClassifier(**common_params['RandomForest11'])
rforest12 = RandomForestClassifier(**common_params['RandomForest12'])

svm     = SVC(**common_params['SVM'])
knn     = KNeighborsClassifier(**common_params['KNN'])
knn1     = KNeighborsClassifier(**common_params['KNN1'])
knn2     = KNeighborsClassifier(**common_params['KNN2'])
knn3     = KNeighborsClassifier(**common_params['KNN3'])
knn4     = KNeighborsClassifier(**common_params['KNN4'])
knn5     = KNeighborsClassifier(**common_params['KNN5'])
knn6     = KNeighborsClassifier(**common_params['KNN6'])
knn7     = KNeighborsClassifier(**common_params['KNN7'])
knn8     = KNeighborsClassifier(**common_params['KNN8'])
knn9     = KNeighborsClassifier(**common_params['KNN9'])
knn10     = KNeighborsClassifier(**common_params['KNN10'])
gboost  = GradientBoostingClassifier(**common_params['GradientBoosting'])
gb_fast = HistGradientBoostingClassifier(**common_params['GradientBoostingFast'])
gb_fast1 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast1'])
gb_fast2 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast2'])
gb_fast3 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast3'])
gb_fast4 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast4'])
gb_fast5 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast5'])
gb_fast6 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast6'])
gb_fast7 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast7'])
gb_fast8 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast8'])
gb_fast9 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast9'])
gb_fast10 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast10'])
gb_fast11 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast11'])
gb_fast12 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast12'])
gb_fast13 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast13'])
gb_fast14 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast14'])
adaBoost= AdaBoostClassifier(**common_params['AdaBoost'])
lightgbm=LGBMClassifier(**common_params['LightGBM'])

In [8]:
# def exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train, y_test):
#     start = time.time()
#     print(f'Model Name: {clf_model.__class__};\n Train set shape {X_train.shape}, num of class {y_train.unique().size}')
#     predictions = clf_model.fit(X_train, y_train).predict(X_test.values)
    
#     print(classification_report(y_test, predictions))
#     print(confusion_matrix(y_test, predictions))
    
#     precision, recall, f1score, support = precision_recall_fscore_support(y_test, predictions, average=None)
#     # TODO make all metrics into result dict for recording 
#     probabilities = clf_model.predict_proba(X_test.values)
#     print('prbabilities distribution: \n', pd.DataFrame(probabilities,columns=clf_model.classes_).describe())
#     print(f'Time taken: {round(time.time()-start,3)} seconds.\n')

In [9]:
import pickle

def exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train, y_test,verbose=True):
    start = time.time()
    
    clf_model.fit(X_train, y_train)
    predictions = clf_model.predict(X_test.values)
    
    # Calculate metrics
    report = classification_report(y_test, predictions, output_dict=True)
    metrics = {
        'accuracy': report['accuracy'],
        'precision': report['macro avg']['precision'],
        'recall': report['macro avg']['recall'],
        'f1-score': report['macro avg']['f1-score']
    }
    for class_name in report.keys():
        if class_name not in ['accuracy', 'macro avg', 'weighted avg']:
            metrics[class_name+'_precision'] = report[class_name]['precision']
            metrics[class_name+'_recall'] = report[class_name]['recall'],
            metrics[class_name+'_f1-score'] = report[class_name]['f1-score']
    
    feature_columns = list(X_train.columns)
    num_classes = y_train.nunique()
    class_names = list(y_train.unique())
    
    model_filename = f"./models/{clf_model.__class__.__name__}_model"
    model_filename += f"_{num_classes}cls_{len(feature_columns)}feat_{round(report['accuracy']*100)}acc.pkl"
    with open(model_filename, 'wb') as file:
        pickle.dump(clf_model, file)
    
    results = {**metrics,
        'num_classes': num_classes,
        'class_names': class_names,
        'model_filename': model_filename,
        'feature_columns': feature_columns,
    }
    
    if verbose:
        print(f"Model Name: {clf_model.__class__.__name__};\nTrain set shape {X_train.shape}, num of class {num_classes}")
        print(classification_report(y_test, predictions))
        print(confusion_matrix(y_test, predictions))
        probabilities = clf_model.predict_proba(X_test.values)
        print('Probabilities distribution:\n', pd.DataFrame(probabilities, columns=clf_model.classes_).describe())
    print(f"Model: {clf_model.__class__.__name__};Time taken: {round(time.time()-start, 3)} seconds.\n")

    return results, clf_model


### Sentiment 3-class Classifier Sample code

In [10]:
result, m_trained = exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.84      0.84       748
           0       0.62      0.80      0.70       183
           1       0.77      0.59      0.67       249

    accuracy                           0.78      1180
   macro avg       0.74      0.74      0.74      1180
weighted avg       0.79      0.78      0.78      1180

[[631  76  41]
 [ 34 147   2]
 [ 90  13 146]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.579832     0.200633     0.219535
std       0.326096     0.286464     0.283114
min       0.001921     0.000342     0.000787
25%       0.288254     0.003548     0.025408
50%       0.645958     0.035752     0.093288
75%       0.882237     0.320368     0.296099
max       0.997356     0.997293     0.994807
Model: HistGradientBoostingClassifier;Time taken: 

In [11]:
result, m_trained = exp_clf_with_feature_selected(gb_fast1, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.87      0.85       748
           0       0.64      0.76      0.70       183
           1       0.81      0.61      0.69       249

    accuracy                           0.80      1180
   macro avg       0.76      0.75      0.75      1180
weighted avg       0.80      0.80      0.80      1180

[[651  63  34]
 [ 42 139   2]
 [ 84  14 151]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.602846     0.188156     0.208998
std       0.344864     0.294542     0.295917
min       0.000430     0.000014     0.000067
25%       0.304320     0.000684     0.014157
50%       0.688532     0.015482     0.065842
75%       0.935971     0.279476     0.267506
max       0.999279     0.999199     0.998946
Model: HistGradientBoostingClassifier;Time taken: 

In [12]:
result, m_trained = exp_clf_with_feature_selected(gb_fast2, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.83      0.88      0.86       748
           0       0.66      0.77      0.71       183
           1       0.81      0.58      0.68       249

    accuracy                           0.80      1180
   macro avg       0.77      0.74      0.75      1180
weighted avg       0.80      0.80      0.79      1180

[[658  60  30]
 [ 40 140   3]
 [ 93  12 144]]
Probabilities distribution:
                 -1             0            1
count  1180.000000  1.180000e+03  1180.000000
mean      0.628253  1.822835e-01     0.189463
std       0.369359  3.096184e-01     0.304554
min       0.000054  4.376323e-07     0.000004
25%       0.277474  9.640618e-05     0.004224
50%       0.772245  4.655065e-03     0.034353
75%       0.971246  2.343481e-01     0.215331
max       0.999829  9.999366e-01     0.999852
Model: HistGradientBoostingClassifier;Tim

In [13]:
result, m_trained = exp_clf_with_feature_selected(gb_fast3, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.85      0.81      0.83       748
           0       0.58      0.81      0.67       183
           1       0.74      0.61      0.67       249

    accuracy                           0.77      1180
   macro avg       0.72      0.75      0.73      1180
weighted avg       0.78      0.77      0.77      1180

[[609  90  49]
 [ 31 148   4]
 [ 78  18 153]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.534023     0.219057     0.246921
std       0.289895     0.264430     0.264177
min       0.011405     0.005486     0.006868
25%       0.298396     0.020340     0.055992
50%       0.552432     0.077829     0.138554
75%       0.800916     0.371871     0.345190
max       0.979871     0.981555     0.976049
Model: HistGradientBoostingClassifier;Time taken: 

In [14]:
result, m_trained = exp_clf_with_feature_selected(gb_fast4, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.85      0.85       748
           0       0.64      0.79      0.71       183
           1       0.78      0.62      0.69       249

    accuracy                           0.79      1180
   macro avg       0.75      0.75      0.75      1180
weighted avg       0.80      0.79      0.79      1180

[[638  68  42]
 [ 36 144   3]
 [ 82  12 155]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.574983     0.198485     0.226532
std       0.321220     0.281643     0.283442
min       0.002803     0.000362     0.000827
25%       0.299669     0.003597     0.028734
50%       0.617806     0.035650     0.100456
75%       0.877130     0.324965     0.313763
max       0.997322     0.995728     0.995441
Model: HistGradientBoostingClassifier;Time taken: 

In [15]:
result, m_trained = exp_clf_with_feature_selected(gb_fast5, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.87      0.85       748
           0       0.63      0.74      0.68       183
           1       0.82      0.62      0.71       249

    accuracy                           0.80      1180
   macro avg       0.76      0.74      0.75      1180
weighted avg       0.80      0.80      0.80      1180

[[650  67  31]
 [ 45 136   2]
 [ 83  12 154]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.607902     0.187259     0.204839
std       0.342171     0.292539     0.290895
min       0.000515     0.000015     0.000108
25%       0.304054     0.000690     0.014104
50%       0.700370     0.016721     0.063814
75%       0.935208     0.284100     0.251976
max       0.999020     0.999356     0.998912
Model: HistGradientBoostingClassifier;Time taken: 

In [16]:
result, m_trained = exp_clf_with_feature_selected(gb_fast6, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.83      0.83       748
           0       0.60      0.71      0.65       183
           1       0.72      0.63      0.67       249

    accuracy                           0.77      1180
   macro avg       0.72      0.72      0.72      1180
weighted avg       0.77      0.77      0.77      1180

[[623  68  57]
 [ 48 130   5]
 [ 75  17 157]]
Probabilities distribution:
                 -1             0             1
count  1180.000000  1.180000e+03  1.180000e+03
mean      0.620790  1.854884e-01  1.937218e-01
std       0.399865  3.300765e-01  3.316734e-01
min       0.000003  1.768550e-10  1.952942e-08
25%       0.170467  2.180976e-05  8.252229e-04
50%       0.817755  1.312227e-03  1.009409e-02
75%       0.992808  1.851899e-01  2.074913e-01
max       0.999992  9.999966e-01  9.999917e-01
Model: HistGradientBoostingClass

In [17]:
result, m_trained = exp_clf_with_feature_selected(gb_fast7, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.83      0.88      0.85       748
           0       0.62      0.69      0.66       183
           1       0.83      0.61      0.71       249

    accuracy                           0.79      1180
   macro avg       0.76      0.73      0.74      1180
weighted avg       0.80      0.79      0.79      1180

[[656  63  29]
 [ 54 127   2]
 [ 82  14 153]]
Probabilities distribution:
                  -1             0             1
count  1.180000e+03  1.180000e+03  1.180000e+03
mean   6.546903e-01  1.733943e-01  1.719155e-01
std    4.099661e-01  3.341139e-01  3.274779e-01
min    1.732949e-08  3.430894e-11  2.999192e-10
25%    1.747561e-01  5.721470e-07  1.076542e-04
50%    9.174468e-01  1.802939e-04  2.783262e-03
75%    9.985856e-01  1.030804e-01  1.197816e-01
max    9.999998e-01  1.000000e+00  1.000000e+00
Model: HistGradientBoos

In [18]:
result, m_trained = exp_clf_with_feature_selected(gb_fast8, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.83      0.86      0.85       748
           0       0.68      0.73      0.70       183
           1       0.73      0.60      0.66       249

    accuracy                           0.79      1180
   macro avg       0.75      0.73      0.73      1180
weighted avg       0.78      0.79      0.78      1180

[[646  54  48]
 [ 44 133   6]
 [ 90  10 149]]
Probabilities distribution:
                  -1             0             1
count  1.180000e+03  1.180000e+03  1.180000e+03
mean   6.501631e-01  1.712689e-01  1.785681e-01
std    4.097067e-01  3.326294e-01  3.326888e-01
min    1.023315e-07  1.665993e-20  4.053908e-10
25%    1.662976e-01  2.732443e-07  7.638769e-05
50%    9.078131e-01  1.424437e-04  3.316124e-03
75%    9.987155e-01  9.401175e-02  1.117963e-01
max    1.000000e+00  9.999999e-01  9.999997e-01
Model: HistGradientBoos

In [19]:
result, m_trained = exp_clf_with_feature_selected(gb_fast9, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.87      0.69      0.77       748
           0       0.47      0.86      0.61       183
           1       0.64      0.63      0.64       249

    accuracy                           0.71      1180
   macro avg       0.66      0.73      0.67      1180
weighted avg       0.76      0.71      0.72      1180

[[516 147  85]
 [ 22 158   3]
 [ 58  33 158]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.401789     0.293705     0.304506
std       0.137203     0.137391     0.137491
min       0.149523     0.131752     0.145936
25%       0.300502     0.177148     0.200522
50%       0.385872     0.241010     0.262544
75%       0.515662     0.393200     0.358893
max       0.682852     0.691512     0.692892
Model: HistGradientBoostingClassifier;Time taken: 

In [20]:
result, m_trained = exp_clf_with_feature_selected(gb_fast10, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.87      0.75      0.81       748
           0       0.53      0.89      0.67       183
           1       0.67      0.62      0.64       249

    accuracy                           0.74      1180
   macro avg       0.69      0.75      0.71      1180
weighted avg       0.78      0.74      0.75      1180

[[562 111  75]
 [ 18 162   3]
 [ 64  30 155]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.454681     0.262517     0.282802
std       0.209157     0.202911     0.203215
min       0.061644     0.058698     0.067425
25%       0.300180     0.097801     0.129649
50%       0.438927     0.179666     0.215094
75%       0.635228     0.405110     0.365523
max       0.853005     0.869118     0.861156
Model: HistGradientBoostingClassifier;Time taken: 

In [21]:
result, m_trained = exp_clf_with_feature_selected(gb_fast11, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.86      0.81      0.83       748
           0       0.59      0.84      0.69       183
           1       0.73      0.62      0.67       249

    accuracy                           0.77      1180
   macro avg       0.72      0.76      0.73      1180
weighted avg       0.79      0.77      0.78      1180

[[604  89  55]
 [ 26 154   3]
 [ 76  18 155]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.516317     0.227743     0.255940
std       0.274260     0.252787     0.253564
min       0.015182     0.011789     0.014589
25%       0.292404     0.033756     0.070328
50%       0.516283     0.093682     0.153329
75%       0.768941     0.374888     0.359004
max       0.956583     0.969941     0.962183
Model: HistGradientBoostingClassifier;Time taken: 

In [22]:
result, m_trained = exp_clf_with_feature_selected(gb_fast12, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.80      0.75      0.77       748
           0       0.50      0.63      0.56       183
           1       0.57      0.57      0.57       249

    accuracy                           0.69      1180
   macro avg       0.62      0.65      0.63      1180
weighted avg       0.70      0.69      0.70      1180

[[559  92  97]
 [ 56 115  12]
 [ 85  22 142]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.568662     0.199526     0.231812
std       0.404228     0.336991     0.349677
min       0.000000     0.000000     0.000000
25%       0.109054     0.000049     0.001109
50%       0.660784     0.003828     0.025755
75%       0.987256     0.265238     0.361017
max       1.000000     1.000000     1.000000
Model: HistGradientBoostingClassifier;Time taken: 

In [23]:
result, m_trained = exp_clf_with_feature_selected(gb_fast13, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.81      0.74      0.77       748
           0       0.48      0.65      0.55       183
           1       0.56      0.55      0.56       249

    accuracy                           0.69      1180
   macro avg       0.61      0.65      0.63      1180
weighted avg       0.70      0.69      0.69      1180

[[552  97  99]
 [ 53 119  11]
 [ 79  32 138]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.571648     0.207848     0.220504
std       0.408394     0.343565     0.348927
min       0.000000     0.000000     0.000000
25%       0.105476     0.000006     0.000726
50%       0.706170     0.004730     0.019901
75%       0.981708     0.275167     0.287997
max       1.000000     1.000000     1.000000
Model: HistGradientBoostingClassifier;Time taken: 

In [24]:
result, m_trained = exp_clf_with_feature_selected(gb_fast14, X_train, X_test, y_train_s, y_test_s)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.80      0.71      0.75       748
           0       0.50      0.68      0.58       183
           1       0.53      0.57      0.55       249

    accuracy                           0.67      1180
   macro avg       0.61      0.65      0.62      1180
weighted avg       0.69      0.67      0.68      1180

[[528 105 115]
 [ 48 125  10]
 [ 88  20 141]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.554819     0.209735     0.235446
std       0.405849     0.347260     0.349159
min       0.000000     0.000000     0.000000
25%       0.082819     0.000035     0.001196
50%       0.646573     0.004674     0.028738
75%       0.978910     0.268511     0.348199
max       1.000000     1.000000     1.000000
Model: HistGradientBoostingClassifier;Time taken: 

In [25]:
# change model as the first parameter in the function 
# result, m_trained = exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_s, y_test_s)

In [26]:
# result, m_trained = exp_clf_with_feature_selected(adaBoost, X_train, X_test, y_train_s, y_test_s)

In [27]:
# result, m_trained = exp_clf_with_feature_selected(gboost, X_train, X_test, y_train_s, y_test_s)

In [28]:
# result, m_trained = exp_clf_with_feature_selected(knn, X_train, X_test, y_train_s, y_test_s)

In [29]:
# result, m_trained = exp_clf_with_feature_selected(svm, X_train, X_test, y_train_s, y_test_s)

In [30]:
# result, m_trained = exp_clf_with_feature_selected(rforest, X_train, X_test, y_train_s, y_test_s)

In [31]:
# result, m_trained = exp_clf_with_feature_selected(lightgbm, X_train, X_test, y_train_s, y_test_s)

In [32]:
# result, m_trained = exp_clf_with_feature_selected(dtree, X_train, X_test, y_train_s, y_test_s)

### How to save experiment metrics result

In [33]:
exp_results = []
for clf_model in [gb_fast,gb_fast1,gb_fast2,gb_fast3,gb_fast4,gb_fast5,gb_fast6,gb_fast7,gb_fast8,gb_fast9,gb_fast10,gb_fast11,gb_fast12,gb_fast13,gb_fast14]:
    result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_s, y_test_s,verbose=False)
    exp_results.append(result)
pd.DataFrame(exp_results)

Model: HistGradientBoostingClassifier;Time taken: 15.97 seconds.

Model: HistGradientBoostingClassifier;Time taken: 30.712 seconds.

Model: HistGradientBoostingClassifier;Time taken: 55.205 seconds.

Model: HistGradientBoostingClassifier;Time taken: 12.449 seconds.

Model: HistGradientBoostingClassifier;Time taken: 23.806 seconds.

Model: HistGradientBoostingClassifier;Time taken: 45.913 seconds.

Model: HistGradientBoostingClassifier;Time taken: 10.135 seconds.

Model: HistGradientBoostingClassifier;Time taken: 16.773 seconds.

Model: HistGradientBoostingClassifier;Time taken: 36.551 seconds.

Model: HistGradientBoostingClassifier;Time taken: 12.774 seconds.

Model: HistGradientBoostingClassifier;Time taken: 24.327 seconds.

Model: HistGradientBoostingClassifier;Time taken: 47.076 seconds.

Model: HistGradientBoostingClassifier;Time taken: 3.639 seconds.

Model: HistGradientBoostingClassifier;Time taken: 3.45 seconds.

Model: HistGradientBoostingClassifier;Time taken: 3.664 seconds.



Unnamed: 0,accuracy,precision,recall,f1-score,-1_precision,-1_recall,-1_f1-score,0_precision,0_recall,0_f1-score,1_precision,1_recall,1_f1-score,num_classes,class_names,model_filename,feature_columns
0,0.768644,0.721974,0.731999,0.721362,0.834239,"(0.820855614973262,)",0.827493,0.578059,"(0.7486338797814208,)",0.652381,0.753623,"(0.6265060240963856,)",0.684211,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
1,0.789831,0.751935,0.742319,0.742806,0.832468,"(0.856951871657754,)",0.844532,0.65566,"(0.7595628415300546,)",0.703797,0.767677,"(0.6104417670682731,)",0.680089,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
2,0.788983,0.756785,0.722756,0.733728,0.821026,"(0.8770053475935828,)",0.848093,0.651515,"(0.7049180327868853,)",0.677165,0.797814,"(0.5863453815261044,)",0.675926,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
3,0.786441,0.741769,0.764191,0.745392,0.855756,"(0.8248663101604278,)",0.840027,0.604,"(0.825136612021858,)",0.69746,0.76555,"(0.642570281124498,)",0.69869,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
4,0.780508,0.738416,0.746565,0.734738,0.841892,"(0.8328877005347594,)",0.837366,0.596708,"(0.7923497267759563,)",0.680751,0.77665,"(0.6144578313253012,)",0.686099,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
5,0.797458,0.765834,0.745992,0.748804,0.83376,"(0.8716577540106952,)",0.852288,0.660465,"(0.7759562841530054,)",0.713568,0.803279,"(0.5903614457831325,)",0.680556,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
6,0.767797,0.72282,0.704664,0.710706,0.814103,"(0.8489304812834224,)",0.831152,0.613065,"(0.6666666666666666,)",0.638743,0.741294,"(0.5983935742971888,)",0.662222,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
7,0.788136,0.757969,0.728032,0.735682,0.818523,"(0.8743315508021391,)",0.845507,0.681373,"(0.7595628415300546,)",0.718346,0.774011,"(0.5502008032128514,)",0.643192,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
8,0.784746,0.747767,0.728783,0.730972,0.829049,"(0.8622994652406417,)",0.845347,0.616438,"(0.7377049180327869,)",0.671642,0.797814,"(0.5863453815261044,)",0.675926,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
9,0.716949,0.668181,0.743677,0.682324,0.884941,"(0.6991978609625669,)",0.78118,0.480712,"(0.8852459016393442,)",0.623077,0.638889,"(0.6465863453815262,)",0.642715,3,"[-1, 1, 0]",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."


In [34]:
# exp_results = []
# for clf_model in [rforest,adaBoost,gb_fast,gboost,knn,svm,lightgbm,dtree]:
#     result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_s, y_test_s,verbose=False)
#     exp_results.append(result)
# pd.DataFrame(exp_results)

In [35]:
pd.DataFrame(exp_results).to_excel("exp_result-0316-v4-aug-gb_fast.xlsx")

### Emotion 8-class

In [36]:
exp_results = []
for clf_model in [gb_fast,gb_fast1,gb_fast2,gb_fast3,gb_fast4,gb_fast5,gb_fast6,gb_fast7,gb_fast8,gb_fast9,gb_fast10,gb_fast11,gb_fast12,gb_fast13,gb_fast14]:
    result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_e, y_test_e,verbose=False)
    exp_results.append(result)
pd.DataFrame(exp_results)

Model: HistGradientBoostingClassifier;Time taken: 30.821 seconds.

Model: HistGradientBoostingClassifier;Time taken: 60.698 seconds.

Model: HistGradientBoostingClassifier;Time taken: 118.948 seconds.

Model: HistGradientBoostingClassifier;Time taken: 30.162 seconds.

Model: HistGradientBoostingClassifier;Time taken: 59.169 seconds.

Model: HistGradientBoostingClassifier;Time taken: 117.237 seconds.

Model: HistGradientBoostingClassifier;Time taken: 7.11 seconds.

Model: HistGradientBoostingClassifier;Time taken: 6.891 seconds.

Model: HistGradientBoostingClassifier;Time taken: 6.895 seconds.

Model: HistGradientBoostingClassifier;Time taken: 30.262 seconds.

Model: HistGradientBoostingClassifier;Time taken: 59.674 seconds.

Model: HistGradientBoostingClassifier;Time taken: 116.962 seconds.

Model: HistGradientBoostingClassifier;Time taken: 6.835 seconds.

Model: HistGradientBoostingClassifier;Time taken: 6.765 seconds.

Model: HistGradientBoostingClassifier;Time taken: 6.741 seconds.


Unnamed: 0,accuracy,precision,recall,f1-score,Anger_precision,Anger_recall,Anger_f1-score,Calmness_precision,Calmness_recall,Calmness_f1-score,...,Sadness_precision,Sadness_recall,Sadness_f1-score,Surprise_precision,Surprise_recall,Surprise_f1-score,num_classes,class_names,model_filename,feature_columns
0,0.651695,0.665643,0.687877,0.666853,0.80663,"(0.7807486631016043,)",0.793478,0.590909,"(0.8125,)",0.684211,...,0.635468,"(0.6898395721925134,)",0.661538,0.825397,"(0.8387096774193549,)",0.832,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
1,0.664407,0.69297,0.70338,0.690414,0.841176,"(0.7647058823529411,)",0.80112,0.7,"(0.875,)",0.777778,...,0.65,"(0.6951871657754011,)",0.671835,0.864407,"(0.8225806451612904,)",0.842975,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
2,0.655932,0.702068,0.682567,0.686181,0.807018,"(0.7379679144385026,)",0.77095,0.8,"(0.75,)",0.774194,...,0.595349,"(0.6844919786096256,)",0.636816,0.927273,"(0.8225806451612904,)",0.871795,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
3,0.622034,0.667129,0.673533,0.650677,0.808642,"(0.7005347593582888,)",0.750716,0.7,"(0.875,)",0.777778,...,0.661376,"(0.6684491978609626,)",0.664894,0.791045,"(0.8548387096774194,)",0.821705,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
4,0.649153,0.674323,0.680235,0.666509,0.806818,"(0.7593582887700535,)",0.782369,0.631579,"(0.75,)",0.685714,...,0.634518,"(0.6684491978609626,)",0.651042,0.854839,"(0.8548387096774194,)",0.854839,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
5,0.65678,0.69135,0.688792,0.681894,0.790055,"(0.7647058823529411,)",0.777174,0.722222,"(0.8125,)",0.764706,...,0.621495,"(0.7112299465240641,)",0.663342,0.877193,"(0.8064516129032258,)",0.840336,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
6,0.544915,0.534435,0.572151,0.536546,0.711111,"(0.6844919786096256,)",0.697548,0.333333,"(0.6875,)",0.44898,...,0.566667,"(0.5454545454545454,)",0.555858,0.634921,"(0.6451612903225806,)",0.64,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
7,0.517797,0.513617,0.569479,0.520771,0.688623,"(0.6149732620320856,)",0.649718,0.325,"(0.8125,)",0.464286,...,0.571429,"(0.4919786096256685,)",0.528736,0.611111,"(0.7096774193548387,)",0.656716,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
8,0.508475,0.488807,0.514597,0.489941,0.596939,"(0.6256684491978609,)",0.610966,0.258065,"(0.5,)",0.340426,...,0.541176,"(0.4919786096256685,)",0.515406,0.564516,"(0.5645161290322581,)",0.564516,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
9,0.514407,0.71449,0.560243,0.543471,0.888889,"(0.5133689839572193,)",0.650847,0.647059,"(0.6875,)",0.666667,...,0.772727,"(0.45454545454545453,)",0.572391,0.758065,"(0.7580645161290323,)",0.758065,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/HistGradientBoostingClassifier_model_...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."


In [37]:
pd.DataFrame(exp_results).to_excel("exp_result-0316-v4-aug-gb_fast-8.xlsx")

In [38]:
exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.79      0.73      0.76       187
    Calmness       0.63      0.75      0.69        16
     Disgust       0.67      0.43      0.52       187
        Fear       0.65      0.56      0.60       187
   Happiness       0.59      0.61      0.60       187
  Neutrality       0.51      0.81      0.63       167
     Sadness       0.66      0.66      0.66       187
    Surprise       0.86      0.81      0.83        62

    accuracy                           0.64      1180
   macro avg       0.67      0.67      0.66      1180
weighted avg       0.66      0.64      0.64      1180

[[136   0   8   9  31   2   0   1]
 [  0  12   0   0   0   2   2   0]
 [  9   1  80  12  20  52  12   1]
 [  5   0   8 105  20  19  28   2]
 [ 22   2  10  13 115  20   3   2]
 [  0   0   4   5   4 136  18   0]
 [  0   4   7  13   2  36 123   2]
 [  0   0   3  

({'accuracy': 0.6415254237288136,
  'precision': 0.6697683067525442,
  'recall': 0.6700159604028101,
  'f1-score': 0.6608080385414765,
  'Anger_precision': 0.7906976744186046,
  'Anger_recall': (0.7272727272727273,),
  'Anger_f1-score': 0.7576601671309192,
  'Calmness_precision': 0.631578947368421,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.6857142857142857,
  'Disgust_precision': 0.6666666666666666,
  'Disgust_recall': (0.42780748663101603,),
  'Disgust_f1-score': 0.521172638436482,
  'Fear_precision': 0.6521739130434783,
  'Fear_recall': (0.5614973262032086,),
  'Fear_f1-score': 0.603448275862069,
  'Happiness_precision': 0.5897435897435898,
  'Happiness_recall': (0.6149732620320856,),
  'Happiness_f1-score': 0.6020942408376964,
  'Neutrality_precision': 0.5074626865671642,
  'Neutrality_recall': (0.8143712574850299,),
  'Neutrality_f1-score': 0.6252873563218392,
  'Sadness_precision': 0.6577540106951871,
  'Sadness_recall': (0.6577540106951871,),
  'Sadness_f1-score': 0.

In [39]:
exp_clf_with_feature_selected(gb_fast1, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.80      0.75      0.77       187
    Calmness       0.82      0.88      0.85        16
     Disgust       0.63      0.42      0.50       187
        Fear       0.64      0.56      0.60       187
   Happiness       0.59      0.61      0.60       187
  Neutrality       0.54      0.77      0.64       167
     Sadness       0.58      0.65      0.61       187
    Surprise       0.93      0.84      0.88        62

    accuracy                           0.64      1180
   macro avg       0.69      0.68      0.68      1180
weighted avg       0.65      0.64      0.64      1180

[[140   0  12   5  27   1   2   0]
 [  0  14   0   0   0   1   1   0]
 [  9   0  78  12  21  44  23   0]
 [  6   0   4 105  21  16  35   0]
 [ 18   0  14  15 115  17   6   2]
 [  0   1   6   6   5 128  21   0]
 [  0   2   8  20   5  28 122   2]
 [  3   0   2  

({'accuracy': 0.6389830508474577,
  'precision': 0.6903207346616143,
  'recall': 0.6843536437130913,
  'f1-score': 0.6812240886653207,
  'Anger_precision': 0.7954545454545454,
  'Anger_recall': (0.7486631016042781,),
  'Anger_f1-score': 0.7713498622589533,
  'Calmness_precision': 0.8235294117647058,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.8484848484848485,
  'Disgust_precision': 0.6290322580645161,
  'Disgust_recall': (0.41711229946524064,),
  'Disgust_f1-score': 0.5016077170418006,
  'Fear_precision': 0.6363636363636364,
  'Fear_recall': (0.5614973262032086,),
  'Fear_f1-score': 0.5965909090909092,
  'Happiness_precision': 0.5867346938775511,
  'Happiness_recall': (0.6149732620320856,),
  'Happiness_f1-score': 0.6005221932114883,
  'Neutrality_precision': 0.5446808510638298,
  'Neutrality_recall': (0.7664670658682635,),
  'Neutrality_f1-score': 0.6368159203980099,
  'Sadness_precision': 0.5781990521327014,
  'Sadness_recall': (0.6524064171122995,),
  'Sadness_f1-score'

In [40]:
exp_clf_with_feature_selected(gb_fast2, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.80      0.76      0.78       187
    Calmness       0.71      0.94      0.81        16
     Disgust       0.67      0.45      0.54       187
        Fear       0.58      0.55      0.56       187
   Happiness       0.61      0.63      0.62       187
  Neutrality       0.57      0.79      0.66       167
     Sadness       0.63      0.66      0.64       187
    Surprise       0.89      0.82      0.86        62

    accuracy                           0.65      1180
   macro avg       0.68      0.70      0.68      1180
weighted avg       0.66      0.65      0.65      1180

[[142   0  10   9  24   1   1   0]
 [  0  15   0   0   0   0   1   0]
 [  8   0  85  17  23  40  13   1]
 [  6   0   5 102  20  17  36   1]
 [ 20   1   9  17 117  17   4   2]
 [  0   2   6   9   3 132  15   0]
 [  0   3  11  20   3  25 123   2]
 [  2   0   1  

({'accuracy': 0.65,
  'precision': 0.6822932890922159,
  'recall': 0.6991600694376299,
  'f1-score': 0.6836452329809156,
  'Anger_precision': 0.797752808988764,
  'Anger_recall': (0.7593582887700535,),
  'Anger_f1-score': 0.7780821917808219,
  'Calmness_precision': 0.7142857142857143,
  'Calmness_recall': (0.9375,),
  'Calmness_f1-score': 0.8108108108108109,
  'Disgust_precision': 0.6692913385826772,
  'Disgust_recall': (0.45454545454545453,),
  'Disgust_f1-score': 0.5414012738853503,
  'Fear_precision': 0.5795454545454546,
  'Fear_recall': (0.5454545454545454,),
  'Fear_f1-score': 0.5619834710743802,
  'Happiness_precision': 0.6062176165803109,
  'Happiness_recall': (0.6256684491978609,),
  'Happiness_f1-score': 0.6157894736842106,
  'Neutrality_precision': 0.5689655172413793,
  'Neutrality_recall': (0.7904191616766467,),
  'Neutrality_f1-score': 0.6616541353383458,
  'Sadness_precision': 0.6275510204081632,
  'Sadness_recall': (0.6577540106951871,),
  'Sadness_f1-score': 0.6422976501

In [41]:
exp_clf_with_feature_selected(gb_fast3, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.71      0.75       187
    Calmness       0.58      0.69      0.63        16
     Disgust       0.68      0.35      0.46       187
        Fear       0.70      0.47      0.56       187
   Happiness       0.55      0.65      0.60       187
  Neutrality       0.46      0.86      0.60       167
     Sadness       0.66      0.65      0.65       187
    Surprise       0.80      0.82      0.81        62

    accuracy                           0.62      1180
   macro avg       0.66      0.65      0.63      1180
weighted avg       0.65      0.62      0.62      1180

[[132   0   9   4  39   3   0   0]
 [  0  11   0   0   0   1   4   0]
 [  3   1  65  11  29  64  12   2]
 [  7   0   6  88  17  32  30   7]
 [ 20   1   6   8 121  26   3   2]
 [  0   3   2   4   3 143  12   0]
 [  0   3   6   7   6  42 121   2]
 [  1   0   2  

({'accuracy': 0.6203389830508474,
  'precision': 0.6550302909933848,
  'recall': 0.6480687360616012,
  'f1-score': 0.633035875158484,
  'Anger_precision': 0.8098159509202454,
  'Anger_recall': (0.7058823529411765,),
  'Anger_f1-score': 0.7542857142857144,
  'Calmness_precision': 0.5789473684210527,
  'Calmness_recall': (0.6875,),
  'Calmness_f1-score': 0.6285714285714286,
  'Disgust_precision': 0.6770833333333334,
  'Disgust_recall': (0.34759358288770054,),
  'Disgust_f1-score': 0.45936395759717313,
  'Fear_precision': 0.704,
  'Fear_recall': (0.47058823529411764,),
  'Fear_f1-score': 0.5641025641025641,
  'Happiness_precision': 0.5525114155251142,
  'Happiness_recall': (0.6470588235294118,),
  'Happiness_f1-score': 0.5960591133004927,
  'Neutrality_precision': 0.45980707395498394,
  'Neutrality_recall': (0.8562874251497006,),
  'Neutrality_f1-score': 0.5983263598326359,
  'Sadness_precision': 0.6612021857923497,
  'Sadness_recall': (0.6470588235294118,),
  'Sadness_f1-score': 0.654054

In [42]:
exp_clf_with_feature_selected(gb_fast4, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.83      0.75      0.79       187
    Calmness       0.52      0.69      0.59        16
     Disgust       0.63      0.34      0.44       187
        Fear       0.67      0.55      0.60       187
   Happiness       0.62      0.68      0.65       187
  Neutrality       0.52      0.81      0.63       167
     Sadness       0.62      0.67      0.64       187
    Surprise       0.85      0.84      0.85        62

    accuracy                           0.64      1180
   macro avg       0.66      0.67      0.65      1180
weighted avg       0.66      0.64      0.64      1180

[[141   0  12   4  27   2   1   0]
 [  0  11   0   0   0   1   4   0]
 [  8   2  64  14  24  56  18   1]
 [  7   0   4 102  17  19  34   4]
 [ 13   2   7  12 128  17   6   2]
 [  0   2   8   6   4 135  12   0]
 [  0   4   6  13   5  31 126   2]
 [  1   0   1  

({'accuracy': 0.6432203389830509,
  'precision': 0.6562588756857702,
  'recall': 0.6668241138690361,
  'f1-score': 0.649548657093832,
  'Anger_precision': 0.8294117647058824,
  'Anger_recall': (0.7540106951871658,),
  'Anger_f1-score': 0.7899159663865547,
  'Calmness_precision': 0.5238095238095238,
  'Calmness_recall': (0.6875,),
  'Calmness_f1-score': 0.5945945945945946,
  'Disgust_precision': 0.6274509803921569,
  'Disgust_recall': (0.3422459893048128,),
  'Disgust_f1-score': 0.44290657439446357,
  'Fear_precision': 0.6666666666666666,
  'Fear_recall': (0.5454545454545454,),
  'Fear_f1-score': 0.6,
  'Happiness_precision': 0.6153846153846154,
  'Happiness_recall': (0.6844919786096256,),
  'Happiness_f1-score': 0.6481012658227848,
  'Neutrality_precision': 0.5172413793103449,
  'Neutrality_recall': (0.8083832335329342,),
  'Neutrality_f1-score': 0.630841121495327,
  'Sadness_precision': 0.6176470588235294,
  'Sadness_recall': (0.6737967914438503,),
  'Sadness_f1-score': 0.644501278772

In [43]:
exp_clf_with_feature_selected(gb_fast5, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.80      0.81       187
    Calmness       0.60      0.75      0.67        16
     Disgust       0.64      0.43      0.51       187
        Fear       0.65      0.56      0.60       187
   Happiness       0.60      0.63      0.61       187
  Neutrality       0.55      0.78      0.65       167
     Sadness       0.61      0.67      0.64       187
    Surprise       0.91      0.82      0.86        62

    accuracy                           0.65      1180
   macro avg       0.67      0.68      0.67      1180
weighted avg       0.66      0.65      0.65      1180

[[149   0  10   4  22   2   0   0]
 [  0  12   0   0   0   0   4   0]
 [ 10   0  80  14  24  42  16   1]
 [  4   0   5 104  22  16  35   1]
 [ 19   3   9  15 117  18   4   2]
 [  0   2   7   5   4 131  18   0]
 [  1   3  11  15   2  29 125   1]
 [  0   0   3  

({'accuracy': 0.6516949152542373,
  'precision': 0.6726474981998883,
  'recall': 0.6789847616307836,
  'f1-score': 0.6686494991911505,
  'Anger_precision': 0.8142076502732241,
  'Anger_recall': (0.7967914438502673,),
  'Anger_f1-score': 0.8054054054054055,
  'Calmness_precision': 0.6,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.6666666666666665,
  'Disgust_precision': 0.64,
  'Disgust_recall': (0.42780748663101603,),
  'Disgust_f1-score': 0.5128205128205128,
  'Fear_precision': 0.65,
  'Fear_recall': (0.5561497326203209,),
  'Fear_f1-score': 0.5994236311239194,
  'Happiness_precision': 0.6030927835051546,
  'Happiness_recall': (0.6256684491978609,),
  'Happiness_f1-score': 0.6141732283464566,
  'Neutrality_precision': 0.5504201680672269,
  'Neutrality_recall': (0.7844311377245509,),
  'Neutrality_f1-score': 0.6469135802469137,
  'Sadness_precision': 0.6127450980392157,
  'Sadness_recall': (0.6684491978609626,),
  'Sadness_f1-score': 0.639386189258312,
  'Surprise_precision':

In [44]:
exp_clf_with_feature_selected(gb_fast6, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.69      0.62      0.65       187
    Calmness       0.12      0.19      0.15        16
     Disgust       0.49      0.32      0.39       187
        Fear       0.51      0.42      0.46       187
   Happiness       0.49      0.55      0.51       187
  Neutrality       0.48      0.70      0.57       167
     Sadness       0.57      0.56      0.56       187
    Surprise       0.62      0.69      0.66        62

    accuracy                           0.53      1180
   macro avg       0.50      0.51      0.49      1180
weighted avg       0.54      0.53      0.53      1180

[[116   2  12  16  36   2   2   1]
 [  0   3   0   3   0  10   0   0]
 [  9   1  60  20  24  50  20   3]
 [ 16   2   7  79  24  20  33   6]
 [ 22   5  16  14 102  15   5   8]
 [  2   0  11   6   9 117  18   4]
 [  3  10  12  12  10  31 105   4]
 [  1   2   4  

({'accuracy': 0.5296610169491526,
  'precision': 0.495407604924554,
  'recall': 0.5065294280982626,
  'f1-score': 0.4938955023862157,
  'Anger_precision': 0.6863905325443787,
  'Anger_recall': (0.6203208556149733,),
  'Anger_f1-score': 0.6516853932584269,
  'Calmness_precision': 0.12,
  'Calmness_recall': (0.1875,),
  'Calmness_f1-score': 0.14634146341463414,
  'Disgust_precision': 0.4918032786885246,
  'Disgust_recall': (0.32085561497326204,),
  'Disgust_f1-score': 0.38834951456310673,
  'Fear_precision': 0.512987012987013,
  'Fear_recall': (0.42245989304812837,),
  'Fear_f1-score': 0.4633431085043988,
  'Happiness_precision': 0.4857142857142857,
  'Happiness_recall': (0.5454545454545454,),
  'Happiness_f1-score': 0.5138539042821159,
  'Neutrality_precision': 0.47560975609756095,
  'Neutrality_recall': (0.7005988023952096,),
  'Neutrality_f1-score': 0.5665859564164648,
  'Sadness_precision': 0.5675675675675675,
  'Sadness_recall': (0.5614973262032086,),
  'Sadness_f1-score': 0.5645161

In [45]:
exp_clf_with_feature_selected(gb_fast7, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.71      0.58      0.64       187
    Calmness       0.21      0.38      0.27        16
     Disgust       0.45      0.32      0.38       187
        Fear       0.54      0.41      0.46       187
   Happiness       0.45      0.53      0.49       187
  Neutrality       0.42      0.69      0.52       167
     Sadness       0.58      0.50      0.54       187
    Surprise       0.68      0.74      0.71        62

    accuracy                           0.51      1180
   macro avg       0.50      0.52      0.50      1180
weighted avg       0.53      0.51      0.51      1180

[[109   0  21  10  38   2   3   4]
 [  0   6   0   2   0   5   3   0]
 [ 11   2  60  12  27  57  17   1]
 [  7   3  12  76  29  29  26   5]
 [ 19   3   9  18 100  24   7   7]
 [  3   4  13   8  11 115  12   1]
 [  1  11  14  12  12  39  94   4]
 [  3   0   4  

({'accuracy': 0.5135593220338983,
  'precision': 0.504962760960121,
  'recall': 0.5191439776562108,
  'f1-score': 0.5006782071034785,
  'Anger_precision': 0.7124183006535948,
  'Anger_recall': (0.5828877005347594,),
  'Anger_f1-score': 0.6411764705882352,
  'Calmness_precision': 0.20689655172413793,
  'Calmness_recall': (0.375,),
  'Calmness_f1-score': 0.26666666666666666,
  'Disgust_precision': 0.45112781954887216,
  'Disgust_recall': (0.32085561497326204,),
  'Disgust_f1-score': 0.37500000000000006,
  'Fear_precision': 0.5428571428571428,
  'Fear_recall': (0.40641711229946526,),
  'Fear_f1-score': 0.46483180428134563,
  'Happiness_precision': 0.45045045045045046,
  'Happiness_recall': (0.5347593582887701,),
  'Happiness_f1-score': 0.48899755501222497,
  'Neutrality_precision': 0.4227941176470588,
  'Neutrality_recall': (0.688622754491018,),
  'Neutrality_f1-score': 0.5239179954441913,
  'Sadness_precision': 0.5766871165644172,
  'Sadness_recall': (0.5026737967914439,),
  'Sadness_f1-

In [46]:
exp_clf_with_feature_selected(gb_fast8, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.64      0.59      0.61       187
    Calmness       0.29      0.38      0.32        16
     Disgust       0.48      0.32      0.38       187
        Fear       0.52      0.40      0.45       187
   Happiness       0.50      0.53      0.51       187
  Neutrality       0.43      0.70      0.53       167
     Sadness       0.59      0.55      0.57       187
    Surprise       0.51      0.61      0.55        62

    accuracy                           0.51      1180
   macro avg       0.49      0.51      0.49      1180
weighted avg       0.52      0.51      0.51      1180

[[110   0  13  15  30   5   3  11]
 [  0   6   2   5   0   2   1   0]
 [ 14   3  59  13  21  61  13   3]
 [ 10   2   7  75  28  29  28   8]
 [ 24   2  14  13  99  19   7   9]
 [  4   2  10   5  10 117  17   2]
 [  5   5  13  14   5  38 103   4]
 [  6   1   4  

({'accuracy': 0.514406779661017,
  'precision': 0.4937803521093716,
  'recall': 0.5091910957711969,
  'f1-score': 0.49271265936049397,
  'Anger_precision': 0.6358381502890174,
  'Anger_recall': (0.5882352941176471,),
  'Anger_f1-score': 0.611111111111111,
  'Calmness_precision': 0.2857142857142857,
  'Calmness_recall': (0.375,),
  'Calmness_f1-score': 0.3243243243243243,
  'Disgust_precision': 0.48360655737704916,
  'Disgust_recall': (0.3155080213903743,),
  'Disgust_f1-score': 0.38187702265372164,
  'Fear_precision': 0.5172413793103449,
  'Fear_recall': (0.40106951871657753,),
  'Fear_f1-score': 0.45180722891566266,
  'Happiness_precision': 0.49748743718592964,
  'Happiness_recall': (0.5294117647058824,),
  'Happiness_f1-score': 0.5129533678756477,
  'Neutrality_precision': 0.4317343173431734,
  'Neutrality_recall': (0.7005988023952096,),
  'Neutrality_f1-score': 0.5342465753424657,
  'Sadness_precision': 0.5919540229885057,
  'Sadness_recall': (0.5508021390374331,),
  'Sadness_f1-sco

In [47]:
exp_clf_with_feature_selected(gb_fast9, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.93      0.52      0.67       187
    Calmness       0.67      0.62      0.65        16
     Disgust       0.92      0.18      0.30       187
        Fear       0.94      0.26      0.40       187
   Happiness       0.42      0.71      0.52       187
  Neutrality       0.32      0.95      0.48       167
     Sadness       0.80      0.45      0.58       187
    Surprise       0.78      0.73      0.75        62

    accuracy                           0.52      1180
   macro avg       0.72      0.55      0.54      1180
weighted avg       0.73      0.52      0.51      1180

[[ 97   0   2   0  79   9   0   0]
 [  0  10   0   0   0   6   0   0]
 [  0   1  34   1  38 107   3   3]
 [  3   0   0  48  49  68  14   5]
 [  4   0   1   1 132  46   0   3]
 [  0   2   0   0   3 158   4   0]
 [  0   2   0   1   8  90  84   2]
 [  0   0   0  

({'accuracy': 0.5152542372881356,
  'precision': 0.7214436229795502,
  'recall': 0.5511517126605854,
  'f1-score': 0.5432987361076407,
  'Anger_precision': 0.9326923076923077,
  'Anger_recall': (0.5187165775401069,),
  'Anger_f1-score': 0.6666666666666665,
  'Calmness_precision': 0.6666666666666666,
  'Calmness_recall': (0.625,),
  'Calmness_f1-score': 0.6451612903225806,
  'Disgust_precision': 0.918918918918919,
  'Disgust_recall': (0.18181818181818182,),
  'Disgust_f1-score': 0.3035714285714286,
  'Fear_precision': 0.9411764705882353,
  'Fear_recall': (0.25668449197860965,),
  'Fear_f1-score': 0.4033613445378152,
  'Happiness_precision': 0.41509433962264153,
  'Happiness_recall': (0.7058823529411765,),
  'Happiness_f1-score': 0.5227722772277228,
  'Neutrality_precision': 0.32113821138211385,
  'Neutrality_recall': (0.9461077844311377,),
  'Neutrality_f1-score': 0.4795144157814871,
  'Sadness_precision': 0.8,
  'Sadness_recall': (0.44919786096256686,),
  'Sadness_f1-score': 0.57534246

In [48]:
exp_clf_with_feature_selected(gb_fast10, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.65      0.72       187
    Calmness       0.50      0.62      0.56        16
     Disgust       0.88      0.22      0.36       187
        Fear       0.87      0.33      0.47       187
   Happiness       0.47      0.68      0.56       187
  Neutrality       0.39      0.90      0.55       167
     Sadness       0.69      0.59      0.64       187
    Surprise       0.70      0.87      0.78        62

    accuracy                           0.57      1180
   macro avg       0.66      0.61      0.58      1180
weighted avg       0.69      0.57      0.56      1180

[[121   0   3   1  53   5   1   3]
 [  0  10   0   0   0   6   0   0]
 [  5   1  42   4  35  83  11   6]
 [  8   0   0  61  38  45  28   7]
 [ 15   1   2   2 128  32   3   4]
 [  0   4   1   1   4 151   6   0]
 [  0   4   0   1   8  60 111   3]
 [  0   0   0  

({'accuracy': 0.5745762711864407,
  'precision': 0.6648529491948323,
  'recall': 0.6095118984473695,
  'f1-score': 0.5788729588329042,
  'Anger_precision': 0.8120805369127517,
  'Anger_recall': (0.6470588235294118,),
  'Anger_f1-score': 0.7202380952380952,
  'Calmness_precision': 0.5,
  'Calmness_recall': (0.625,),
  'Calmness_f1-score': 0.5555555555555556,
  'Disgust_precision': 0.875,
  'Disgust_recall': (0.22459893048128343,),
  'Disgust_f1-score': 0.3574468085106383,
  'Fear_precision': 0.8714285714285714,
  'Fear_recall': (0.32620320855614976,),
  'Fear_f1-score': 0.4747081712062257,
  'Happiness_precision': 0.4740740740740741,
  'Happiness_recall': (0.6844919786096256,),
  'Happiness_f1-score': 0.5601750547045953,
  'Neutrality_precision': 0.3911917098445596,
  'Neutrality_recall': (0.9041916167664671,),
  'Neutrality_f1-score': 0.546112115732369,
  'Sadness_precision': 0.69375,
  'Sadness_recall': (0.5935828877005348,),
  'Sadness_f1-score': 0.6397694524495677,
  'Surprise_preci

In [49]:
exp_clf_with_feature_selected(gb_fast11, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.68      0.74       187
    Calmness       0.52      0.69      0.59        16
     Disgust       0.70      0.28      0.40       187
        Fear       0.76      0.42      0.54       187
   Happiness       0.53      0.68      0.60       187
  Neutrality       0.45      0.87      0.59       167
     Sadness       0.64      0.65      0.65       187
    Surprise       0.76      0.85      0.80        62

    accuracy                           0.61      1180
   macro avg       0.65      0.64      0.61      1180
weighted avg       0.66      0.61      0.60      1180

[[127   0   8   3  44   3   1   1]
 [  0  11   0   0   0   3   2   0]
 [  5   2  52   5  33  68  18   4]
 [  8   0   3  78  25  31  34   8]
 [ 15   1   3   6 128  28   4   2]
 [  0   3   3   4   3 145   9   0]
 [  0   4   2   5   7  45 122   2]
 [  1   0   3  

({'accuracy': 0.6067796610169491,
  'precision': 0.6473560785824288,
  'recall': 0.640229016156922,
  'f1-score': 0.6137005694177844,
  'Anger_precision': 0.8141025641025641,
  'Anger_recall': (0.679144385026738,),
  'Anger_f1-score': 0.7405247813411078,
  'Calmness_precision': 0.5238095238095238,
  'Calmness_recall': (0.6875,),
  'Calmness_f1-score': 0.5945945945945946,
  'Disgust_precision': 0.7027027027027027,
  'Disgust_recall': (0.27807486631016043,),
  'Disgust_f1-score': 0.3984674329501916,
  'Fear_precision': 0.7647058823529411,
  'Fear_recall': (0.41711229946524064,),
  'Fear_f1-score': 0.5397923875432526,
  'Happiness_precision': 0.5267489711934157,
  'Happiness_recall': (0.6844919786096256,),
  'Happiness_f1-score': 0.5953488372093024,
  'Neutrality_precision': 0.44753086419753085,
  'Neutrality_recall': (0.8682634730538922,),
  'Neutrality_f1-score': 0.5906313645621181,
  'Sadness_precision': 0.6421052631578947,
  'Sadness_recall': (0.6524064171122995,),
  'Sadness_f1-score

In [50]:
exp_clf_with_feature_selected(gb_fast12, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.68      0.50      0.58       187
    Calmness       0.23      0.56      0.32        16
     Disgust       0.39      0.27      0.32       187
        Fear       0.47      0.34      0.39       187
   Happiness       0.43      0.53      0.48       187
  Neutrality       0.39      0.63      0.48       167
     Sadness       0.55      0.46      0.50       187
    Surprise       0.48      0.66      0.56        62

    accuracy                           0.46      1180
   macro avg       0.45      0.49      0.45      1180
weighted avg       0.48      0.46      0.46      1180

[[ 94   2  13  10  46  12   5   5]
 [  0   9   1   0   0   2   4   0]
 [ 14   7  50  10  31  51  19   5]
 [ 12   4  10  63  24  33  23  18]
 [ 13   5  12  22 100  25   4   6]
 [  2   4  21   6  12 105  14   3]
 [  0   8  17  17  10  42  86   7]
 [  4   1   3  

({'accuracy': 0.46440677966101696,
  'precision': 0.4532438100970886,
  'recall': 0.4942671394531964,
  'f1-score': 0.4537229233513667,
  'Anger_precision': 0.6762589928057554,
  'Anger_recall': (0.5026737967914439,),
  'Anger_f1-score': 0.5766871165644172,
  'Calmness_precision': 0.225,
  'Calmness_recall': (0.5625,),
  'Calmness_f1-score': 0.3214285714285714,
  'Disgust_precision': 0.3937007874015748,
  'Disgust_recall': (0.26737967914438504,),
  'Disgust_f1-score': 0.3184713375796179,
  'Fear_precision': 0.47368421052631576,
  'Fear_recall': (0.33689839572192515,),
  'Fear_f1-score': 0.39375000000000004,
  'Happiness_precision': 0.43478260869565216,
  'Happiness_recall': (0.5347593582887701,),
  'Happiness_f1-score': 0.47961630695443647,
  'Neutrality_precision': 0.3888888888888889,
  'Neutrality_recall': (0.6287425149700598,),
  'Neutrality_f1-score': 0.4805491990846682,
  'Sadness_precision': 0.5512820512820513,
  'Sadness_recall': (0.45989304812834225,),
  'Sadness_f1-score': 0.5

In [51]:
exp_clf_with_feature_selected(gb_fast13, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.65      0.45      0.54       187
    Calmness       0.10      0.88      0.18        16
     Disgust       0.50      0.30      0.38       187
        Fear       0.46      0.36      0.41       187
   Happiness       0.44      0.51      0.47       187
  Neutrality       0.45      0.63      0.52       167
     Sadness       0.62      0.45      0.52       187
    Surprise       0.65      0.65      0.65        62

    accuracy                           0.46      1180
   macro avg       0.48      0.53      0.46      1180
weighted avg       0.52      0.46      0.48      1180

[[ 85  11   7  30  45   3   0   6]
 [  0  14   0   0   0   1   0   1]
 [ 12  20  56  13  20  50  13   3]
 [ 10  24  14  68  32  14  21   4]
 [ 22  17   9  16  95  17   7   4]
 [  0  26  10   2  11 106   9   3]
 [  1  19  15  17   7  43  84   1]
 [  0  10   0  

({'accuracy': 0.46440677966101696,
  'precision': 0.48408192343129186,
  'recall': 0.5287197674256455,
  'f1-score': 0.4574495040241394,
  'Anger_precision': 0.6538461538461539,
  'Anger_recall': (0.45454545454545453,),
  'Anger_f1-score': 0.5362776025236593,
  'Calmness_precision': 0.09929078014184398,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.17834394904458598,
  'Disgust_precision': 0.5045045045045045,
  'Disgust_recall': (0.2994652406417112,),
  'Disgust_f1-score': 0.3758389261744966,
  'Fear_precision': 0.46258503401360546,
  'Fear_recall': (0.36363636363636365,),
  'Fear_f1-score': 0.407185628742515,
  'Happiness_precision': 0.4377880184331797,
  'Happiness_recall': (0.5080213903743316,),
  'Happiness_f1-score': 0.47029702970297027,
  'Neutrality_precision': 0.4472573839662447,
  'Neutrality_recall': (0.6347305389221557,),
  'Neutrality_f1-score': 0.5247524752475248,
  'Sadness_precision': 0.6222222222222222,
  'Sadness_recall': (0.44919786096256686,),
  'Sadness_f1

In [52]:
exp_clf_with_feature_selected(gb_fast14, X_train, X_test, y_train_e, y_test_e)

Model Name: HistGradientBoostingClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.66      0.41      0.51       187
    Calmness       0.18      1.00      0.30        16
     Disgust       0.51      0.29      0.37       187
        Fear       0.39      0.33      0.36       187
   Happiness       0.38      0.42      0.40       187
  Neutrality       0.37      0.64      0.47       167
     Sadness       0.53      0.43      0.47       187
    Surprise       0.65      0.63      0.64        62

    accuracy                           0.44      1180
   macro avg       0.46      0.52      0.44      1180
weighted avg       0.48      0.44      0.44      1180

[[ 77   8   8  17  58  13   4   2]
 [  0  16   0   0   0   0   0   0]
 [  6   7  55  15  27  58  17   2]
 [ 13  13   8  62  20  37  30   4]
 [ 20  11  15  21  79  23   9   9]
 [  0   9  10  18  13 107   9   1]
 [  1  13  10  24   7  49  80   3]
 [  0  13   2  

({'accuracy': 0.4364406779661017,
  'precision': 0.45888909263212607,
  'recall': 0.5196814194622658,
  'f1-score': 0.4405310599222959,
  'Anger_precision': 0.6581196581196581,
  'Anger_recall': (0.4117647058823529,),
  'Anger_f1-score': 0.506578947368421,
  'Calmness_precision': 0.17777777777777778,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.3018867924528302,
  'Disgust_precision': 0.5092592592592593,
  'Disgust_recall': (0.29411764705882354,),
  'Disgust_f1-score': 0.3728813559322034,
  'Fear_precision': 0.39490445859872614,
  'Fear_recall': (0.3315508021390374,),
  'Fear_f1-score': 0.3604651162790698,
  'Happiness_precision': 0.3761904761904762,
  'Happiness_recall': (0.42245989304812837,),
  'Happiness_f1-score': 0.3979848866498741,
  'Neutrality_precision': 0.3715277777777778,
  'Neutrality_recall': (0.6407185628742516,),
  'Neutrality_f1-score': 0.4703296703296703,
  'Sadness_precision': 0.5333333333333333,
  'Sadness_recall': (0.42780748663101603,),
  'Sadness_f1-scor

In [53]:
# change y_lable into emo
# exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_e, y_test_e)

In [54]:
# check how long on normal gradient boosting
# exp_clf_with_feature_selected(gboost, X_train, X_test, y_train_e, y_test_e)

### Threshold tuning

In [55]:
# Calculate accuracy for the given threshold

probabilities = gb_fast2.predict_proba(X_test)

def calc_acc_by_thres(probabilities, threshold, y_test):
    predictions_adj = []
    # Loop through each sample's probabilities
    for probs in probabilities:
        if probs[0] > threshold:
            pred_class = -1
        elif probs[1] > probs[2]:
            pred_class = 0
        else:
            pred_class = 1
        predictions_adj.append(pred_class)
    accuracy = np.mean(predictions_adj == y_test)
    precision, recall, f1score, _ = precision_recall_fscore_support(y_test_s, predictions_adj, average=None)
    return accuracy, min(f1score), np.var(f1score)

best_threshold = None
best_accuracy = 0.0
best_f1score = 0.0
# best_f1s_var = 10

# Define a range of threshold values to try
threshold_range = np.linspace(0.15, 0.6, 100)
for threshold in threshold_range:
    accuracy, min_f1_score, var_f1_score = calc_acc_by_thres(probabilities, threshold, y_test_s)
    # if accuracy > best_accuracy:
    #     best_accuracy = accuracy
    #     best_threshold = threshold
    if min_f1_score > best_f1score:
        best_f1score = min_f1_score
        best_threshold = threshold
        best_accuracy = accuracy
    # if var_f1_score < best_f1s_var:
    #     best_f1score = min_f1_score
    #     best_threshold = threshold
    #     best_accuracy = accuracy
    #     best_f1s_var = var_f1_score
        

print("Best Threshold:", best_threshold)
print("Best Accuracy:", best_accuracy)
print("Best min f1 score:", best_f1score)
# print("Best var f1 score:", best_f1s_var)

Best Threshold: 0.15
Best Accuracy: 0.34491525423728814
Best min f1 score: 0.18357487922705315


In [56]:
# if you want to adjust the threshold; Predict probabilities for each class label;
# best threshold and best_threshold x 110% 120% 130% to see trend
probabilities = gb_fast2.predict_proba(X_test)

threshold = best_threshold
print('BEST Threshold:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.1
print('Threshold-2:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.2
print('Threshold-3:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.3
print('Threshold-4:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

BEST Threshold: 0.15
              precision    recall  f1-score   support

          -1       0.80      0.25      0.38       748
           0       0.79      0.10      0.18       183
           1       0.22      0.80      0.34       249

    accuracy                           0.34      1180
   macro avg       0.60      0.39      0.30      1180
weighted avg       0.67      0.34      0.34      1180

[[188   4 556]
 [  0  19 164]
 [ 48   1 200]]
Accuracy: 0.34491525423728814
Threshold-2: 0.165
              precision    recall  f1-score   support

          -1       0.80      0.25      0.38       748
           0       0.79      0.10      0.18       183
           1       0.22      0.81      0.34       249

    accuracy                           0.34      1180
   macro avg       0.60      0.39      0.30      1180
weighted avg       0.68      0.34      0.34      1180

[[185   4 559]
 [  0  19 164]
 [ 46   1 202]]
Accuracy: 0.3440677966101695
Threshold-3: 0.18
              precision    re