In [1]:
import numpy as np
import pandas as pd
import glob
import os, sys
import matplotlib.pyplot as plt
import time
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
VERSION = 4
RANDOM_SEED = 26

## Load data

### Load Data - downsampled

In [2]:
# df_joint_train_org   = pd.read_csv(f'./features/cache_all_features_train_V{VERSION}.csv')
# df_joint_train_org   = df_joint_train_org.drop(columns=['GNE_max_gne','GNE_mean_gne','GNE_stddev_gne','GNE_sum_gne'])
                       
# df_joint_test_org = pd.read_csv(f'./features/cache_all_features_test_V{VERSION}.csv').drop(
#     columns=['GNE_max_gne','GNE_mean_gne','GNE_stddev_gne','GNE_sum_gne'])

# print("shape of train set: ", df_joint_train_org.shape)
# print("shape of test  set: ", df_joint_test_org.shape)


# df_joint_train  = pd.read_csv(f'./features/cache_train_V4_resampled_2500.csv')
df_joint_test = pd.read_csv(f'./cache_all_features_test_V4.csv')

# print("shape of train set: ", df_joint_train.shape)
print("shape of test  set: ", df_joint_test.shape)

df_joint_train_aug  = pd.read_csv(f'./cache_train_V4_augmented.csv')
feature_column_names = [i for i in df_joint_train_aug.columns \
                        if i not in ['file_path','renamed_file_path','split','sentiment_value','emotional_category']]
             
print("shape of train set: ", df_joint_train_aug.shape)
df_joint_train_aug.groupby('sentiment_value')['file_path'].count()

shape of test  set:  (1180, 1550)
shape of train set:  (24885, 1546)


sentiment_value
-1    7999
 0    8560
 1    8326
Name: file_path, dtype: int64

### best guess feature combinations

In [3]:
# generate selected features 
def generate_selected_features_by_type(feature_column_names,input,stats,number=1):
    selected_result = []
    for name in feature_column_names:
        if input+"_"+stats in name:
            selected_result.append(name)
    if number < len(selected_result):
        selected_result = selected_result[:number]
    return selected_result

# example to take mfcc 20 mean & std; mel32; zcr all 5 stats features
feature_MFCC20_mean  = generate_selected_features_by_type(feature_column_names,"mfcc","mean",20)
feature_MFCC20_std   = generate_selected_features_by_type(feature_column_names,"mfcc","std",20)
feature_mel32_median = generate_selected_features_by_type(feature_column_names,"mel32","median",32)
feature_mel32_std    = generate_selected_features_by_type(feature_column_names,"mel32","std",32)
feature_zcr_stats    = generate_selected_features_by_type(feature_column_names,"zcr","",5)
feature_rms_stats    = generate_selected_features_by_type(feature_column_names,"rms","",5)
selected_spect = ['Spectrum_band_energy_difference','Spectrum_band_density_difference','Spectrum_center_of_gravity_spectrum','Spectrum_skewness_spectrum','Spectrum_kurtosis_spectrum', 'Spectrum_stddev_spectrum','Spectrum_band_density', 'Spectrum_band_energy']
selected_formant = ['Formant_f1_mean','Formant_f1_median','Formant_f3_mean','Formant_fitch_vtl','Formant_mff','Formant_formant_dispersion']
selected_pitch = ['Pitch_pitch_slope_without_octave_jumps', 'Pitch_q3_pitch','Pitch_stddev_pitch', 'Pitch_mean_absolute_pitch_slope','Pitch_mean_pitch', 'Pitch_max_pitch', 'Pitch_q1_pitch', 'Pitch_min_pitch']
selected_intensity = ['Intensity_max_intensity', 'Intensity_q3_intensity','Intensity_median_intensity', 'Intensity_mean_intensity', 'Intensity_stddev_intensity','Intensity_relative_max_intensity_time']
selected_HNR = ['HNR_stddev_hnr', 'HNR_mean_hnr','HNR_relative_min_hnr_time','HNR_max_hnr']
selected_prosody = selected_intensity + selected_pitch # + ['Local Jitter','Local Shimmer']
selected_feature_names = feature_MFCC20_mean + feature_MFCC20_std + feature_mel32_median + feature_mel32_std + \
                        feature_zcr_stats + feature_rms_stats + selected_intensity + selected_pitch 

In [4]:
# default use augmented training set and balanced test set
X_train = df_joint_train_aug[selected_feature_names]
y_train_s = df_joint_train_aug['sentiment_value']
y_train_e = df_joint_train_aug['emotional_category']

# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# y_e_num = label_encoder.fit_transform(y_train_e)

X_test = df_joint_test[selected_feature_names]
y_test_s = df_joint_test['sentiment_value']
y_test_e = df_joint_test['emotional_category']

# y_test_e_num = label_encoder.fit_transform(y_test_e)

In [5]:
X_train.shape, X_test.shape

((24885, 128), (1180, 128))

In [6]:
# !pip install lightgbm

## Models

In [7]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score, precision_recall_fscore_support
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier,HistGradientBoostingClassifier
from lightgbm import LGBMClassifier

# Common adjustable parameters
common_params = {
    'RandomForest': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest1': {'n_estimators': 500, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest2': {'n_estimators': 1000, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest3': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 3, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest4': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 5, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest5': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 10, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest6': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 20, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest7': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':50, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest8': {'n_estimators': 200, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':200, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest9': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':500, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest10': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':5, 'random_state': RANDOM_SEED},
    'RandomForest11': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':10, 'random_state': RANDOM_SEED},
    'RandomForest12': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':False, 'n_jobs':3, 'random_state': RANDOM_SEED},

    
    
    'SVM': {'kernel': 'rbf', 'C': 1.0, 'probability': True},
    'KNN': {'n_neighbors': 2},
    'KNN1': {'n_neighbors': 3},
    'KNN2': {'n_neighbors': 4},
    'KNN3': {'n_neighbors': 5},
    'KNN4': {'n_neighbors': 6},
    'KNN5': {'n_neighbors': 8},
    'KNN6': {'n_neighbors': 10},
    'KNN7': {'n_neighbors': 12},
    'KNN8': {'n_neighbors': 15},
    'KNN9': {'n_neighbors': 20},
    'KNN10': {'n_neighbors': 30},
    'GradientBoosting': {'loss': 'log_loss', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                         'criterion': 'friedman_mse', 'min_samples_split': 2, 'max_depth': 3},
    'GradientBoostingFast': {'loss': 'log_loss', 'learning_rate': 0.1, 'max_iter': 100},
    'AdaBoost': {'n_estimators': 50, 'learning_rate': 1.0},
    'LightGBM': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}    
}

# Models with common adjustable parameters
dtree   = DecisionTreeClassifier()
rforest = RandomForestClassifier(**common_params['RandomForest'])
rforest1 = RandomForestClassifier(**common_params['RandomForest1'])
rforest2 = RandomForestClassifier(**common_params['RandomForest2'])
rforest3 = RandomForestClassifier(**common_params['RandomForest3'])
rforest4 = RandomForestClassifier(**common_params['RandomForest4'])
rforest5 = RandomForestClassifier(**common_params['RandomForest5'])
rforest6 = RandomForestClassifier(**common_params['RandomForest6'])
rforest7 = RandomForestClassifier(**common_params['RandomForest7'])
rforest8 = RandomForestClassifier(**common_params['RandomForest8'])
rforest9 = RandomForestClassifier(**common_params['RandomForest9'])
rforest10 = RandomForestClassifier(**common_params['RandomForest10'])
rforest11 = RandomForestClassifier(**common_params['RandomForest11'])
rforest12 = RandomForestClassifier(**common_params['RandomForest12'])

svm     = SVC(**common_params['SVM'])
knn     = KNeighborsClassifier(**common_params['KNN'])
knn1     = KNeighborsClassifier(**common_params['KNN1'])
knn2     = KNeighborsClassifier(**common_params['KNN2'])
knn3     = KNeighborsClassifier(**common_params['KNN3'])
knn4     = KNeighborsClassifier(**common_params['KNN4'])
knn5     = KNeighborsClassifier(**common_params['KNN5'])
knn6     = KNeighborsClassifier(**common_params['KNN6'])
knn7     = KNeighborsClassifier(**common_params['KNN7'])
knn8     = KNeighborsClassifier(**common_params['KNN8'])
knn9     = KNeighborsClassifier(**common_params['KNN9'])
knn10     = KNeighborsClassifier(**common_params['KNN10'])
gboost  = GradientBoostingClassifier(**common_params['GradientBoosting'])
gb_fast = HistGradientBoostingClassifier(**common_params['GradientBoostingFast'])
adaBoost= AdaBoostClassifier(**common_params['AdaBoost'])
lightgbm=LGBMClassifier(**common_params['LightGBM'])

In [8]:
# def exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train, y_test):
#     start = time.time()
#     print(f'Model Name: {clf_model.__class__};\n Train set shape {X_train.shape}, num of class {y_train.unique().size}')
#     predictions = clf_model.fit(X_train, y_train).predict(X_test.values)
    
#     print(classification_report(y_test, predictions))
#     print(confusion_matrix(y_test, predictions))
    
#     precision, recall, f1score, support = precision_recall_fscore_support(y_test, predictions, average=None)
#     # TODO make all metrics into result dict for recording 
#     probabilities = clf_model.predict_proba(X_test.values)
#     print('prbabilities distribution: \n', pd.DataFrame(probabilities,columns=clf_model.classes_).describe())
#     print(f'Time taken: {round(time.time()-start,3)} seconds.\n')

In [9]:
import pickle

def exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train, y_test,verbose=True):
    start = time.time()
    
    clf_model.fit(X_train, y_train)
    predictions = clf_model.predict(X_test.values)
    
    # Calculate metrics
    report = classification_report(y_test, predictions, output_dict=True)
    metrics = {
        'accuracy': report['accuracy'],
        'precision': report['macro avg']['precision'],
        'recall': report['macro avg']['recall'],
        'f1-score': report['macro avg']['f1-score']
    }
    for class_name in report.keys():
        if class_name not in ['accuracy', 'macro avg', 'weighted avg']:
            metrics[class_name+'_precision'] = report[class_name]['precision']
            metrics[class_name+'_recall'] = report[class_name]['recall'],
            metrics[class_name+'_f1-score'] = report[class_name]['f1-score']
    
    feature_columns = list(X_train.columns)
    num_classes = y_train.nunique()
    class_names = list(y_train.unique())
    
    model_filename = f"./models/{clf_model.__class__.__name__}_model"
    model_filename += f"_{num_classes}cls_{len(feature_columns)}feat_{round(report['accuracy']*100)}acc.pkl"
    with open(model_filename, 'wb') as file:
        pickle.dump(clf_model, file)
    
    results = {**metrics,
        'num_classes': num_classes,
        'class_names': class_names,
        'model_filename': model_filename,
        'feature_columns': feature_columns,
    }
    
    if verbose:
        print(f"Model Name: {clf_model.__class__.__name__};\nTrain set shape {X_train.shape}, num of class {num_classes}")
        print(classification_report(y_test, predictions))
        print(confusion_matrix(y_test, predictions))
        probabilities = clf_model.predict_proba(X_test.values)
        print('Probabilities distribution:\n', pd.DataFrame(probabilities, columns=clf_model.classes_).describe())
    print(f"Model: {clf_model.__class__.__name__};Time taken: {round(time.time()-start, 3)} seconds.\n")

    return results, clf_model


### Sentiment 3-class Classifier Sample code

In [10]:
result, m_trained = exp_clf_with_feature_selected(rforest, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.86      0.77      0.82       748
           0       0.55      0.88      0.67       183
           1       0.69      0.59      0.63       249

    accuracy                           0.75      1180
   macro avg       0.70      0.75      0.71      1180
weighted avg       0.78      0.75      0.76      1180

[[579 103  66]
 [ 21 161   1]
 [ 71  31 147]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.481357     0.239601     0.279042
std       0.235381     0.237817     0.233339
min       0.006418     0.000000     0.004744
25%       0.316403     0.025099     0.104183
50%       0.454022     0.162020     0.209974
75%       0.674010     0.403485     0.380907
max       0.963320     0.987306     0.993442
Model: RandomForestClassifier;Time taken: 46.645 seconds.


In [11]:
result, m_trained = exp_clf_with_feature_selected(rforest1, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.87      0.78      0.82       748
           0       0.54      0.86      0.66       183
           1       0.71      0.61      0.66       249

    accuracy                           0.76      1180
   macro avg       0.70      0.75      0.71      1180
weighted avg       0.78      0.76      0.76      1180

[[584 104  60]
 [ 22 158   3]
 [ 65  32 152]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.479763     0.240681     0.279556
std       0.234159     0.237439     0.232525
min       0.009044     0.000312     0.004150
25%       0.311041     0.025501     0.107213
50%       0.449891     0.166498     0.210666
75%       0.669705     0.410743     0.381287
max       0.951878     0.986796     0.989866
Model: RandomForestClassifier;Time taken: 232.446 seconds.

In [12]:
result, m_trained = exp_clf_with_feature_selected(rforest2, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.87      0.78      0.82       748
           0       0.54      0.86      0.66       183
           1       0.71      0.61      0.66       249

    accuracy                           0.76      1180
   macro avg       0.71      0.75      0.71      1180
weighted avg       0.78      0.76      0.76      1180

[[586 104  58]
 [ 22 158   3]
 [ 66  32 151]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.480513     0.240033     0.279454
std       0.234388     0.236996     0.232103
min       0.006465     0.000434     0.003154
25%       0.313515     0.025638     0.108295
50%       0.450835     0.167615     0.210527
75%       0.672000     0.406413     0.381686
max       0.955310     0.990381     0.986166
Model: RandomForestClassifier;Time taken: 490.082 seconds.

In [13]:
result, m_trained = exp_clf_with_feature_selected(rforest3, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.75      0.16      0.27       748
           0       0.27      0.94      0.42       183
           1       0.38      0.58      0.46       249

    accuracy                           0.37      1180
   macro avg       0.47      0.56      0.38      1180
weighted avg       0.60      0.37      0.33      1180

[[121 396 231]
 [  6 172   5]
 [ 35  69 145]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.351707     0.319624     0.328669
std       0.074903     0.186211     0.168172
min       0.183842     0.016656     0.105706
25%       0.309400     0.128995     0.181809
50%       0.338492     0.398444     0.276811
75%       0.396851     0.472658     0.452525
max       0.592035     0.704354     0.755006
Model: RandomForestClassifier;Time taken: 14.337 seconds.


In [14]:
result, m_trained = exp_clf_with_feature_selected(rforest4, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.86      0.50      0.63       748
           0       0.35      0.92      0.51       183
           1       0.55      0.58      0.56       249

    accuracy                           0.58      1180
   macro avg       0.59      0.67      0.57      1180
weighted avg       0.72      0.58      0.60      1180

[[373 260 115]
 [ 10 169   4]
 [ 50  55 144]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.390258     0.297660     0.312082
std       0.138903     0.208736     0.190669
min       0.079226     0.002977     0.044284
25%       0.306471     0.076312     0.161404
50%       0.364585     0.337416     0.261625
75%       0.486711     0.465132     0.411992
max       0.770678     0.874226     0.864125
Model: RandomForestClassifier;Time taken: 23.804 seconds.


In [15]:
result, m_trained = exp_clf_with_feature_selected(rforest5, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.89      0.72      0.80       748
           0       0.49      0.93      0.64       183
           1       0.69      0.60      0.64       249

    accuracy                           0.73      1180
   macro avg       0.69      0.75      0.69      1180
weighted avg       0.78      0.73      0.74      1180

[[542 140  66]
 [ 10 171   2]
 [ 58  41 150]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.458260     0.257664     0.284076
std       0.220633     0.237821     0.225902
min       0.014055     0.000245     0.006494
25%       0.305163     0.030370     0.120340
50%       0.428268     0.200392     0.215753
75%       0.631229     0.444116     0.385708
max       0.937948     0.976277     0.969216
Model: RandomForestClassifier;Time taken: 42.93 seconds.



In [16]:
result, m_trained = exp_clf_with_feature_selected(rforest6, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.87      0.77      0.82       748
           0       0.54      0.87      0.67       183
           1       0.69      0.61      0.65       249

    accuracy                           0.75      1180
   macro avg       0.70      0.75      0.71      1180
weighted avg       0.78      0.75      0.76      1180

[[576 106  66]
 [ 21 159   3]
 [ 66  30 153]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.480414     0.239838     0.279749
std       0.233903     0.237036     0.232894
min       0.005960     0.000209     0.003294
25%       0.315846     0.023076     0.106392
50%       0.453054     0.168713     0.207967
75%       0.670619     0.404659     0.382795
max       0.950470     0.984209     0.993546
Model: RandomForestClassifier;Time taken: 52.431 seconds.


In [17]:
result, m_trained = exp_clf_with_feature_selected(rforest7, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.87      0.81      0.84       748
           0       0.57      0.87      0.69       183
           1       0.75      0.63      0.68       249

    accuracy                           0.78      1180
   macro avg       0.73      0.77      0.74      1180
weighted avg       0.80      0.78      0.78      1180

[[604  95  49]
 [ 21 160   2]
 [ 67  26 156]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.496409     0.230870     0.272721
std       0.248865     0.241474     0.240291
min       0.003960     0.000000     0.000119
25%       0.322767     0.019268     0.095509
50%       0.472262     0.144614     0.197920
75%       0.698979     0.395172     0.376388
max       0.980972     0.992153     0.991552
Model: RandomForestClassifier;Time taken: 59.577 seconds.


In [18]:
result, m_trained = exp_clf_with_feature_selected(rforest8, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.88      0.74      0.81       748
           0       0.50      0.90      0.64       183
           1       0.68      0.60      0.64       249

    accuracy                           0.74      1180
   macro avg       0.69      0.75      0.69      1180
weighted avg       0.78      0.74      0.74      1180

[[556 125  67]
 [ 16 164   3]
 [ 59  41 149]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.458697     0.252946     0.288357
std       0.214813     0.232738     0.224347
min       0.012232     0.000845     0.006428
25%       0.307065     0.034135     0.121837
50%       0.436873     0.195347     0.231501
75%       0.622975     0.421364     0.386626
max       0.938499     0.978949     0.980202
Model: RandomForestClassifier;Time taken: 96.414 seconds.


In [19]:
result, m_trained = exp_clf_with_feature_selected(rforest9, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.88      0.70      0.78       748
           0       0.46      0.91      0.61       183
           1       0.65      0.58      0.61       249

    accuracy                           0.71      1180
   macro avg       0.66      0.73      0.67      1180
weighted avg       0.76      0.71      0.72      1180

[[524 150  74]
 [ 13 166   4]
 [ 60  45 144]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.427775     0.268543     0.303682
std       0.178380     0.220239     0.211061
min       0.022878     0.001740     0.013062
25%       0.311299     0.053172     0.137176
50%       0.413344     0.243965     0.265486
75%       0.554525     0.432926     0.399318
max       0.828470     0.960595     0.960365
Model: RandomForestClassifier;Time taken: 41.537 seconds.


In [20]:
result, m_trained = exp_clf_with_feature_selected(rforest10, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.86      0.77      0.82       748
           0       0.55      0.88      0.67       183
           1       0.69      0.59      0.63       249

    accuracy                           0.75      1180
   macro avg       0.70      0.75      0.71      1180
weighted avg       0.78      0.75      0.76      1180

[[579 103  66]
 [ 21 161   1]
 [ 71  31 147]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.481357     0.239601     0.279042
std       0.235381     0.237817     0.233339
min       0.006418     0.000000     0.004744
25%       0.316403     0.025099     0.104183
50%       0.454022     0.162020     0.209974
75%       0.674010     0.403485     0.380907
max       0.963320     0.987306     0.993442
Model: RandomForestClassifier;Time taken: 32.888 seconds.


In [21]:
result, m_trained = exp_clf_with_feature_selected(rforest11, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.86      0.77      0.82       748
           0       0.55      0.88      0.67       183
           1       0.69      0.59      0.63       249

    accuracy                           0.75      1180
   macro avg       0.70      0.75      0.71      1180
weighted avg       0.78      0.75      0.76      1180

[[579 103  66]
 [ 21 161   1]
 [ 71  31 147]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.481357     0.239601     0.279042
std       0.235381     0.237817     0.233339
min       0.006418     0.000000     0.004744
25%       0.316403     0.025099     0.104183
50%       0.454022     0.162020     0.209974
75%       0.674010     0.403485     0.380907
max       0.963320     0.987306     0.993442
Model: RandomForestClassifier;Time taken: 19.1 seconds.



In [22]:
result, m_trained = exp_clf_with_feature_selected(rforest12, X_train, X_test, y_train_s, y_test_s)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.87      0.79      0.83       748
           0       0.55      0.86      0.67       183
           1       0.72      0.61      0.66       249

    accuracy                           0.77      1180
   macro avg       0.71      0.76      0.72      1180
weighted avg       0.79      0.77      0.77      1180

[[594  98  56]
 [ 21 158   4]
 [ 65  32 152]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.494534     0.231630     0.273835
std       0.247751     0.241153     0.238799
min       0.006298     0.000000     0.002433
25%       0.321047     0.020256     0.098235
50%       0.469791     0.138112     0.200818
75%       0.692738     0.395575     0.374828
max       0.973753     0.985726     0.993594
Model: RandomForestClassifier;Time taken: 81.556 seconds.


In [23]:
# change model as the first parameter in the function 
# result, m_trained = exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_s, y_test_s)

In [24]:
# result, m_trained = exp_clf_with_feature_selected(adaBoost, X_train, X_test, y_train_s, y_test_s)

In [25]:
# result, m_trained = exp_clf_with_feature_selected(gboost, X_train, X_test, y_train_s, y_test_s)

In [26]:
# result, m_trained = exp_clf_with_feature_selected(knn, X_train, X_test, y_train_s, y_test_s)

In [27]:
# result, m_trained = exp_clf_with_feature_selected(svm, X_train, X_test, y_train_s, y_test_s)

In [28]:
# result, m_trained = exp_clf_with_feature_selected(rforest, X_train, X_test, y_train_s, y_test_s)

In [29]:
# result, m_trained = exp_clf_with_feature_selected(lightgbm, X_train, X_test, y_train_s, y_test_s)

In [30]:
# result, m_trained = exp_clf_with_feature_selected(dtree, X_train, X_test, y_train_s, y_test_s)

### How to save experiment metrics result

In [31]:
exp_results = []
for clf_model in [rforest,rforest1,rforest2,rforest3,rforest4,rforest5,rforest6,rforest7,rforest8,rforest9,rforest10,rforest11,rforest12]:
    result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_s, y_test_s,verbose=False)
    exp_results.append(result)
pd.DataFrame(exp_results)

Model: RandomForestClassifier;Time taken: 59.504 seconds.

Model: RandomForestClassifier;Time taken: 298.028 seconds.

Model: RandomForestClassifier;Time taken: 592.799 seconds.

Model: RandomForestClassifier;Time taken: 14.939 seconds.

Model: RandomForestClassifier;Time taken: 24.321 seconds.

Model: RandomForestClassifier;Time taken: 43.831 seconds.

Model: RandomForestClassifier;Time taken: 58.665 seconds.

Model: RandomForestClassifier;Time taken: 65.308 seconds.

Model: RandomForestClassifier;Time taken: 100.562 seconds.

Model: RandomForestClassifier;Time taken: 40.539 seconds.

Model: RandomForestClassifier;Time taken: 32.859 seconds.

Model: RandomForestClassifier;Time taken: 18.752 seconds.

Model: RandomForestClassifier;Time taken: 65.657 seconds.



Unnamed: 0,accuracy,precision,recall,f1-score,-1_precision,-1_recall,-1_f1-score,0_precision,0_recall,0_f1-score,1_precision,1_recall,1_f1-score,num_classes,class_names,model_filename,feature_columns
0,0.751695,0.698523,0.748069,0.708232,0.862891,"(0.7740641711229946,)",0.816068,0.545763,"(0.8797814207650273,)",0.67364,0.686916,"(0.5903614457831325,)",0.634989,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
1,0.757627,0.704911,0.751526,0.713587,0.870343,"(0.7807486631016043,)",0.823115,0.537415,"(0.8633879781420765,)",0.662474,0.706977,"(0.6104417670682731,)",0.655172,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
2,0.758475,0.706372,0.751079,0.713921,0.869436,"(0.7834224598930482,)",0.824191,0.537415,"(0.8633879781420765,)",0.662474,0.712264,"(0.606425702811245,)",0.655098,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
3,0.371186,0.465836,0.561328,0.381921,0.746914,"(0.16176470588235295,)",0.265934,0.270016,"(0.9398907103825137,)",0.419512,0.380577,"(0.5823293172690763,)",0.460317,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
4,0.581356,0.586045,0.666825,0.566972,0.861432,"(0.49866310160427807,)",0.631668,0.349174,"(0.9234972677595629,)",0.506747,0.547529,"(0.5783132530120482,)",0.5625,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
5,0.731356,0.687464,0.753812,0.693294,0.888525,"(0.7245989304812834,)",0.798233,0.485795,"(0.9344262295081968,)",0.639252,0.688073,"(0.6024096385542169,)",0.642398,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
6,0.752542,0.698984,0.751121,0.710465,0.868778,"(0.7700534759358288,)",0.816442,0.538983,"(0.8688524590163934,)",0.665272,0.689189,"(0.6144578313253012,)",0.649682,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
7,0.779661,0.73195,0.769437,0.737585,0.872832,"(0.8074866310160428,)",0.838889,0.569395,"(0.8743169398907104,)",0.689655,0.753623,"(0.6265060240963856,)",0.684211,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
8,0.736441,0.686159,0.745961,0.69417,0.881141,"(0.7433155080213903,)",0.806381,0.49697,"(0.8961748633879781,)",0.639376,0.680365,"(0.5983935742971888,)",0.636752,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
9,0.70678,0.662068,0.728651,0.66698,0.877722,"(0.7005347593582888,)",0.779182,0.459834,"(0.907103825136612,)",0.610294,0.648649,"(0.5783132530120482,)",0.611465,3,"[-1, 1, 0]",./models/RandomForestClassifier_model_3cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."


In [32]:
# exp_results = []
# for clf_model in [rforest,adaBoost,gb_fast,gboost,knn,svm,lightgbm,dtree]:
#     result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_s, y_test_s,verbose=False)
#     exp_results.append(result)
# pd.DataFrame(exp_results)

In [33]:
pd.DataFrame(exp_results).to_excel("exp_result-0316-v4-aug-rf.xlsx")

### Emotion 8-class

In [34]:
exp_results = []
for clf_model in [rforest,rforest1,rforest2,rforest3,rforest4,rforest5,rforest6,rforest7,rforest8,rforest9,rforest10,rforest11,rforest12]:
    result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_e, y_test_e,verbose=False)
    exp_results.append(result)
pd.DataFrame(exp_results)

Model: RandomForestClassifier;Time taken: 46.52 seconds.

Model: RandomForestClassifier;Time taken: 223.968 seconds.

Model: RandomForestClassifier;Time taken: 450.691 seconds.

Model: RandomForestClassifier;Time taken: 11.562 seconds.

Model: RandomForestClassifier;Time taken: 19.946 seconds.

Model: RandomForestClassifier;Time taken: 36.807 seconds.

Model: RandomForestClassifier;Time taken: 43.514 seconds.

Model: RandomForestClassifier;Time taken: 50.938 seconds.

Model: RandomForestClassifier;Time taken: 76.375 seconds.

Model: RandomForestClassifier;Time taken: 29.762 seconds.

Model: RandomForestClassifier;Time taken: 29.45 seconds.

Model: RandomForestClassifier;Time taken: 19.316 seconds.

Model: RandomForestClassifier;Time taken: 62.458 seconds.



Unnamed: 0,accuracy,precision,recall,f1-score,Anger_precision,Anger_recall,Anger_f1-score,Calmness_precision,Calmness_recall,Calmness_f1-score,...,Sadness_precision,Sadness_recall,Sadness_f1-score,Surprise_precision,Surprise_recall,Surprise_f1-score,num_classes,class_names,model_filename,feature_columns
0,0.563559,0.676094,0.641858,0.573196,0.897059,"(0.6524064171122995,)",0.755418,0.4,"(1.0,)",0.571429,...,0.670588,"(0.6096256684491979,)",0.638655,0.779412,"(0.8548387096774194,)",0.815385,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
1,0.55678,0.670201,0.636591,0.562513,0.882353,"(0.6417112299465241,)",0.743034,0.372093,"(1.0,)",0.542373,...,0.672727,"(0.5935828877005348,)",0.630682,0.768116,"(0.8548387096774194,)",0.80916,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
2,0.555085,0.670076,0.633906,0.560517,0.875912,"(0.6417112299465241,)",0.740741,0.372093,"(1.0,)",0.542373,...,0.668675,"(0.5935828877005348,)",0.628895,0.764706,"(0.8387096774193549,)",0.8,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
3,0.288983,0.351742,0.378993,0.266673,0.85,"(0.18181818181818182,)",0.299559,0.457143,"(1.0,)",0.627451,...,0.0,"(0.0,)",0.0,1.0,"(0.2903225806451613,)",0.45,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
4,0.383898,0.470103,0.484856,0.390131,0.849057,"(0.48128342245989303,)",0.614334,0.421053,"(1.0,)",0.592593,...,0.0,"(0.0,)",0.0,0.891304,"(0.6612903225806451,)",0.759259,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
5,0.544915,0.684489,0.624937,0.560003,0.887218,"(0.6310160427807486,)",0.7375,0.4,"(1.0,)",0.571429,...,0.726619,"(0.5401069518716578,)",0.619632,0.809524,"(0.8225806451612904,)",0.816,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
6,0.554237,0.665445,0.630542,0.560776,0.853147,"(0.6524064171122995,)",0.739394,0.390244,"(1.0,)",0.561404,...,0.670659,"(0.5989304812834224,)",0.632768,0.769231,"(0.8064516129032258,)",0.787402,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
7,0.573729,0.665144,0.651147,0.578436,0.881119,"(0.6737967914438503,)",0.763636,0.380952,"(1.0,)",0.551724,...,0.666667,"(0.6203208556149733,)",0.642659,0.782609,"(0.8709677419354839,)",0.824427,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
8,0.538983,0.679332,0.617163,0.54946,0.871212,"(0.6149732620320856,)",0.721003,0.372093,"(1.0,)",0.542373,...,0.691275,"(0.5508021390374331,)",0.613095,0.816667,"(0.7903225806451613,)",0.803279,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
9,0.502542,0.675922,0.579226,0.514729,0.847328,"(0.5935828877005348,)",0.698113,0.380952,"(1.0,)",0.551724,...,0.714286,"(0.45454545454545453,)",0.555556,0.807692,"(0.6774193548387096,)",0.736842,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/RandomForestClassifier_model_8cls_128...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."


In [35]:
pd.DataFrame(exp_results).to_excel("exp_result-0316-v4-aug-rf-8.xlsx")

In [36]:
exp_clf_with_feature_selected(rforest, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.90      0.65      0.76       187
    Calmness       0.40      1.00      0.57        16
     Disgust       0.84      0.22      0.35       187
        Fear       1.00      0.24      0.39       187
   Happiness       0.45      0.69      0.54       187
  Neutrality       0.38      0.87      0.53       167
     Sadness       0.67      0.61      0.64       187
    Surprise       0.78      0.85      0.82        62

    accuracy                           0.56      1180
   macro avg       0.68      0.64      0.57      1180
weighted avg       0.71      0.56      0.55      1180

[[122   0   5   0  53   5   1   1]
 [  0  16   0   0   0   0   0   0]
 [  1   2  41   0  37  89  14   3]
 [  4   0   0  45  52  51  29   6]
 [  9   5   1   0 129  38   3   2]
 [  0   8   0   0   5 145   9   0]
 [  0   8   1   0  10  51 114   3]
 [  0   1   1   0   3  

({'accuracy': 0.5635593220338984,
  'precision': 0.6760937972178581,
  'recall': 0.641858361076708,
  'f1-score': 0.5731956001564034,
  'Anger_precision': 0.8970588235294118,
  'Anger_recall': (0.6524064171122995,),
  'Anger_f1-score': 0.7554179566563469,
  'Calmness_precision': 0.4,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5714285714285715,
  'Disgust_precision': 0.8367346938775511,
  'Disgust_recall': (0.2192513368983957,),
  'Disgust_f1-score': 0.34745762711864403,
  'Fear_precision': 1.0,
  'Fear_recall': (0.24064171122994651,),
  'Fear_f1-score': 0.38793103448275856,
  'Happiness_precision': 0.4463667820069204,
  'Happiness_recall': (0.6898395721925134,),
  'Happiness_f1-score': 0.5420168067226891,
  'Neutrality_precision': 0.3785900783289817,
  'Neutrality_recall': (0.8682634730538922,),
  'Neutrality_f1-score': 0.5272727272727273,
  'Sadness_precision': 0.6705882352941176,
  'Sadness_recall': (0.6096256684491979,),
  'Sadness_f1-score': 0.6386554621848739,
  'Surpri

In [37]:
exp_clf_with_feature_selected(rforest1, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.88      0.64      0.74       187
    Calmness       0.37      1.00      0.54        16
     Disgust       0.85      0.21      0.33       187
        Fear       1.00      0.23      0.37       187
   Happiness       0.44      0.69      0.54       187
  Neutrality       0.38      0.87      0.53       167
     Sadness       0.67      0.59      0.63       187
    Surprise       0.77      0.85      0.81        62

    accuracy                           0.56      1180
   macro avg       0.67      0.64      0.56      1180
weighted avg       0.71      0.56      0.54      1180

[[120   0   4   0  57   4   0   2]
 [  0  16   0   0   0   0   0   0]
 [  1   4  39   0  38  85  16   4]
 [  5   0   0  43  52  53  28   6]
 [ 10   6   2   0 129  37   2   1]
 [  0   8   0   0   5 146   8   0]
 [  0   8   1   0  10  54 111   3]
 [  0   1   0   0   4  

({'accuracy': 0.5567796610169492,
  'precision': 0.6702005557655885,
  'recall': 0.6365908212899714,
  'f1-score': 0.5625131066296988,
  'Anger_precision': 0.8823529411764706,
  'Anger_recall': (0.6417112299465241,),
  'Anger_f1-score': 0.7430340557275542,
  'Calmness_precision': 0.37209302325581395,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5423728813559322,
  'Disgust_precision': 0.8478260869565217,
  'Disgust_recall': (0.20855614973262032,),
  'Disgust_f1-score': 0.33476394849785407,
  'Fear_precision': 1.0,
  'Fear_recall': (0.22994652406417113,),
  'Fear_f1-score': 0.3739130434782609,
  'Happiness_precision': 0.43728813559322033,
  'Happiness_recall': (0.6898395721925134,),
  'Happiness_f1-score': 0.5352697095435685,
  'Neutrality_precision': 0.381201044386423,
  'Neutrality_recall': (0.874251497005988,),
  'Neutrality_f1-score': 0.530909090909091,
  'Sadness_precision': 0.6727272727272727,
  'Sadness_recall': (0.5935828877005348,),
  'Sadness_f1-score': 0.630681818181

In [38]:
exp_clf_with_feature_selected(rforest2, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.88      0.64      0.74       187
    Calmness       0.37      1.00      0.54        16
     Disgust       0.86      0.20      0.33       187
        Fear       1.00      0.24      0.38       187
   Happiness       0.44      0.68      0.53       187
  Neutrality       0.38      0.87      0.53       167
     Sadness       0.67      0.59      0.63       187
    Surprise       0.76      0.84      0.80        62

    accuracy                           0.56      1180
   macro avg       0.67      0.63      0.56      1180
weighted avg       0.71      0.56      0.54      1180

[[120   0   4   0  58   3   1   1]
 [  0  16   0   0   0   0   0   0]
 [  2   3  38   0  37  87  16   4]
 [  5   0   0  44  51  53  28   6]
 [ 10   6   1   0 128  38   2   2]
 [  0   8   0   0   5 146   8   0]
 [  0   8   1   0  10  54 111   3]
 [  0   2   0   0   5  

({'accuracy': 0.5550847457627118,
  'precision': 0.6700756074740776,
  'recall': 0.6339062430598523,
  'f1-score': 0.5605169502441141,
  'Anger_precision': 0.8759124087591241,
  'Anger_recall': (0.6417112299465241,),
  'Anger_f1-score': 0.7407407407407407,
  'Calmness_precision': 0.37209302325581395,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5423728813559322,
  'Disgust_precision': 0.8636363636363636,
  'Disgust_recall': (0.20320855614973263,),
  'Disgust_f1-score': 0.32900432900432897,
  'Fear_precision': 1.0,
  'Fear_recall': (0.23529411764705882,),
  'Fear_f1-score': 0.38095238095238093,
  'Happiness_precision': 0.43537414965986393,
  'Happiness_recall': (0.6844919786096256,),
  'Happiness_f1-score': 0.5322245322245323,
  'Neutrality_precision': 0.3802083333333333,
  'Neutrality_recall': (0.874251497005988,),
  'Neutrality_f1-score': 0.52994555353902,
  'Sadness_precision': 0.6686746987951807,
  'Sadness_recall': (0.5935828877005348,),
  'Sadness_f1-score': 0.62889518413

In [39]:
exp_clf_with_feature_selected(rforest3, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.85      0.18      0.30       187
    Calmness       0.46      1.00      0.63        16
     Disgust       0.00      0.00      0.00       187
        Fear       0.00      0.00      0.00       187
   Happiness       0.27      0.63      0.37       187
  Neutrality       0.24      0.93      0.38       167
     Sadness       0.00      0.00      0.00       187
    Surprise       1.00      0.29      0.45        62

    accuracy                           0.29      1180
   macro avg       0.35      0.38      0.27      1180
weighted avg       0.27      0.29      0.19      1180

[[ 34   0   0   0 137  16   0   0]
 [  0  16   0   0   0   0   0   0]
 [  0   0   0   0  44 143   0   0]
 [  0   0   0   0  96  91   0   0]
 [  6   2   0   0 117  62   0   0]
 [  0   8   0   0   3 156   0   0]
 [  0   8   0   0  11 168   0   0]
 [  0   1   0   0  33  

({'accuracy': 0.2889830508474576,
  'precision': 0.3517418809629115,
  'recall': 0.3789926185235188,
  'f1-score': 0.26667321929554066,
  'Anger_precision': 0.85,
  'Anger_recall': (0.18181818181818182,),
  'Anger_f1-score': 0.29955947136563876,
  'Calmness_precision': 0.45714285714285713,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.6274509803921569,
  'Disgust_precision': 0.0,
  'Disgust_recall': (0.0,),
  'Disgust_f1-score': 0.0,
  'Fear_precision': 0.0,
  'Fear_recall': (0.0,),
  'Fear_f1-score': 0.0,
  'Happiness_precision': 0.2653061224489796,
  'Happiness_recall': (0.6256684491978609,),
  'Happiness_f1-score': 0.3726114649681529,
  'Neutrality_precision': 0.24148606811145512,
  'Neutrality_recall': (0.9341317365269461,),
  'Neutrality_f1-score': 0.38376383763837646,
  'Sadness_precision': 0.0,
  'Sadness_recall': (0.0,),
  'Sadness_f1-score': 0.0,
  'Surprise_precision': 1.0,
  'Surprise_recall': (0.2903225806451613,),
  'Surprise_f1-score': 0.45000000000000007,
  'num_

In [40]:
exp_clf_with_feature_selected(rforest4, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.85      0.48      0.61       187
    Calmness       0.42      1.00      0.59        16
     Disgust       0.00      0.00      0.00       187
        Fear       1.00      0.19      0.32       187
   Happiness       0.35      0.61      0.45       187
  Neutrality       0.25      0.93      0.39       167
     Sadness       0.00      0.00      0.00       187
    Surprise       0.89      0.66      0.76        62

    accuracy                           0.38      1180
   macro avg       0.47      0.48      0.39      1180
weighted avg       0.44      0.38      0.32      1180

[[ 90   0   0   0  83  14   0   0]
 [  0  16   0   0   0   0   0   0]
 [  1   0   0   0  47 138   0   1]
 [  3   0   0  35  56  91   0   2]
 [ 11   4   0   0 115  57   0   0]
 [  0   8   0   0   3 156   0   0]
 [  0   8   0   0  12 165   0   2]
 [  1   2   0   0  12  

({'accuracy': 0.38389830508474576,
  'precision': 0.47010339587842204,
  'recall': 0.4848555648750799,
  'f1-score': 0.39013133539738254,
  'Anger_precision': 0.8490566037735849,
  'Anger_recall': (0.48128342245989303,),
  'Anger_f1-score': 0.6143344709897611,
  'Calmness_precision': 0.42105263157894735,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5925925925925926,
  'Disgust_precision': 0.0,
  'Disgust_recall': (0.0,),
  'Disgust_f1-score': 0.0,
  'Fear_precision': 1.0,
  'Fear_recall': (0.18716577540106952,),
  'Fear_f1-score': 0.3153153153153153,
  'Happiness_precision': 0.35060975609756095,
  'Happiness_recall': (0.6149732620320856,),
  'Happiness_f1-score': 0.4466019417475728,
  'Neutrality_precision': 0.24880382775119617,
  'Neutrality_recall': (0.9341317365269461,),
  'Neutrality_f1-score': 0.39294710327455923,
  'Sadness_precision': 0.0,
  'Sadness_recall': (0.0,),
  'Sadness_f1-score': 0.0,
  'Surprise_precision': 0.8913043478260869,
  'Surprise_recall': (0.661290322

In [41]:
exp_clf_with_feature_selected(rforest5, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.89      0.63      0.74       187
    Calmness       0.40      1.00      0.57        16
     Disgust       0.87      0.21      0.34       187
        Fear       1.00      0.22      0.37       187
   Happiness       0.44      0.67      0.53       187
  Neutrality       0.35      0.90      0.51       167
     Sadness       0.73      0.54      0.62       187
    Surprise       0.81      0.82      0.82        62

    accuracy                           0.54      1180
   macro avg       0.68      0.62      0.56      1180
weighted avg       0.72      0.54      0.53      1180

[[118   0   4   0  57   8   0   0]
 [  0  16   0   0   0   0   0   0]
 [  0   2  39   0  36  99   8   3]
 [  5   0   0  42  53  60  22   5]
 [ 10   6   1   0 125  42   2   1]
 [  0   8   0   0   2 151   6   0]
 [  0   8   1   0   8  66 101   3]
 [  0   0   0   0   6  

({'accuracy': 0.5449152542372881,
  'precision': 0.6844894154834835,
  'recall': 0.6249374418318787,
  'f1-score': 0.5600028099550752,
  'Anger_precision': 0.8872180451127819,
  'Anger_recall': (0.6310160427807486,),
  'Anger_f1-score': 0.7374999999999999,
  'Calmness_precision': 0.4,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5714285714285715,
  'Disgust_precision': 0.8666666666666667,
  'Disgust_recall': (0.20855614973262032,),
  'Disgust_f1-score': 0.33620689655172414,
  'Fear_precision': 1.0,
  'Fear_recall': (0.22459893048128343,),
  'Fear_f1-score': 0.36681222707423583,
  'Happiness_precision': 0.4355400696864111,
  'Happiness_recall': (0.6684491978609626,),
  'Happiness_f1-score': 0.5274261603375527,
  'Neutrality_precision': 0.3503480278422274,
  'Neutrality_recall': (0.9041916167664671,),
  'Neutrality_f1-score': 0.5050167224080268,
  'Sadness_precision': 0.7266187050359713,
  'Sadness_recall': (0.5401069518716578,),
  'Sadness_f1-score': 0.6196319018404908,
  'Surp

In [42]:
exp_clf_with_feature_selected(rforest6, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.85      0.65      0.74       187
    Calmness       0.39      1.00      0.56        16
     Disgust       0.83      0.20      0.33       187
        Fear       1.00      0.24      0.38       187
   Happiness       0.43      0.67      0.53       187
  Neutrality       0.38      0.87      0.53       167
     Sadness       0.67      0.60      0.63       187
    Surprise       0.77      0.81      0.79        62

    accuracy                           0.55      1180
   macro avg       0.67      0.63      0.56      1180
weighted avg       0.70      0.55      0.54      1180

[[122   0   4   0  55   5   0   1]
 [  0  16   0   0   0   0   0   0]
 [  1   2  38   0  38  89  15   4]
 [  6   1   0  44  50  52  28   6]
 [ 14   6   3   0 126  34   3   1]
 [  0   8   0   0   4 146   9   0]
 [  0   8   1   0  11  52 112   3]
 [  0   0   0   0   7  

({'accuracy': 0.5542372881355933,
  'precision': 0.6654447373851486,
  'recall': 0.6305424341931971,
  'f1-score': 0.5607757208303508,
  'Anger_precision': 0.8531468531468531,
  'Anger_recall': (0.6524064171122995,),
  'Anger_f1-score': 0.7393939393939394,
  'Calmness_precision': 0.3902439024390244,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5614035087719299,
  'Disgust_precision': 0.8260869565217391,
  'Disgust_recall': (0.20320855614973263,),
  'Disgust_f1-score': 0.3261802575107296,
  'Fear_precision': 1.0,
  'Fear_recall': (0.23529411764705882,),
  'Fear_f1-score': 0.38095238095238093,
  'Happiness_precision': 0.4329896907216495,
  'Happiness_recall': (0.6737967914438503,),
  'Happiness_f1-score': 0.5271966527196652,
  'Neutrality_precision': 0.381201044386423,
  'Neutrality_recall': (0.874251497005988,),
  'Neutrality_f1-score': 0.530909090909091,
  'Sadness_precision': 0.6706586826347305,
  'Sadness_recall': (0.5989304812834224,),
  'Sadness_f1-score': 0.63276836158192

In [43]:
exp_clf_with_feature_selected(rforest7, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.88      0.67      0.76       187
    Calmness       0.38      1.00      0.55        16
     Disgust       0.79      0.22      0.35       187
        Fear       0.96      0.25      0.39       187
   Happiness       0.46      0.71      0.56       187
  Neutrality       0.40      0.86      0.54       167
     Sadness       0.67      0.62      0.64       187
    Surprise       0.78      0.87      0.82        62

    accuracy                           0.57      1180
   macro avg       0.67      0.65      0.58      1180
weighted avg       0.70      0.57      0.56      1180

[[126   0   5   0  50   4   1   1]
 [  0  16   0   0   0   0   0   0]
 [  2   2  42   1  35  84  17   4]
 [  5   1   2  46  50  48  29   6]
 [ 10   5   1   0 133  34   3   1]
 [  0   8   0   1   6 144   8   0]
 [  0   8   2   0  10  48 116   3]
 [  0   2   1   0   3  

({'accuracy': 0.573728813559322,
  'precision': 0.6651439772078566,
  'recall': 0.6511473774892858,
  'f1-score': 0.5784363674898798,
  'Anger_precision': 0.8811188811188811,
  'Anger_recall': (0.6737967914438503,),
  'Anger_f1-score': 0.7636363636363637,
  'Calmness_precision': 0.38095238095238093,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5517241379310345,
  'Disgust_precision': 0.7924528301886793,
  'Disgust_recall': (0.22459893048128343,),
  'Disgust_f1-score': 0.35000000000000003,
  'Fear_precision': 0.9583333333333334,
  'Fear_recall': (0.24598930481283424,),
  'Fear_f1-score': 0.39148936170212767,
  'Happiness_precision': 0.4634146341463415,
  'Happiness_recall': (0.7112299465240641,),
  'Happiness_f1-score': 0.5611814345991561,
  'Neutrality_precision': 0.3956043956043956,
  'Neutrality_recall': (0.8622754491017964,),
  'Neutrality_f1-score': 0.5423728813559322,
  'Sadness_precision': 0.6666666666666666,
  'Sadness_recall': (0.6203208556149733,),
  'Sadness_f1-score

In [44]:
exp_clf_with_feature_selected(rforest8, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.87      0.61      0.72       187
    Calmness       0.37      1.00      0.54        16
     Disgust       0.90      0.20      0.32       187
        Fear       1.00      0.22      0.36       187
   Happiness       0.43      0.69      0.53       187
  Neutrality       0.36      0.87      0.51       167
     Sadness       0.69      0.55      0.61       187
    Surprise       0.82      0.79      0.80        62

    accuracy                           0.54      1180
   macro avg       0.68      0.62      0.55      1180
weighted avg       0.71      0.54      0.52      1180

[[115   0   1   0  63   7   0   1]
 [  0  16   0   0   0   0   0   0]
 [  1   3  37   0  36  96  10   4]
 [  6   0   0  41  52  57  27   4]
 [ 10   6   0   0 129  40   2   0]
 [  0   8   0   0   6 146   7   0]
 [  0   8   3   0  10  61 103   2]
 [  0   2   0   0   7  

({'accuracy': 0.5389830508474577,
  'precision': 0.6793324651399772,
  'recall': 0.6171626687973027,
  'f1-score': 0.5494601740939213,
  'Anger_precision': 0.8712121212121212,
  'Anger_recall': (0.6149732620320856,),
  'Anger_f1-score': 0.7210031347962381,
  'Calmness_precision': 0.37209302325581395,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5423728813559322,
  'Disgust_precision': 0.9024390243902439,
  'Disgust_recall': (0.19786096256684493,),
  'Disgust_f1-score': 0.324561403508772,
  'Fear_precision': 1.0,
  'Fear_recall': (0.2192513368983957,),
  'Fear_f1-score': 0.35964912280701755,
  'Happiness_precision': 0.42574257425742573,
  'Happiness_recall': (0.6898395721925134,),
  'Happiness_f1-score': 0.5265306122448978,
  'Neutrality_precision': 0.35523114355231145,
  'Neutrality_recall': (0.874251497005988,),
  'Neutrality_f1-score': 0.5051903114186851,
  'Sadness_precision': 0.6912751677852349,
  'Sadness_recall': (0.5508021390374331,),
  'Sadness_f1-score': 0.61309523809

In [45]:
exp_clf_with_feature_selected(rforest9, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.85      0.59      0.70       187
    Calmness       0.38      1.00      0.55        16
     Disgust       0.94      0.16      0.27       187
        Fear       1.00      0.20      0.33       187
   Happiness       0.39      0.66      0.49       187
  Neutrality       0.33      0.89      0.48       167
     Sadness       0.71      0.45      0.56       187
    Surprise       0.81      0.68      0.74        62

    accuracy                           0.50      1180
   macro avg       0.68      0.58      0.51      1180
weighted avg       0.71      0.50      0.49      1180

[[111   0   1   0  66   8   0   1]
 [  0  16   0   0   0   0   0   0]
 [  2   0  30   0  38 105   7   5]
 [  6   1   0  37  58  63  21   1]
 [ 11   6   0   0 123  45   1   1]
 [  0   8   0   0   5 149   5   0]
 [  0   8   0   0  14  78  85   2]
 [  1   3   1   0  12  

({'accuracy': 0.502542372881356,
  'precision': 0.6759220117084204,
  'recall': 0.5792257558369547,
  'f1-score': 0.5147288756881008,
  'Anger_precision': 0.8473282442748091,
  'Anger_recall': (0.5935828877005348,),
  'Anger_f1-score': 0.6981132075471699,
  'Calmness_precision': 0.38095238095238093,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5517241379310345,
  'Disgust_precision': 0.9375,
  'Disgust_recall': (0.16042780748663102,),
  'Disgust_f1-score': 0.273972602739726,
  'Fear_precision': 1.0,
  'Fear_recall': (0.19786096256684493,),
  'Fear_f1-score': 0.3303571428571429,
  'Happiness_precision': 0.38924050632911394,
  'Happiness_recall': (0.6577540106951871,),
  'Happiness_f1-score': 0.48906560636182894,
  'Neutrality_precision': 0.3303769401330377,
  'Neutrality_recall': (0.8922155688622755,),
  'Neutrality_f1-score': 0.482200647249191,
  'Sadness_precision': 0.7142857142857143,
  'Sadness_recall': (0.45454545454545453,),
  'Sadness_f1-score': 0.5555555555555556,
  'Su

In [46]:
exp_clf_with_feature_selected(rforest10, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.90      0.65      0.76       187
    Calmness       0.40      1.00      0.57        16
     Disgust       0.84      0.22      0.35       187
        Fear       1.00      0.24      0.39       187
   Happiness       0.45      0.69      0.54       187
  Neutrality       0.38      0.87      0.53       167
     Sadness       0.67      0.61      0.64       187
    Surprise       0.78      0.85      0.82        62

    accuracy                           0.56      1180
   macro avg       0.68      0.64      0.57      1180
weighted avg       0.71      0.56      0.55      1180

[[122   0   5   0  53   5   1   1]
 [  0  16   0   0   0   0   0   0]
 [  1   2  41   0  37  89  14   3]
 [  4   0   0  45  52  51  29   6]
 [  9   5   1   0 129  38   3   2]
 [  0   8   0   0   5 145   9   0]
 [  0   8   1   0  10  51 114   3]
 [  0   1   1   0   3  

({'accuracy': 0.5635593220338984,
  'precision': 0.6760937972178581,
  'recall': 0.641858361076708,
  'f1-score': 0.5731956001564034,
  'Anger_precision': 0.8970588235294118,
  'Anger_recall': (0.6524064171122995,),
  'Anger_f1-score': 0.7554179566563469,
  'Calmness_precision': 0.4,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5714285714285715,
  'Disgust_precision': 0.8367346938775511,
  'Disgust_recall': (0.2192513368983957,),
  'Disgust_f1-score': 0.34745762711864403,
  'Fear_precision': 1.0,
  'Fear_recall': (0.24064171122994651,),
  'Fear_f1-score': 0.38793103448275856,
  'Happiness_precision': 0.4463667820069204,
  'Happiness_recall': (0.6898395721925134,),
  'Happiness_f1-score': 0.5420168067226891,
  'Neutrality_precision': 0.3785900783289817,
  'Neutrality_recall': (0.8682634730538922,),
  'Neutrality_f1-score': 0.5272727272727273,
  'Sadness_precision': 0.6705882352941176,
  'Sadness_recall': (0.6096256684491979,),
  'Sadness_f1-score': 0.6386554621848739,
  'Surpri

In [47]:
exp_clf_with_feature_selected(rforest11, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.90      0.65      0.76       187
    Calmness       0.40      1.00      0.57        16
     Disgust       0.84      0.22      0.35       187
        Fear       1.00      0.24      0.39       187
   Happiness       0.45      0.69      0.54       187
  Neutrality       0.38      0.87      0.53       167
     Sadness       0.67      0.61      0.64       187
    Surprise       0.78      0.85      0.82        62

    accuracy                           0.56      1180
   macro avg       0.68      0.64      0.57      1180
weighted avg       0.71      0.56      0.55      1180

[[122   0   5   0  53   5   1   1]
 [  0  16   0   0   0   0   0   0]
 [  1   2  41   0  37  89  14   3]
 [  4   0   0  45  52  51  29   6]
 [  9   5   1   0 129  38   3   2]
 [  0   8   0   0   5 145   9   0]
 [  0   8   1   0  10  51 114   3]
 [  0   1   1   0   3  

({'accuracy': 0.5635593220338984,
  'precision': 0.6760937972178581,
  'recall': 0.641858361076708,
  'f1-score': 0.5731956001564034,
  'Anger_precision': 0.8970588235294118,
  'Anger_recall': (0.6524064171122995,),
  'Anger_f1-score': 0.7554179566563469,
  'Calmness_precision': 0.4,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5714285714285715,
  'Disgust_precision': 0.8367346938775511,
  'Disgust_recall': (0.2192513368983957,),
  'Disgust_f1-score': 0.34745762711864403,
  'Fear_precision': 1.0,
  'Fear_recall': (0.24064171122994651,),
  'Fear_f1-score': 0.38793103448275856,
  'Happiness_precision': 0.4463667820069204,
  'Happiness_recall': (0.6898395721925134,),
  'Happiness_f1-score': 0.5420168067226891,
  'Neutrality_precision': 0.3785900783289817,
  'Neutrality_recall': (0.8682634730538922,),
  'Neutrality_f1-score': 0.5272727272727273,
  'Sadness_precision': 0.6705882352941176,
  'Sadness_recall': (0.6096256684491979,),
  'Sadness_f1-score': 0.6386554621848739,
  'Surpri

In [48]:
exp_clf_with_feature_selected(rforest12, X_train, X_test, y_train_e, y_test_e)

Model Name: RandomForestClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.87      0.70      0.78       187
    Calmness       0.38      1.00      0.55        16
     Disgust       0.80      0.21      0.33       187
        Fear       1.00      0.24      0.39       187
   Happiness       0.46      0.68      0.55       187
  Neutrality       0.39      0.87      0.54       167
     Sadness       0.66      0.61      0.64       187
    Surprise       0.79      0.89      0.83        62

    accuracy                           0.57      1180
   macro avg       0.67      0.65      0.58      1180
weighted avg       0.70      0.57      0.55      1180

[[131   0   5   0  45   5   0   1]
 [  0  16   0   0   0   0   0   0]
 [  2   2  39   0  40  83  18   3]
 [  8   0   0  45  48  50  29   7]
 [ 10   7   2   0 128  36   2   2]
 [  0   8   0   0   4 146   9   0]
 [  0   8   3   0  10  50 114   2]
 [  0   1   0   0   3  

({'accuracy': 0.5711864406779661,
  'precision': 0.6680972458458256,
  'recall': 0.650649817322402,
  'f1-score': 0.5756275971878794,
  'Anger_precision': 0.8675496688741722,
  'Anger_recall': (0.7005347593582888,),
  'Anger_f1-score': 0.7751479289940829,
  'Calmness_precision': 0.38095238095238093,
  'Calmness_recall': (1.0,),
  'Calmness_f1-score': 0.5517241379310345,
  'Disgust_precision': 0.7959183673469388,
  'Disgust_recall': (0.20855614973262032,),
  'Disgust_f1-score': 0.3305084745762712,
  'Fear_precision': 1.0,
  'Fear_recall': (0.24064171122994651,),
  'Fear_f1-score': 0.38793103448275856,
  'Happiness_precision': 0.460431654676259,
  'Happiness_recall': (0.6844919786096256,),
  'Happiness_f1-score': 0.5505376344086023,
  'Neutrality_precision': 0.3914209115281501,
  'Neutrality_recall': (0.874251497005988,),
  'Neutrality_f1-score': 0.5407407407407407,
  'Sadness_precision': 0.6627906976744186,
  'Sadness_recall': (0.6096256684491979,),
  'Sadness_f1-score': 0.6350974930362

In [49]:
# change y_lable into emo
# exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_e, y_test_e)

In [50]:
# check how long on normal gradient boosting
# exp_clf_with_feature_selected(gboost, X_train, X_test, y_train_e, y_test_e)

### Threshold tuning

In [51]:
# Calculate accuracy for the given threshold

probabilities = rforest6.predict_proba(X_test)

def calc_acc_by_thres(probabilities, threshold, y_test):
    predictions_adj = []
    # Loop through each sample's probabilities
    for probs in probabilities:
        if probs[0] > threshold:
            pred_class = -1
        elif probs[1] > probs[2]:
            pred_class = 0
        else:
            pred_class = 1
        predictions_adj.append(pred_class)
    accuracy = np.mean(predictions_adj == y_test)
    precision, recall, f1score, _ = precision_recall_fscore_support(y_test_s, predictions_adj, average=None)
    return accuracy, min(f1score), np.var(f1score)

best_threshold = None
best_accuracy = 0.0
best_f1score = 0.0
# best_f1s_var = 10

# Define a range of threshold values to try
threshold_range = np.linspace(0.25, 0.75, 100)
for threshold in threshold_range:
    accuracy, min_f1_score, var_f1_score = calc_acc_by_thres(probabilities, threshold, y_test_s)
    # if accuracy > best_accuracy:
    #     best_accuracy = accuracy
    #     best_threshold = threshold
    if min_f1_score > best_f1score:
        best_f1score = min_f1_score
        best_threshold = threshold
        best_accuracy = accuracy
    # if var_f1_score < best_f1s_var:
    #     best_f1score = min_f1_score
    #     best_threshold = threshold
    #     best_accuracy = accuracy
    #     best_f1s_var = var_f1_score
        

print("Best Threshold:", best_threshold)
print("Best Accuracy:", best_accuracy)
print("Best min f1 score:", best_f1score)
# print("Best var f1 score:", best_f1s_var)

Best Threshold: 0.25
Best Accuracy: 0.34576271186440677
Best min f1 score: 0.21399176954732513


In [53]:
# if you want to adjust the threshold; Predict probabilities for each class label;
# best threshold and best_threshold x 110% 120% 130% to see trend
probabilities = rforest6.predict_proba(X_test)

threshold = best_threshold
print('BEST Threshold:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.1
print('Threshold-2:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.2
print('Threshold-3:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.3
print('Threshold-4:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

BEST Threshold: 0.25
              precision    recall  f1-score   support

          -1       0.87      0.23      0.37       748
           0       0.43      0.14      0.21       183
           1       0.23      0.84      0.36       249

    accuracy                           0.35      1180
   macro avg       0.51      0.40      0.31      1180
weighted avg       0.67      0.35      0.34      1180

[[173  20 555]
 [  0  26 157]
 [ 26  14 209]]
Accuracy: 0.34576271186440677
Threshold-2: 0.275
              precision    recall  f1-score   support

          -1       0.89      0.22      0.36       748
           0       0.43      0.14      0.21       183
           1       0.23      0.86      0.36       249

    accuracy                           0.34      1180
   macro avg       0.52      0.41      0.31      1180
weighted avg       0.68      0.34      0.33      1180

[[166  20 562]
 [  0  26 157]
 [ 21  14 214]]
Accuracy: 0.3440677966101695
Threshold-3: 0.3
              precision    rec