In [1]:
import numpy as np
import pandas as pd
import glob
import os, sys
import matplotlib.pyplot as plt
import time
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
VERSION = 4
RANDOM_SEED = 26

## Load data

### Load Data - downsampled

In [2]:
# df_joint_train_org   = pd.read_csv(f'./features/cache_all_features_train_V{VERSION}.csv')
# df_joint_train_org   = df_joint_train_org.drop(columns=['GNE_max_gne','GNE_mean_gne','GNE_stddev_gne','GNE_sum_gne'])
                       
# df_joint_test_org = pd.read_csv(f'./features/cache_all_features_test_V{VERSION}.csv').drop(
#     columns=['GNE_max_gne','GNE_mean_gne','GNE_stddev_gne','GNE_sum_gne'])

# print("shape of train set: ", df_joint_train_org.shape)
# print("shape of test  set: ", df_joint_test_org.shape)


# df_joint_train  = pd.read_csv(f'./features/cache_train_V4_resampled_2500.csv')
df_joint_test = pd.read_csv(f'./cache_all_features_test_V4.csv')

# print("shape of train set: ", df_joint_train.shape)
print("shape of test  set: ", df_joint_test.shape)

df_joint_train_aug  = pd.read_csv(f'./cache_train_V4_augmented.csv')
feature_column_names = [i for i in df_joint_train_aug.columns \
                        if i not in ['file_path','renamed_file_path','split','sentiment_value','emotional_category']]
             
print("shape of train set: ", df_joint_train_aug.shape)
df_joint_train_aug.groupby('sentiment_value')['file_path'].count()

shape of test  set:  (1180, 1550)
shape of train set:  (24885, 1546)


sentiment_value
-1    7999
 0    8560
 1    8326
Name: file_path, dtype: int64

### best guess feature combinations

In [3]:
# generate selected features 
def generate_selected_features_by_type(feature_column_names,input,stats,number=1):
    selected_result = []
    for name in feature_column_names:
        if input+"_"+stats in name:
            selected_result.append(name)
    if number < len(selected_result):
        selected_result = selected_result[:number]
    return selected_result

# example to take mfcc 20 mean & std; mel32; zcr all 5 stats features
feature_MFCC20_mean  = generate_selected_features_by_type(feature_column_names,"mfcc","mean",20)
feature_MFCC20_std   = generate_selected_features_by_type(feature_column_names,"mfcc","std",20)
feature_mel32_median = generate_selected_features_by_type(feature_column_names,"mel32","median",32)
feature_mel32_std    = generate_selected_features_by_type(feature_column_names,"mel32","std",32)
feature_zcr_stats    = generate_selected_features_by_type(feature_column_names,"zcr","",5)
feature_rms_stats    = generate_selected_features_by_type(feature_column_names,"rms","",5)
selected_spect = ['Spectrum_band_energy_difference','Spectrum_band_density_difference','Spectrum_center_of_gravity_spectrum','Spectrum_skewness_spectrum','Spectrum_kurtosis_spectrum', 'Spectrum_stddev_spectrum','Spectrum_band_density', 'Spectrum_band_energy']
selected_formant = ['Formant_f1_mean','Formant_f1_median','Formant_f3_mean','Formant_fitch_vtl','Formant_mff','Formant_formant_dispersion']
selected_pitch = ['Pitch_pitch_slope_without_octave_jumps', 'Pitch_q3_pitch','Pitch_stddev_pitch', 'Pitch_mean_absolute_pitch_slope','Pitch_mean_pitch', 'Pitch_max_pitch', 'Pitch_q1_pitch', 'Pitch_min_pitch']
selected_intensity = ['Intensity_max_intensity', 'Intensity_q3_intensity','Intensity_median_intensity', 'Intensity_mean_intensity', 'Intensity_stddev_intensity','Intensity_relative_max_intensity_time']
selected_HNR = ['HNR_stddev_hnr', 'HNR_mean_hnr','HNR_relative_min_hnr_time','HNR_max_hnr']
selected_prosody = selected_intensity + selected_pitch # + ['Local Jitter','Local Shimmer']
selected_feature_names = feature_MFCC20_mean + feature_MFCC20_std + feature_mel32_median + feature_mel32_std + \
                        feature_zcr_stats + feature_rms_stats + selected_intensity + selected_pitch 

In [4]:
# default use augmented training set and balanced test set
X_train = df_joint_train_aug[selected_feature_names]
y_train_s = df_joint_train_aug['sentiment_value']
y_train_e = df_joint_train_aug['emotional_category']

# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# y_e_num = label_encoder.fit_transform(y_train_e)

X_test = df_joint_test[selected_feature_names]
y_test_s = df_joint_test['sentiment_value']
y_test_e = df_joint_test['emotional_category']

# y_test_e_num = label_encoder.fit_transform(y_test_e)

In [5]:
X_train.shape, X_test.shape

((24885, 128), (1180, 128))

In [6]:
# !pip install lightgbm

## Models

In [7]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score, precision_recall_fscore_support
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier,HistGradientBoostingClassifier
from lightgbm import LGBMClassifier

# Common adjustable parameters
common_params = {
    'RandomForest': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest1': {'n_estimators': 500, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest2': {'n_estimators': 1000, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest3': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 3, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest4': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 5, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest5': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 10, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest6': {'n_estimators': 100, 'criterion':'gini', 'max_depth': 20, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest7': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':50, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest8': {'n_estimators': 200, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':200, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest9': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':500, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'RandomForest10': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':5, 'random_state': RANDOM_SEED},
    'RandomForest11': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':10, 'random_state': RANDOM_SEED},
    'RandomForest12': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':False, 'n_jobs':3, 'random_state': RANDOM_SEED},

    
    
    'SVM': {'kernel': 'rbf', 'C': 1.0, 'probability': True},
    'KNN': {'n_neighbors': 2},
    'KNN1': {'n_neighbors': 3},
    'KNN2': {'n_neighbors': 4},
    'KNN3': {'n_neighbors': 5},
    'KNN4': {'n_neighbors': 6},
    'KNN5': {'n_neighbors': 8},
    'KNN6': {'n_neighbors': 10},
    'KNN7': {'n_neighbors': 12},
    'KNN8': {'n_neighbors': 15},
    'KNN9': {'n_neighbors': 20},
    'KNN10': {'n_neighbors': 30},
    'GradientBoosting': {'loss': 'log_loss', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                         'criterion': 'friedman_mse', 'min_samples_split': 2, 'max_depth': 3},
    'GradientBoostingFast': {'loss': 'log_loss', 'learning_rate': 0.1, 'max_iter': 50},
    'GradientBoostingFast1': {'loss': 'log_loss', 'learning_rate': 0.1, 'max_iter': 100},
    'GradientBoostingFast2': {'loss': 'log_loss', 'learning_rate': 0.1, 'max_iter': 200},
    'GradientBoostingFast3': {'loss': 'log_loss', 'learning_rate': 0.05, 'max_iter': 50},
    'GradientBoostingFast4': {'loss': 'log_loss', 'learning_rate': 0.05, 'max_iter': 100},
    'GradientBoostingFast5': {'loss': 'log_loss', 'learning_rate': 0.05, 'max_iter': 200},
    'GradientBoostingFast6': {'loss': 'log_loss', 'learning_rate': 0.5, 'max_iter': 50},
    'GradientBoostingFast7': {'loss': 'log_loss', 'learning_rate': 0.5, 'max_iter': 100},
    'GradientBoostingFast8': {'loss': 'log_loss', 'learning_rate': 0.5, 'max_iter': 200},
    'GradientBoostingFast9': {'loss': 'log_loss', 'learning_rate': 0.01, 'max_iter': 50},
    'GradientBoostingFast10': {'loss': 'log_loss', 'learning_rate': 0.01, 'max_iter': 100},
    'GradientBoostingFast11': {'loss': 'log_loss', 'learning_rate': 0.01, 'max_iter': 200},
    'GradientBoostingFast12': {'loss': 'log_loss', 'learning_rate': 1, 'max_iter': 30},
    'GradientBoostingFast13': {'loss': 'log_loss', 'learning_rate': 1, 'max_iter': 50},
    'GradientBoostingFast14': {'loss': 'log_loss', 'learning_rate': 1, 'max_iter': 100},
    'AdaBoost': {'n_estimators': 10, 'learning_rate': 1.0},
    'AdaBoost1': {'n_estimators': 20, 'learning_rate': 1.0},
    'AdaBoost2': {'n_estimators': 50, 'learning_rate': 1.0},
    'AdaBoost3': {'n_estimators': 100, 'learning_rate': 1.0},
    'AdaBoost4': {'n_estimators': 50, 'learning_rate': 0.01},
    'AdaBoost5': {'n_estimators': 50, 'learning_rate': 0.05},
    'AdaBoost6': {'n_estimators': 50, 'learning_rate': 0.1},
    'AdaBoost7': {'n_estimators': 50, 'learning_rate': 0.5},
    'AdaBoost8': {'n_estimators': 50, 'learning_rate': 2.0},
    'AdaBoost9': {'n_estimators': 100, 'learning_rate': 0.01},
    'AdaBoost10': {'n_estimators': 100, 'learning_rate': 0.05},
    'AdaBoost11': {'n_estimators': 100, 'learning_rate': 0.1},
    'AdaBoost12': {'n_estimators': 100, 'learning_rate': 0.5},
    'AdaBoost13': {'n_estimators': 100, 'learning_rate': 2.0},
    'AdaBoost14': {'n_estimators': 500, 'learning_rate': 1.0},
    
    'LightGBM': {'boosting_type': 'gbdt', 'learning_rate': 0.001, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1},   
    'LightGBM1': {'boosting_type': 'gbdt', 'learning_rate': 0.01, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}, 
    'LightGBM2': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}, 
    'LightGBM3': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 50, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}, 
    'LightGBM4': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 500, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}, 
    'LightGBM5': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 1000, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}, 
    'LightGBM6': {'boosting_type': 'gbdt', 'learning_rate': 0.5, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}, 
    'LightGBM7': {'boosting_type': 'gbdt', 'learning_rate': 1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}, 
    'LightGBM8': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 50, 'max_depth': -1}, 
    'LightGBM9': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 100, 'max_depth': -1},  
    'LightGBM10': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': 5},
    'LightGBM11': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': 10},  
    'LightGBM12': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': 15}, 
    'LightGBM13': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': 20},
    'LightGBM14': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': 50} 
}

# Models with common adjustable parameters
dtree   = DecisionTreeClassifier()
rforest  = RandomForestClassifier(**common_params['RandomForest'])
rforest1 = RandomForestClassifier(**common_params['RandomForest1'])
rforest2 = RandomForestClassifier(**common_params['RandomForest2'])
rforest3 = RandomForestClassifier(**common_params['RandomForest3'])
rforest4 = RandomForestClassifier(**common_params['RandomForest4'])
rforest5 = RandomForestClassifier(**common_params['RandomForest5'])
rforest6 = RandomForestClassifier(**common_params['RandomForest6'])
rforest7 = RandomForestClassifier(**common_params['RandomForest7'])
rforest8 = RandomForestClassifier(**common_params['RandomForest8'])
rforest9 = RandomForestClassifier(**common_params['RandomForest9'])
rforest10 = RandomForestClassifier(**common_params['RandomForest10'])
rforest11 = RandomForestClassifier(**common_params['RandomForest11'])
rforest12 = RandomForestClassifier(**common_params['RandomForest12'])

svm     = SVC(**common_params['SVM'])
knn     = KNeighborsClassifier(**common_params['KNN'])
knn1     = KNeighborsClassifier(**common_params['KNN1'])
knn2     = KNeighborsClassifier(**common_params['KNN2'])
knn3     = KNeighborsClassifier(**common_params['KNN3'])
knn4     = KNeighborsClassifier(**common_params['KNN4'])
knn5     = KNeighborsClassifier(**common_params['KNN5'])
knn6     = KNeighborsClassifier(**common_params['KNN6'])
knn7     = KNeighborsClassifier(**common_params['KNN7'])
knn8     = KNeighborsClassifier(**common_params['KNN8'])
knn9     = KNeighborsClassifier(**common_params['KNN9'])
knn10     = KNeighborsClassifier(**common_params['KNN10'])
gboost  = GradientBoostingClassifier(**common_params['GradientBoosting'])
gb_fast = HistGradientBoostingClassifier(**common_params['GradientBoostingFast'])
gb_fast1 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast1'])
gb_fast2 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast2'])
gb_fast3 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast3'])
gb_fast4 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast4'])
gb_fast5 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast5'])
gb_fast6 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast6'])
gb_fast7 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast7'])
gb_fast8 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast8'])
gb_fast9 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast9'])
gb_fast10 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast10'])
gb_fast11 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast11'])
gb_fast12 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast12'])
gb_fast13 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast13'])
gb_fast14 = HistGradientBoostingClassifier(**common_params['GradientBoostingFast14'])
adaBoost= AdaBoostClassifier(**common_params['AdaBoost'])
adaBoost1= AdaBoostClassifier(**common_params['AdaBoost1'])
adaBoost2= AdaBoostClassifier(**common_params['AdaBoost2'])
adaBoost3= AdaBoostClassifier(**common_params['AdaBoost3'])
adaBoost4= AdaBoostClassifier(**common_params['AdaBoost4'])
adaBoost5= AdaBoostClassifier(**common_params['AdaBoost5'])
adaBoost6= AdaBoostClassifier(**common_params['AdaBoost6'])
adaBoost7= AdaBoostClassifier(**common_params['AdaBoost7'])
adaBoost8= AdaBoostClassifier(**common_params['AdaBoost8'])
adaBoost9= AdaBoostClassifier(**common_params['AdaBoost9'])
adaBoost10= AdaBoostClassifier(**common_params['AdaBoost10'])
adaBoost11= AdaBoostClassifier(**common_params['AdaBoost11'])
adaBoost12= AdaBoostClassifier(**common_params['AdaBoost12'])
adaBoost13= AdaBoostClassifier(**common_params['AdaBoost13'])
adaBoost14= AdaBoostClassifier(**common_params['AdaBoost14'])
lightgbm=LGBMClassifier(**common_params['LightGBM'])
lightgbm1=LGBMClassifier(**common_params['LightGBM1'])
lightgbm2=LGBMClassifier(**common_params['LightGBM2'])
lightgbm3=LGBMClassifier(**common_params['LightGBM3'])
lightgbm4=LGBMClassifier(**common_params['LightGBM4'])
lightgbm5=LGBMClassifier(**common_params['LightGBM5'])
lightgbm6=LGBMClassifier(**common_params['LightGBM6'])
lightgbm7=LGBMClassifier(**common_params['LightGBM7'])
lightgbm8=LGBMClassifier(**common_params['LightGBM8'])
lightgbm9=LGBMClassifier(**common_params['LightGBM9'])
lightgbm10=LGBMClassifier(**common_params['LightGBM10'])
lightgbm11=LGBMClassifier(**common_params['LightGBM11'])
lightgbm12=LGBMClassifier(**common_params['LightGBM12'])
lightgbm13=LGBMClassifier(**common_params['LightGBM13'])
lightgbm14=LGBMClassifier(**common_params['LightGBM14'])

In [8]:
# def exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train, y_test):
#     start = time.time()
#     print(f'Model Name: {clf_model.__class__};\n Train set shape {X_train.shape}, num of class {y_train.unique().size}')
#     predictions = clf_model.fit(X_train, y_train).predict(X_test.values)
    
#     print(classification_report(y_test, predictions))
#     print(confusion_matrix(y_test, predictions))
    
#     precision, recall, f1score, support = precision_recall_fscore_support(y_test, predictions, average=None)
#     # TODO make all metrics into result dict for recording 
#     probabilities = clf_model.predict_proba(X_test.values)
#     print('prbabilities distribution: \n', pd.DataFrame(probabilities,columns=clf_model.classes_).describe())
#     print(f'Time taken: {round(time.time()-start,3)} seconds.\n')

In [9]:
import pickle

def exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train, y_test,verbose=True):
    start = time.time()
    
    clf_model.fit(X_train, y_train)
    predictions = clf_model.predict(X_test.values)
    
    # Calculate metrics
    report = classification_report(y_test, predictions, output_dict=True)
    metrics = {
        'accuracy': report['accuracy'],
        'precision': report['macro avg']['precision'],
        'recall': report['macro avg']['recall'],
        'f1-score': report['macro avg']['f1-score']
    }
    for class_name in report.keys():
        if class_name not in ['accuracy', 'macro avg', 'weighted avg']:
            metrics[class_name+'_precision'] = report[class_name]['precision']
            metrics[class_name+'_recall'] = report[class_name]['recall'],
            metrics[class_name+'_f1-score'] = report[class_name]['f1-score']
    
    feature_columns = list(X_train.columns)
    num_classes = y_train.nunique()
    class_names = list(y_train.unique())
    
    model_filename = f"./models/{clf_model.__class__.__name__}_model"
    model_filename += f"_{num_classes}cls_{len(feature_columns)}feat_{round(report['accuracy']*100)}acc.pkl"
    with open(model_filename, 'wb') as file:
        pickle.dump(clf_model, file)
    
    results = {**metrics,
        'num_classes': num_classes,
        'class_names': class_names,
        'model_filename': model_filename,
        'feature_columns': feature_columns,
    }
    
    if verbose:
        print(f"Model Name: {clf_model.__class__.__name__};\nTrain set shape {X_train.shape}, num of class {num_classes}")
        print(classification_report(y_test, predictions))
        print(confusion_matrix(y_test, predictions))
        probabilities = clf_model.predict_proba(X_test.values)
        print('Probabilities distribution:\n', pd.DataFrame(probabilities, columns=clf_model.classes_).describe())
    print(f"Model: {clf_model.__class__.__name__};Time taken: {round(time.time()-start, 3)} seconds.\n")

    return results, clf_model


### Sentiment 3-class Classifier Sample code

In [10]:
result, m_trained = exp_clf_with_feature_selected(lightgbm, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011504 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.92      0.51      0.66       748
           0       0.37      0.92      0.53       183
           1       0.55      0.69      0.61       249

    accuracy                           0.61      1180
   macro avg       0.61      0.71      0.60      1180
weighted avg       0.76      0.61      0.63      1180

[[383 232 133]
 [  8 168   7]
 [ 25  53 171]]
Probabilities distribution:
                 -1

In [11]:
result, m_trained = exp_clf_with_feature_selected(lightgbm1, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.87      0.73      0.80       748
           0       0.51      0.88      0.65       183
           1       0.66      0.63      0.65       249

    accuracy                           0.73      1180
   macro avg       0.68      0.75      0.70      1180
weighted avg       0.77      0.73      0.74      1180

[[547 124  77]
 [ 19 161   3]
 [ 60  31 158]]
Probabilities distribution:
                 -1

In [12]:
result, m_trained = exp_clf_with_feature_selected(lightgbm2, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017812 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.86      0.85       748
           0       0.64      0.79      0.70       183
           1       0.80      0.61      0.70       249

    accuracy                           0.79      1180
   macro avg       0.76      0.75      0.75      1180
weighted avg       0.80      0.79      0.79      1180

[[641  71  36]
 [ 37 144   2]
 [ 85  11 153]]
Probabilities distribution:
                 -1

In [13]:
result, m_trained = exp_clf_with_feature_selected(lightgbm3, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.85      0.84      0.84       748
           0       0.61      0.80      0.69       183
           1       0.78      0.61      0.69       249

    accuracy                           0.79      1180
   macro avg       0.75      0.75      0.74      1180
weighted avg       0.80      0.79      0.79      1180

[[630  79  39]
 [ 34 146   3]
 [ 80  16 153]]
Probabilities distribution:
                 -1

In [14]:
result, m_trained = exp_clf_with_feature_selected(lightgbm4, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019421 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.83      0.89      0.86       748
           0       0.70      0.74      0.72       183
           1       0.83      0.59      0.69       249

    accuracy                           0.81      1180
   macro avg       0.78      0.74      0.76      1180
weighted avg       0.81      0.81      0.80      1180

[[669  51  28]
 [ 44 136   3]
 [ 93   8 148]]
Probabilities distribution:
                 -1

In [15]:
result, m_trained = exp_clf_with_feature_selected(lightgbm5, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017370 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.82      0.91      0.86       748
           0       0.69      0.72      0.70       183
           1       0.86      0.59      0.70       249

    accuracy                           0.81      1180
   macro avg       0.79      0.74      0.76      1180
weighted avg       0.81      0.81      0.80      1180

[[677  50  21]
 [ 50 131   2]
 [ 94   8 147]]
Probabilities distribution:
                  -

In [16]:
result, m_trained = exp_clf_with_feature_selected(lightgbm6, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.83      0.88      0.86       748
           0       0.67      0.72      0.69       183
           1       0.81      0.62      0.70       249

    accuracy                           0.80      1180
   macro avg       0.77      0.74      0.75      1180
weighted avg       0.80      0.80      0.80      1180

[[661  55  32]
 [ 47 131   5]
 [ 85  10 154]]
Probabilities distribution:
                 -1

In [17]:
result, m_trained = exp_clf_with_feature_selected(lightgbm7, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018993 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.88      0.86       748
           0       0.67      0.74      0.71       183
           1       0.80      0.61      0.69       249

    accuracy                           0.80      1180
   macro avg       0.77      0.75      0.75      1180
weighted avg       0.80      0.80      0.80      1180

[[660  55  33]
 [ 43 136   4]
 [ 86  11 152]]
Probabilities distribution:
                  -

In [18]:
result, m_trained = exp_clf_with_feature_selected(lightgbm8, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018307 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.86      0.85       748
           0       0.64      0.77      0.70       183
           1       0.78      0.62      0.69       249

    accuracy                           0.79      1180
   macro avg       0.75      0.75      0.75      1180
weighted avg       0.80      0.79      0.79      1180

[[642  65  41]
 [ 40 141   2]
 [ 81  14 154]]
Probabilities distribution:
                 -1

In [19]:
result, m_trained = exp_clf_with_feature_selected(lightgbm9, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.85      0.85       748
           0       0.64      0.79      0.71       183
           1       0.78      0.61      0.68       249

    accuracy                           0.79      1180
   macro avg       0.75      0.75      0.74      1180
weighted avg       0.80      0.79      0.79      1180

[[639  69  40]
 [ 36 144   3]
 [ 86  12 151]]
Probabilities distribution:
                 -1

In [20]:
result, m_trained = exp_clf_with_feature_selected(lightgbm10, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019449 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882




Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.86      0.84      0.85       748
           0       0.61      0.83      0.70       183
           1       0.79      0.64      0.71       249

    accuracy                           0.79      1180
   macro avg       0.75      0.77      0.75      1180
weighted avg       0.81      0.79      0.80      1180

[[628  79  41]
 [ 31 151   1]
 [ 74  16 159]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.564940     0.207341     0.227719
std       0.320739     0.282854     0.284511
min       0.001474     0.000088     0.000468
25%       0.290540     0.002579     0.027293
50%       0.606327     0.050490     0.099433
75%       0.871501     0.347645     0.323708
max       0.996593     0.997603     0.996970
Model: LGBMClassifier;Time taken: 3.181 seconds.



In [21]:
result, m_trained = exp_clf_with_feature_selected(lightgbm11, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019368 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.84      0.84       748
           0       0.61      0.79      0.69       183
           1       0.78      0.62      0.69       249

    accuracy                           0.79      1180
   macro avg       0.75      0.75      0.74      1180
weighted avg       0.79      0.79      0.79      1180

[[628  79  41]
 [ 36 145   2]
 [ 83  12 154]]
Probabilities distribution:
                 -1

In [22]:
result, m_trained = exp_clf_with_feature_selected(lightgbm12, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.84      0.84       748
           0       0.61      0.79      0.69       183
           1       0.77      0.61      0.68       249

    accuracy                           0.79      1180
   macro avg       0.74      0.75      0.74      1180
weighted avg       0.79      0.79      0.79      1180

[[632  75  41]
 [ 35 145   3]
 [ 82  16 151]]
Probabilities distribution:
                 -1

In [23]:
result, m_trained = exp_clf_with_feature_selected(lightgbm13, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018692 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.86      0.85       748
           0       0.64      0.79      0.70       183
           1       0.80      0.61      0.70       249

    accuracy                           0.79      1180
   macro avg       0.76      0.75      0.75      1180
weighted avg       0.80      0.79      0.79      1180

[[641  71  36]
 [ 37 144   2]
 [ 85  11 153]]
Probabilities distribution:
                 -1

In [24]:
result, m_trained = exp_clf_with_feature_selected(lightgbm14, X_train, X_test, y_train_s, y_test_s)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020599 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.84      0.86      0.85       748
           0       0.64      0.79      0.70       183
           1       0.80      0.61      0.70       249

    accuracy                           0.79      1180
   macro avg       0.76      0.75      0.75      1180
weighted avg       0.80      0.79      0.79      1180

[[641  71  36]
 [ 37 144   2]
 [ 85  11 153]]
Probabilities distribution:
                 -1

In [25]:
# change model as the first parameter in the function 
# result, m_trained = exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_s, y_test_s)

In [26]:
# result, m_trained = exp_clf_with_feature_selected(adaBoost, X_train, X_test, y_train_s, y_test_s)

In [27]:
# result, m_trained = exp_clf_with_feature_selected(gboost, X_train, X_test, y_train_s, y_test_s)

In [28]:
# result, m_trained = exp_clf_with_feature_selected(knn, X_train, X_test, y_train_s, y_test_s)

In [29]:
# result, m_trained = exp_clf_with_feature_selected(svm, X_train, X_test, y_train_s, y_test_s)

In [30]:
# result, m_trained = exp_clf_with_feature_selected(rforest, X_train, X_test, y_train_s, y_test_s)

In [31]:
# result, m_trained = exp_clf_with_feature_selected(lightgbm, X_train, X_test, y_train_s, y_test_s)

In [32]:
# result, m_trained = exp_clf_with_feature_selected(dtree, X_train, X_test, y_train_s, y_test_s)

### How to save experiment metrics result

In [33]:
exp_results = []
for clf_model in [lightgbm,lightgbm1,lightgbm2,lightgbm3,lightgbm4,lightgbm5,lightgbm6,lightgbm7,lightgbm8,lightgbm9,lightgbm10,lightgbm11,lightgbm12,lightgbm13,lightgbm14]:
    result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_s, y_test_s,verbose=False)
    exp_results.append(result)
pd.DataFrame(exp_results)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022457 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model: LGBMClassifier;Time taken: 5.004 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019216 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model: LGBMClassifier;Time taken: 5.213 seco



Model: LGBMClassifier;Time taken: 3.544 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022737 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.094882
Model: LGBMClassifier;Time taken: 5.043 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024338 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -1.134949
[LightGBM] [Info] Start training from score -1.067165
[LightGBM] [Info] Start training from score -1.0

Model: LGBMClassifier;Time taken: 5.611 seconds.



Unnamed: 0,accuracy,precision,recall,f1-score,-1_precision,-1_recall,-1_f1-score,0_precision,0_recall,0_f1-score,1_precision,1_recall,1_f1-score,num_classes,class_names,model_filename,feature_columns
0,0.611864,0.613791,0.705604,0.599031,0.920673,"(0.5120320855614974,)",0.658076,0.370861,"(0.9180327868852459,)",0.528302,0.549839,"(0.6867469879518072,)",0.610714,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_61a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
1,0.733898,0.682387,0.748534,0.696792,0.873802,"(0.7312834224598931,)",0.796215,0.509494,"(0.8797814207650273,)",0.645291,0.663866,"(0.6345381526104418,)",0.648871,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_73a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
2,0.794915,0.75944,0.752765,0.749352,0.840105,"(0.856951871657754,)",0.848445,0.637168,"(0.7868852459016393,)",0.704156,0.801047,"(0.6144578313253012,)",0.695455,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_79a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
3,0.787288,0.745733,0.751506,0.740791,0.846774,"(0.8422459893048129,)",0.844504,0.605809,"(0.7978142076502732,)",0.688679,0.784615,"(0.6144578313253012,)",0.689189,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_79a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
4,0.807627,0.784759,0.743977,0.75739,0.830025,"(0.8943850267379679,)",0.861004,0.697436,"(0.7431693989071039,)",0.719577,0.826816,"(0.5943775100401606,)",0.691589,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_81a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
5,0.809322,0.794144,0.737096,0.756314,0.824604,"(0.9050802139037433,)",0.86297,0.693122,"(0.7158469945355191,)",0.704301,0.864706,"(0.5903614457831325,)",0.701671,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_81a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
6,0.801695,0.769398,0.739337,0.749726,0.833544,"(0.8836898395721925,)",0.857884,0.668367,"(0.7158469945355191,)",0.691293,0.806283,"(0.6184738955823293,)",0.7,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_80a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
7,0.80339,0.771334,0.745321,0.753124,0.836502,"(0.8823529411764706,)",0.858816,0.673267,"(0.7431693989071039,)",0.706494,0.804233,"(0.6104417670682731,)",0.694064,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_80a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
8,0.794068,0.754683,0.749085,0.746701,0.841415,"(0.8582887700534759,)",0.849768,0.640909,"(0.7704918032786885,)",0.699752,0.781726,"(0.6184738955823293,)",0.690583,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_79a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
9,0.791525,0.752678,0.749196,0.744839,0.839685,"(0.8542780748663101,)",0.846918,0.64,"(0.7868852459016393,)",0.705882,0.778351,"(0.606425702811245,)",0.681716,3,"[-1, 1, 0]",./models/LGBMClassifier_model_3cls_128feat_79a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."


In [34]:
# exp_results = []
# for clf_model in [rforest,adaBoost,gb_fast,gboost,knn,svm,lightgbm,dtree]:
#     result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_s, y_test_s,verbose=False)
#     exp_results.append(result)
# pd.DataFrame(exp_results)

In [35]:
pd.DataFrame(exp_results).to_excel("exp_result-0316-v4-aug-lightgbm.xlsx")

### Emotion 8-class

In [36]:
exp_results = []
for clf_model in [lightgbm,lightgbm1,lightgbm2,lightgbm3,lightgbm4,lightgbm5,lightgbm6,lightgbm7,lightgbm8,lightgbm9,lightgbm10,lightgbm11,lightgbm12,lightgbm13,lightgbm14]:
    result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_e, y_test_e,verbose=False)
    exp_results.append(result)
pd.DataFrame(exp_results)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021745 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model: LGBMClassifier;Time taken: 14.716 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024642 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of d



Model: LGBMClassifier;Time taken: 73.197 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020416 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927


























Model: LGBMClassifier;Time taken: 105.837 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020431 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model: LGBMClassifier;Time taken: 10.33 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020293 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [I







Model: LGBMClassifier;Time taken: 7.178 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021952 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model: LGBMClassifier;Time taken: 12.511 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020495 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [In











Model: LGBMClassifier;Time taken: 8.427 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019816 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model: LGBMClassifier;Time taken: 12.257 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022672 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [In

Unnamed: 0,accuracy,precision,recall,f1-score,Anger_precision,Anger_recall,Anger_f1-score,Calmness_precision,Calmness_recall,Calmness_f1-score,...,Sadness_precision,Sadness_recall,Sadness_f1-score,Surprise_precision,Surprise_recall,Surprise_f1-score,num_classes,class_names,model_filename,feature_columns
0,0.200847,0.107963,0.171711,0.095127,0.0,"(0.0,)",0.0,0.0,"(0.0,)",0.0,...,0.0,"(0.0,)",0.0,0.0,"(0.0,)",0.0,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_20a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
1,0.568644,0.687333,0.608094,0.585387,0.842857,"(0.6310160427807486,)",0.721713,0.611111,"(0.6875,)",0.647059,...,0.705882,"(0.5775401069518716,)",0.635294,0.728571,"(0.8225806451612904,)",0.772727,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_57a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
2,0.660169,0.689906,0.68607,0.681102,0.813953,"(0.7486631016042781,)",0.779944,0.705882,"(0.75,)",0.727273,...,0.636816,"(0.6844919786096256,)",0.659794,0.864407,"(0.8225806451612904,)",0.842975,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_66a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
3,0.644068,0.673784,0.672181,0.661261,0.815476,"(0.732620320855615,)",0.771831,0.631579,"(0.75,)",0.685714,...,0.649746,"(0.6844919786096256,)",0.666667,0.847458,"(0.8064516129032258,)",0.826446,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_64a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
4,0.661017,0.68857,0.700786,0.687995,0.805556,"(0.7754010695187166,)",0.790191,0.666667,"(0.875,)",0.756757,...,0.63,"(0.6737967914438503,)",0.651163,0.927273,"(0.8225806451612904,)",0.871795,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_66a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
5,0.667797,0.688292,0.707481,0.690831,0.793478,"(0.7807486631016043,)",0.787062,0.636364,"(0.875,)",0.736842,...,0.623762,"(0.6737967914438503,)",0.647815,0.912281,"(0.8387096774193549,)",0.87395,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_67a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
6,0.45,0.45381,0.474012,0.446073,0.638462,"(0.44385026737967914,)",0.523659,0.444444,"(0.5,)",0.470588,...,0.559322,"(0.5294117647058824,)",0.543956,0.261438,"(0.6451612903225806,)",0.372093,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_45a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
7,0.305932,0.306844,0.250837,0.214399,0.455446,"(0.7379679144385026,)",0.563265,0.0,"(0.0,)",0.0,...,0.702703,"(0.13903743315508021,)",0.232143,0.0,"(0.0,)",0.0,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_31a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
8,0.661017,0.695947,0.69507,0.688727,0.794444,"(0.7647058823529411,)",0.779292,0.722222,"(0.8125,)",0.764706,...,0.634518,"(0.6684491978609626,)",0.651042,0.912281,"(0.8387096774193549,)",0.87395,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_66a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
9,0.65678,0.688716,0.704508,0.689264,0.840237,"(0.7593582887700535,)",0.797753,0.714286,"(0.9375,)",0.810811,...,0.649746,"(0.6844919786096256,)",0.666667,0.894737,"(0.8225806451612904,)",0.857143,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/LGBMClassifier_model_8cls_128feat_66a...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."


In [37]:
pd.DataFrame(exp_results).to_excel("exp_result-0316-v4-aug-lightgbm-8.xlsx")

In [38]:
exp_clf_with_feature_selected(lightgbm, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020861 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00       187
    Calmness       0.00      0.00      0.00        16
     Disgust       0.00      0.00      0.

({'accuracy': 0.20084745762711864,
  'precision': 0.10796296296296296,
  'recall': 0.17171139005411637,
  'f1-score': 0.09512656158753133,
  'Anger_precision': 0.0,
  'Anger_recall': (0.0,),
  'Anger_f1-score': 0.0,
  'Calmness_precision': 0.0,
  'Calmness_recall': (0.0,),
  'Calmness_f1-score': 0.0,
  'Disgust_precision': 0.0,
  'Disgust_recall': (0.0,),
  'Disgust_f1-score': 0.0,
  'Fear_precision': 0.0,
  'Fear_recall': (0.0,),
  'Fear_f1-score': 0.0,
  'Happiness_precision': 0.71,
  'Happiness_recall': (0.37967914438502676,),
  'Happiness_f1-score': 0.4947735191637631,
  'Neutrality_precision': 0.1537037037037037,
  'Neutrality_recall': (0.9940119760479041,),
  'Neutrality_f1-score': 0.26623897353648757,
  'Sadness_precision': 0.0,
  'Sadness_recall': (0.0,),
  'Sadness_f1-score': 0.0,
  'Surprise_precision': 0.0,
  'Surprise_recall': (0.0,),
  'Surprise_f1-score': 0.0,
  'num_classes': 8,
  'class_names': ['Disgust',
   'Anger',
   'Fear',
   'Sadness',
   'Happiness',
   'Surpris

In [39]:
exp_clf_with_feature_selected(lightgbm1, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022470 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.84      0.63      0.72       187
    Calmness       0.61      0.69      0.65        16
     Disgust       0.89      0.22      0.

({'accuracy': 0.5686440677966101,
  'precision': 0.6873333615038665,
  'recall': 0.6080938729535925,
  'f1-score': 0.5853866982032756,
  'Anger_precision': 0.8428571428571429,
  'Anger_recall': (0.6310160427807486,),
  'Anger_f1-score': 0.7217125382262997,
  'Calmness_precision': 0.6111111111111112,
  'Calmness_recall': (0.6875,),
  'Calmness_f1-score': 0.6470588235294118,
  'Disgust_precision': 0.8913043478260869,
  'Disgust_recall': (0.2192513368983957,),
  'Disgust_f1-score': 0.351931330472103,
  'Fear_precision': 0.8805970149253731,
  'Fear_recall': (0.3155080213903743,),
  'Fear_f1-score': 0.4645669291338582,
  'Happiness_precision': 0.45774647887323944,
  'Happiness_recall': (0.6951871657754011,),
  'Happiness_f1-score': 0.5520169851380042,
  'Neutrality_precision': 0.3805970149253731,
  'Neutrality_recall': (0.9161676646706587,),
  'Neutrality_f1-score': 0.5377855887521968,
  'Sadness_precision': 0.7058823529411765,
  'Sadness_recall': (0.5775401069518716,),
  'Sadness_f1-score'

In [40]:
exp_clf_with_feature_selected(lightgbm2, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.75      0.78       187
    Calmness       0.71      0.75      0.73        16
     Disgust       0.67      0.45      0.

({'accuracy': 0.6601694915254237,
  'precision': 0.6899064820798478,
  'recall': 0.6860695225901483,
  'f1-score': 0.6811020880092568,
  'Anger_precision': 0.813953488372093,
  'Anger_recall': (0.7486631016042781,),
  'Anger_f1-score': 0.7799442896935934,
  'Calmness_precision': 0.7058823529411765,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.7272727272727272,
  'Disgust_precision': 0.6666666666666666,
  'Disgust_recall': (0.44919786096256686,),
  'Disgust_f1-score': 0.536741214057508,
  'Fear_precision': 0.6463414634146342,
  'Fear_recall': (0.5668449197860963,),
  'Fear_f1-score': 0.6039886039886039,
  'Happiness_precision': 0.6455026455026455,
  'Happiness_recall': (0.6524064171122995,),
  'Happiness_f1-score': 0.648936170212766,
  'Neutrality_precision': 0.5396825396825397,
  'Neutrality_recall': (0.8143712574850299,),
  'Neutrality_f1-score': 0.6491646778042959,
  'Sadness_precision': 0.6368159203980099,
  'Sadness_recall': (0.6844919786096256,),
  'Sadness_f1-score': 0.

In [41]:
exp_clf_with_feature_selected(lightgbm3, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.028195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.82      0.73      0.77       187
    Calmness       0.63      0.75      0.69        16
     Disgust       0.69      0.40      0.

({'accuracy': 0.6440677966101694,
  'precision': 0.673783635207502,
  'recall': 0.6721814155886949,
  'f1-score': 0.6612614810666972,
  'Anger_precision': 0.8154761904761905,
  'Anger_recall': (0.732620320855615,),
  'Anger_f1-score': 0.7718309859154929,
  'Calmness_precision': 0.631578947368421,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.6857142857142857,
  'Disgust_precision': 0.6851851851851852,
  'Disgust_recall': (0.39572192513368987,),
  'Disgust_f1-score': 0.5016949152542373,
  'Fear_precision': 0.6870748299319728,
  'Fear_recall': (0.5401069518716578,),
  'Fear_f1-score': 0.6047904191616766,
  'Happiness_precision': 0.5529953917050692,
  'Happiness_recall': (0.6417112299465241,),
  'Happiness_f1-score': 0.594059405940594,
  'Neutrality_precision': 0.5207547169811321,
  'Neutrality_recall': (0.8263473053892215,),
  'Neutrality_f1-score': 0.638888888888889,
  'Sadness_precision': 0.649746192893401,
  'Sadness_recall': (0.6844919786096256,),
  'Sadness_f1-score': 0.666

In [42]:
exp_clf_with_feature_selected(lightgbm4, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023664 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927




Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.78      0.79       187
    Calmness       0.67      0.88      0.76        16
     Disgust       0.67      0.47      0.55       187
        Fear       0.59      0.59      0.59       187
   Happiness       0.64      0.60      0.62       187
  Neutrality       0.58      0.80      0.67       167
     Sadness       0.63      0.67      0.65       187
    Surprise       0.93      0.82      0.87        62

    accuracy                           0.66      1180
   macro avg       0.69      0.70      0.69      1180
weighted avg       0.67      0.66      0.66      1180

[[145   0  12   6  22   1   1   0]
 [  0  14   0   0   0   0   2   0]
 [  8   0  88  20  16  41  14   0]
 [  6   0   6 110  15  15  34   1]
 [ 21   1   9  22 113  14   5   2]
 [  0   2   4   8   4 133  16   0]
 [  0   4  10  17   2  27 126   1]
 [  0   0   3   2   4   0   2  

({'accuracy': 0.6610169491525424,
  'precision': 0.6885699051324051,
  'recall': 0.7007859120038343,
  'f1-score': 0.6879947978200851,
  'Anger_precision': 0.8055555555555556,
  'Anger_recall': (0.7754010695187166,),
  'Anger_f1-score': 0.7901907356948229,
  'Calmness_precision': 0.6666666666666666,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.7567567567567567,
  'Disgust_precision': 0.6666666666666666,
  'Disgust_recall': (0.47058823529411764,),
  'Disgust_f1-score': 0.5517241379310345,
  'Fear_precision': 0.5945945945945946,
  'Fear_recall': (0.5882352941176471,),
  'Fear_f1-score': 0.5913978494623656,
  'Happiness_precision': 0.6420454545454546,
  'Happiness_recall': (0.6042780748663101,),
  'Happiness_f1-score': 0.6225895316804407,
  'Neutrality_precision': 0.5757575757575758,
  'Neutrality_recall': (0.7964071856287425,),
  'Neutrality_f1-score': 0.6683417085427136,
  'Sadness_precision': 0.63,
  'Sadness_recall': (0.6737967914438503,),
  'Sadness_f1-score': 0.6511627906

In [43]:
exp_clf_with_feature_selected(lightgbm5, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026977 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
























Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.79      0.78      0.79       187
    Calmness       0.64      0.88      0.74        16
     Disgust       0.66      0.46      0.54       187
        Fear       0.61      0.62      0.61       187
   Happiness       0.68      0.61      0.65       187
  Neutrality       0.59      0.80      0.68       167
     Sadness       0.62      0.67      0.65       187
    Surprise       0.91      0.84      0.87        62

    accuracy                           0.67      1180
   macro avg       0.69      0.71      0.69      1180
weighted avg       0.67      0.67      0.66      1180

[[146   0  15   6  18   1   1   0]
 [  0  14   0   0   0   0   2   0]
 [  9   1  86  23  14  39  15   0]
 [  5   0   5 116  11  13  35   2]
 [ 23   1   9  19 115  14   4   2]
 [  0   2   3   8   3 133  18   0]
 [  0   4   9  17   5  25 126   1]
 [  1   0   3   2   3   0   1  

({'accuracy': 0.6677966101694915,
  'precision': 0.6882922204484733,
  'recall': 0.7074811854211192,
  'f1-score': 0.6908313498221231,
  'Anger_precision': 0.7934782608695652,
  'Anger_recall': (0.7807486631016043,),
  'Anger_f1-score': 0.7870619946091644,
  'Calmness_precision': 0.6363636363636364,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.7368421052631579,
  'Disgust_precision': 0.6615384615384615,
  'Disgust_recall': (0.45989304812834225,),
  'Disgust_f1-score': 0.5425867507886436,
  'Fear_precision': 0.6073298429319371,
  'Fear_recall': (0.6203208556149733,),
  'Fear_f1-score': 0.6137566137566138,
  'Happiness_precision': 0.6804733727810651,
  'Happiness_recall': (0.6149732620320856,),
  'Happiness_f1-score': 0.6460674157303371,
  'Neutrality_precision': 0.5911111111111111,
  'Neutrality_recall': (0.7964071856287425,),
  'Neutrality_f1-score': 0.6785714285714286,
  'Sadness_precision': 0.6237623762376238,
  'Sadness_recall': (0.6737967914438503,),
  'Sadness_f1-score'

In [44]:
exp_clf_with_feature_selected(lightgbm6, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.028652 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.64      0.44      0.52       187
    Calmness       0.44      0.50      0.47        16
     Disgust       0.42      0.25      0.

({'accuracy': 0.45,
  'precision': 0.4538104651677561,
  'recall': 0.47401150347226884,
  'f1-score': 0.44607348396148994,
  'Anger_precision': 0.6384615384615384,
  'Anger_recall': (0.44385026737967914,),
  'Anger_f1-score': 0.5236593059936908,
  'Calmness_precision': 0.4444444444444444,
  'Calmness_recall': (0.5,),
  'Calmness_f1-score': 0.47058823529411764,
  'Disgust_precision': 0.415929203539823,
  'Disgust_recall': (0.25133689839572193,),
  'Disgust_f1-score': 0.3133333333333333,
  'Fear_precision': 0.4899328859060403,
  'Fear_recall': (0.39037433155080214,),
  'Fear_f1-score': 0.43452380952380953,
  'Happiness_precision': 0.39901477832512317,
  'Happiness_recall': (0.43315508021390375,),
  'Happiness_f1-score': 0.4153846153846154,
  'Neutrality_precision': 0.4219409282700422,
  'Neutrality_recall': (0.5988023952095808,),
  'Neutrality_f1-score': 0.495049504950495,
  'Sadness_precision': 0.559322033898305,
  'Sadness_recall': (0.5294117647058824,),
  'Sadness_f1-score': 0.5439560

In [45]:
exp_clf_with_feature_selected(lightgbm7, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.027092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927




Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.46      0.74      0.56       187
    Calmness       0.00      0.00      0.00        16
     Disgust       0.20      0.20      0.20       187
        Fear       0.44      0.17      0.25       187
   Happiness       0.39      0.05      0.09       187
  Neutrality       0.27      0.71      0.39       167
     Sadness       0.70      0.14      0.23       187
    Surprise       0.00      0.00      0.00        62

    accuracy                           0.31      1180
   macro avg       0.31      0.25      0.21      1180
weighted avg       0.38      0.31      0.27      1180

[[138   5   6   7   9  19   1   2]
 [  0   0   5   0   0  11   0   0]
 [ 28  10  37   7   1  95   0   9]
 [ 46   4  29  32   2  55   3  16]
 [ 58   7  25  16   9  59   4   9]
 [  5   1  33   2   0 119   2   5]
 [  9   4  35   6   0  74  26  33]
 [ 19   9  16   3   2  12   1  

({'accuracy': 0.3059322033898305,
  'precision': 0.3068439385834526,
  'recall': 0.25083656216977807,
  'f1-score': 0.21439913553671733,
  'Anger_precision': 0.45544554455445546,
  'Anger_recall': (0.7379679144385026,),
  'Anger_f1-score': 0.563265306122449,
  'Calmness_precision': 0.0,
  'Calmness_recall': (0.0,),
  'Calmness_f1-score': 0.0,
  'Disgust_precision': 0.1989247311827957,
  'Disgust_recall': (0.19786096256684493,),
  'Disgust_f1-score': 0.19839142091152814,
  'Fear_precision': 0.4383561643835616,
  'Fear_recall': (0.1711229946524064,),
  'Fear_f1-score': 0.24615384615384614,
  'Happiness_precision': 0.391304347826087,
  'Happiness_recall': (0.0481283422459893,),
  'Happiness_f1-score': 0.0857142857142857,
  'Neutrality_precision': 0.268018018018018,
  'Neutrality_recall': (0.7125748502994012,),
  'Neutrality_f1-score': 0.3895253682487725,
  'Sadness_precision': 0.7027027027027027,
  'Sadness_recall': (0.13903743315508021,),
  'Sadness_f1-score': 0.23214285714285715,
  'Sur

In [46]:
exp_clf_with_feature_selected(lightgbm8, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025209 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.79      0.76      0.78       187
    Calmness       0.72      0.81      0.76        16
     Disgust       0.70      0.46      0.

({'accuracy': 0.6610169491525424,
  'precision': 0.6959474847492827,
  'recall': 0.6950695948322434,
  'f1-score': 0.6887267034448051,
  'Anger_precision': 0.7944444444444444,
  'Anger_recall': (0.7647058823529411,),
  'Anger_f1-score': 0.7792915531335151,
  'Calmness_precision': 0.7222222222222222,
  'Calmness_recall': (0.8125,),
  'Calmness_f1-score': 0.7647058823529411,
  'Disgust_precision': 0.6991869918699187,
  'Disgust_recall': (0.45989304812834225,),
  'Disgust_f1-score': 0.5548387096774193,
  'Fear_precision': 0.6385542168674698,
  'Fear_recall': (0.5668449197860963,),
  'Fear_f1-score': 0.6005665722379603,
  'Happiness_precision': 0.6080402010050251,
  'Happiness_recall': (0.6470588235294118,),
  'Happiness_f1-score': 0.6269430051813472,
  'Neutrality_precision': 0.5583333333333333,
  'Neutrality_recall': (0.8023952095808383,),
  'Neutrality_f1-score': 0.6584766584766585,
  'Sadness_precision': 0.6345177664974619,
  'Sadness_recall': (0.6684491978609626,),
  'Sadness_f1-score

In [47]:
exp_clf_with_feature_selected(lightgbm9, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.027136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.84      0.76      0.80       187
    Calmness       0.71      0.94      0.81        16
     Disgust       0.65      0.44      0.

({'accuracy': 0.6567796610169492,
  'precision': 0.6887161012475413,
  'recall': 0.7045076630205175,
  'f1-score': 0.6892639918253158,
  'Anger_precision': 0.8402366863905325,
  'Anger_recall': (0.7593582887700535,),
  'Anger_f1-score': 0.7977528089887641,
  'Calmness_precision': 0.7142857142857143,
  'Calmness_recall': (0.9375,),
  'Calmness_f1-score': 0.8108108108108109,
  'Disgust_precision': 0.6456692913385826,
  'Disgust_recall': (0.4385026737967914,),
  'Disgust_f1-score': 0.5222929936305732,
  'Fear_precision': 0.6033519553072626,
  'Fear_recall': (0.5775401069518716,),
  'Fear_f1-score': 0.5901639344262294,
  'Happiness_precision': 0.6,
  'Happiness_recall': (0.6256684491978609,),
  'Happiness_f1-score': 0.612565445026178,
  'Neutrality_precision': 0.5617021276595745,
  'Neutrality_recall': (0.7904191616766467,),
  'Neutrality_f1-score': 0.6567164179104478,
  'Sadness_precision': 0.649746192893401,
  'Sadness_recall': (0.6844919786096256,),
  'Sadness_f1-score': 0.6666666666666

In [48]:
exp_clf_with_feature_selected(lightgbm10, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026986 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927










Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.77      0.79       187
    Calmness       0.56      0.88      0.68        16
     Disgust       0.66      0.41      0.50       187
        Fear       0.65      0.57      0.60       187
   Happiness       0.61      0.63      0.62       187
  Neutrality       0.54      0.82      0.65       167
     Sadness       0.65      0.68      0.67       187
    Surprise       0.91      0.81      0.85        62

    accuracy                           0.66      1180
   macro avg       0.67      0.70      0.67      1180
weighted avg       0.67      0.66      0.65      1180

[[144   0  12   5  24   2   0   0]
 [  0  14   0   0   0   1   1   0]
 [  9   0  76  15  22  50  15   0]
 [  4   0   4 106  21  15  36   1]
 [ 20   3   7  11 118  21   5   2]
 [  0   4   6   8   3 137   9   0]
 [  0   4   8  15   3  27 128   2]
 [  1   0   2   4   3   0   2  

({'accuracy': 0.6550847457627119,
  'precision': 0.6735126656528339,
  'recall': 0.6950793029690145,
  'f1-score': 0.6717724346738374,
  'Anger_precision': 0.8089887640449438,
  'Anger_recall': (0.7700534759358288,),
  'Anger_f1-score': 0.7890410958904108,
  'Calmness_precision': 0.56,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.6829268292682927,
  'Disgust_precision': 0.6608695652173913,
  'Disgust_recall': (0.40641711229946526,),
  'Disgust_f1-score': 0.5033112582781457,
  'Fear_precision': 0.6463414634146342,
  'Fear_recall': (0.5668449197860963,),
  'Fear_f1-score': 0.6039886039886039,
  'Happiness_precision': 0.6082474226804123,
  'Happiness_recall': (0.6310160427807486,),
  'Happiness_f1-score': 0.6194225721784776,
  'Neutrality_precision': 0.541501976284585,
  'Neutrality_recall': (0.8203592814371258,),
  'Neutrality_f1-score': 0.6523809523809523,
  'Sadness_precision': 0.6530612244897959,
  'Sadness_recall': (0.6844919786096256,),
  'Sadness_f1-score': 0.66840731070

In [49]:
exp_clf_with_feature_selected(lightgbm11, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022399 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.83      0.78      0.80       187
    Calmness       0.63      0.75      0.69        16
     Disgust       0.66      0.43      0.

({'accuracy': 0.6576271186440678,
  'precision': 0.6804514906443683,
  'recall': 0.6823162261814133,
  'f1-score': 0.6747961358356753,
  'Anger_precision': 0.8285714285714286,
  'Anger_recall': (0.7754010695187166,),
  'Anger_f1-score': 0.8011049723756906,
  'Calmness_precision': 0.631578947368421,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.6857142857142857,
  'Disgust_precision': 0.6639344262295082,
  'Disgust_recall': (0.43315508021390375,),
  'Disgust_f1-score': 0.5242718446601943,
  'Fear_precision': 0.6363636363636364,
  'Fear_recall': (0.5989304812834224,),
  'Fear_f1-score': 0.6170798898071624,
  'Happiness_precision': 0.6142131979695431,
  'Happiness_recall': (0.6470588235294118,),
  'Happiness_f1-score': 0.6302083333333333,
  'Neutrality_precision': 0.5622317596566524,
  'Neutrality_recall': (0.7844311377245509,),
  'Neutrality_f1-score': 0.6549999999999999,
  'Sadness_precision': 0.6138613861386139,
  'Sadness_recall': (0.6631016042780749,),
  'Sadness_f1-score': 

In [50]:
exp_clf_with_feature_selected(lightgbm12, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024825 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.84      0.78      0.81       187
    Calmness       0.75      0.75      0.75        16
     Disgust       0.65      0.43      0.

({'accuracy': 0.6576271186440678,
  'precision': 0.6928354756816091,
  'recall': 0.6825563875698664,
  'f1-score': 0.6811281195207204,
  'Anger_precision': 0.838150289017341,
  'Anger_recall': (0.7754010695187166,),
  'Anger_f1-score': 0.8055555555555556,
  'Calmness_precision': 0.75,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.75,
  'Disgust_precision': 0.6532258064516129,
  'Disgust_recall': (0.43315508021390375,),
  'Disgust_f1-score': 0.5209003215434084,
  'Fear_precision': 0.6280487804878049,
  'Fear_recall': (0.5508021390374331,),
  'Fear_f1-score': 0.586894586894587,
  'Happiness_precision': 0.6173469387755102,
  'Happiness_recall': (0.6470588235294118,),
  'Happiness_f1-score': 0.6318537859007833,
  'Neutrality_precision': 0.5537190082644629,
  'Neutrality_recall': (0.8023952095808383,),
  'Neutrality_f1-score': 0.6552567237163814,
  'Sadness_precision': 0.625,
  'Sadness_recall': (0.6951871657754011,),
  'Sadness_f1-score': 0.6582278481012658,
  'Surprise_precision'

In [51]:
exp_clf_with_feature_selected(lightgbm13, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.028756 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.75      0.78       187
    Calmness       0.71      0.75      0.73        16
     Disgust       0.67      0.45      0.

({'accuracy': 0.6601694915254237,
  'precision': 0.6899064820798478,
  'recall': 0.6860695225901483,
  'f1-score': 0.6811020880092568,
  'Anger_precision': 0.813953488372093,
  'Anger_recall': (0.7486631016042781,),
  'Anger_f1-score': 0.7799442896935934,
  'Calmness_precision': 0.7058823529411765,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.7272727272727272,
  'Disgust_precision': 0.6666666666666666,
  'Disgust_recall': (0.44919786096256686,),
  'Disgust_f1-score': 0.536741214057508,
  'Fear_precision': 0.6463414634146342,
  'Fear_recall': (0.5668449197860963,),
  'Fear_f1-score': 0.6039886039886039,
  'Happiness_precision': 0.6455026455026455,
  'Happiness_recall': (0.6524064171122995,),
  'Happiness_f1-score': 0.648936170212766,
  'Neutrality_precision': 0.5396825396825397,
  'Neutrality_recall': (0.8143712574850299,),
  'Neutrality_f1-score': 0.6491646778042959,
  'Sadness_precision': 0.6368159203980099,
  'Sadness_recall': (0.6844919786096256,),
  'Sadness_f1-score': 0.

In [52]:
exp_clf_with_feature_selected(lightgbm14, X_train, X_test, y_train_e, y_test_e)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32640
[LightGBM] [Info] Number of data points in the train set: 24885, number of used features: 128
[LightGBM] [Info] Start training from score -2.525628
[LightGBM] [Info] Start training from score -3.342099
[LightGBM] [Info] Start training from score -2.522119
[LightGBM] [Info] Start training from score -2.512654
[LightGBM] [Info] Start training from score -1.391815
[LightGBM] [Info] Start training from score -1.175646
[LightGBM] [Info] Start training from score -2.524624
[LightGBM] [Info] Start training from score -2.453927
Model Name: LGBMClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.75      0.78       187
    Calmness       0.71      0.75      0.73        16
     Disgust       0.67      0.45      0.

({'accuracy': 0.6601694915254237,
  'precision': 0.6899064820798478,
  'recall': 0.6860695225901483,
  'f1-score': 0.6811020880092568,
  'Anger_precision': 0.813953488372093,
  'Anger_recall': (0.7486631016042781,),
  'Anger_f1-score': 0.7799442896935934,
  'Calmness_precision': 0.7058823529411765,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.7272727272727272,
  'Disgust_precision': 0.6666666666666666,
  'Disgust_recall': (0.44919786096256686,),
  'Disgust_f1-score': 0.536741214057508,
  'Fear_precision': 0.6463414634146342,
  'Fear_recall': (0.5668449197860963,),
  'Fear_f1-score': 0.6039886039886039,
  'Happiness_precision': 0.6455026455026455,
  'Happiness_recall': (0.6524064171122995,),
  'Happiness_f1-score': 0.648936170212766,
  'Neutrality_precision': 0.5396825396825397,
  'Neutrality_recall': (0.8143712574850299,),
  'Neutrality_f1-score': 0.6491646778042959,
  'Sadness_precision': 0.6368159203980099,
  'Sadness_recall': (0.6844919786096256,),
  'Sadness_f1-score': 0.

In [53]:
# change y_lable into emo
# exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_e, y_test_e)

In [54]:
# check how long on normal gradient boosting
# exp_clf_with_feature_selected(gboost, X_train, X_test, y_train_e, y_test_e)

### Threshold tuning

In [55]:
# Calculate accuracy for the given threshold

probabilities = lightgbm6.predict_proba(X_test)

def calc_acc_by_thres(probabilities, threshold, y_test):
    predictions_adj = []
    # Loop through each sample's probabilities
    for probs in probabilities:
        if probs[0] > threshold:
            pred_class = -1
        elif probs[1] > probs[2]:
            pred_class = 0
        else:
            pred_class = 1
        predictions_adj.append(pred_class)
    accuracy = np.mean(predictions_adj == y_test)
    precision, recall, f1score, _ = precision_recall_fscore_support(y_test_s, predictions_adj, average=None)
    return accuracy, min(f1score), np.var(f1score)

best_threshold = None
best_accuracy = 0.0
best_f1score = 0.0
# best_f1s_var = 10

# Define a range of threshold values to try
threshold_range = np.linspace(0.25, 0.75, 100)
for threshold in threshold_range:
    accuracy, min_f1_score, var_f1_score = calc_acc_by_thres(probabilities, threshold, y_test_s)
    # if accuracy > best_accuracy:
    #     best_accuracy = accuracy
    #     best_threshold = threshold
    if min_f1_score > best_f1score:
        best_f1score = min_f1_score
        best_threshold = threshold
        best_accuracy = accuracy
    # if var_f1_score < best_f1s_var:
    #     best_f1score = min_f1_score
    #     best_threshold = threshold
    #     best_accuracy = accuracy
    #     best_f1s_var = var_f1_score
        

print("Best Threshold:", best_threshold)
print("Best Accuracy:", best_accuracy)
print("Best min f1 score:", best_f1score)
# print("Best var f1 score:", best_f1s_var)

Best Threshold: 0.25
Best Accuracy: 0.28559322033898304
Best min f1 score: 0.10945273631840796


In [56]:
# if you want to adjust the threshold; Predict probabilities for each class label;
# best threshold and best_threshold x 110% 120% 130% to see trend
probabilities = lightgbm.predict_proba(X_test)

threshold = best_threshold
print('BEST Threshold:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.1
print('Threshold-2:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.2
print('Threshold-3:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.3
print('Threshold-4:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

BEST Threshold: 0.25
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       748
           0       0.73      0.06      0.11       183
           1       0.21      1.00      0.35       249

    accuracy                           0.22      1180
   macro avg       0.32      0.35      0.15      1180
weighted avg       0.16      0.22      0.09      1180

[[  0   3 745]
 [  0  11 172]
 [  0   1 248]]
Accuracy: 0.21949152542372882
Threshold-2: 0.275
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       748
           0       0.73      0.06      0.11       183
           1       0.21      1.00      0.35       249

    accuracy                           0.22      1180
   macro avg       0.32      0.35      0.15      1180
weighted avg       0.16      0.22      0.09      1180

[[  0   3 745]
 [  0  11 172]
 [  0   1 248]]
Accuracy: 0.21949152542372882
Threshold-3: 0.3
              precision    re