In [48]:
import numpy as np
import pandas as pd
import glob
import os, sys
import matplotlib.pyplot as plt
import time
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
VERSION = 4
RANDOM_SEED = 26

## Load data

### Load Data - downsampled

In [49]:
# df_joint_train_org   = pd.read_csv(f'./features/cache_all_features_train_V{VERSION}.csv')
# df_joint_train_org   = df_joint_train_org.drop(columns=['GNE_max_gne','GNE_mean_gne','GNE_stddev_gne','GNE_sum_gne'])
                       
# df_joint_test_org = pd.read_csv(f'./features/cache_all_features_test_V{VERSION}.csv').drop(
#     columns=['GNE_max_gne','GNE_mean_gne','GNE_stddev_gne','GNE_sum_gne'])

# print("shape of train set: ", df_joint_train_org.shape)
# print("shape of test  set: ", df_joint_test_org.shape)


# df_joint_train  = pd.read_csv(f'./features/cache_train_V4_resampled_2500.csv')
df_joint_test = pd.read_csv(f'./cache_all_features_test_V4.csv')

# print("shape of train set: ", df_joint_train.shape)
print("shape of test  set: ", df_joint_test.shape)

df_joint_train_aug  = pd.read_csv(f'./cache_train_V4_augmented.csv')
feature_column_names = [i for i in df_joint_train_aug.columns \
                        if i not in ['file_path','renamed_file_path','split','sentiment_value','emotional_category']]
             
print("shape of train set: ", df_joint_train_aug.shape)
df_joint_train_aug.groupby('sentiment_value')['file_path'].count()

shape of test  set:  (1180, 1550)
shape of train set:  (24885, 1546)


sentiment_value
-1    7999
 0    8560
 1    8326
Name: file_path, dtype: int64

### best guess feature combinations

In [50]:
# generate selected features 
def generate_selected_features_by_type(feature_column_names,input,stats,number=1):
    selected_result = []
    for name in feature_column_names:
        if input+"_"+stats in name:
            selected_result.append(name)
    if number < len(selected_result):
        selected_result = selected_result[:number]
    return selected_result

# example to take mfcc 20 mean & std; mel32; zcr all 5 stats features
feature_MFCC20_mean  = generate_selected_features_by_type(feature_column_names,"mfcc","mean",20)
feature_MFCC20_std   = generate_selected_features_by_type(feature_column_names,"mfcc","std",20)
feature_mel32_median = generate_selected_features_by_type(feature_column_names,"mel32","median",32)
feature_mel32_std    = generate_selected_features_by_type(feature_column_names,"mel32","std",32)
feature_zcr_stats    = generate_selected_features_by_type(feature_column_names,"zcr","",5)
feature_rms_stats    = generate_selected_features_by_type(feature_column_names,"rms","",5)
selected_spect = ['Spectrum_band_energy_difference','Spectrum_band_density_difference','Spectrum_center_of_gravity_spectrum','Spectrum_skewness_spectrum','Spectrum_kurtosis_spectrum', 'Spectrum_stddev_spectrum','Spectrum_band_density', 'Spectrum_band_energy']
selected_formant = ['Formant_f1_mean','Formant_f1_median','Formant_f3_mean','Formant_fitch_vtl','Formant_mff','Formant_formant_dispersion']
selected_pitch = ['Pitch_pitch_slope_without_octave_jumps', 'Pitch_q3_pitch','Pitch_stddev_pitch', 'Pitch_mean_absolute_pitch_slope','Pitch_mean_pitch', 'Pitch_max_pitch', 'Pitch_q1_pitch', 'Pitch_min_pitch']
selected_intensity = ['Intensity_max_intensity', 'Intensity_q3_intensity','Intensity_median_intensity', 'Intensity_mean_intensity', 'Intensity_stddev_intensity','Intensity_relative_max_intensity_time']
selected_HNR = ['HNR_stddev_hnr', 'HNR_mean_hnr','HNR_relative_min_hnr_time','HNR_max_hnr']
selected_prosody = selected_intensity + selected_pitch # + ['Local Jitter','Local Shimmer']
selected_feature_names = feature_MFCC20_mean + feature_MFCC20_std + feature_mel32_median + feature_mel32_std + \
                        feature_zcr_stats + feature_rms_stats + selected_intensity + selected_pitch 

In [51]:
# default use augmented training set and balanced test set
X_train = df_joint_train_aug[selected_feature_names]
y_train_s = df_joint_train_aug['sentiment_value']
y_train_e = df_joint_train_aug['emotional_category']

# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# y_e_num = label_encoder.fit_transform(y_train_e)

X_test = df_joint_test[selected_feature_names]
y_test_s = df_joint_test['sentiment_value']
y_test_e = df_joint_test['emotional_category']

# y_test_e_num = label_encoder.fit_transform(y_test_e)

In [52]:
X_train.shape, X_test.shape

((24885, 128), (1180, 128))

In [53]:
# !pip install lightgbm

## Models

In [54]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score, precision_recall_fscore_support
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier,HistGradientBoostingClassifier
from lightgbm import LGBMClassifier

# Common adjustable parameters
common_params = {
    'RandomForest': {'n_estimators': 100, 'criterion':'gini', 'max_depth': None, 
                     'min_samples_split':100, 'bootstrap':True, 'n_jobs':3, 'random_state': RANDOM_SEED},
    'SVM': {'kernel': 'rbf', 'C': 1.0, 'probability': True},
    'KNN': {'n_neighbors': 2},
    'KNN1': {'n_neighbors': 3},
    'KNN2': {'n_neighbors': 4},
    'KNN3': {'n_neighbors': 5},
    'KNN4': {'n_neighbors': 6},
    'KNN5': {'n_neighbors': 8},
    'KNN6': {'n_neighbors': 10},
    'KNN7': {'n_neighbors': 12},
    'KNN8': {'n_neighbors': 15},
    'KNN9': {'n_neighbors': 20},
    'KNN10': {'n_neighbors': 30},
    'GradientBoosting': {'loss': 'log_loss', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                         'criterion': 'friedman_mse', 'min_samples_split': 2, 'max_depth': 3},
    'GradientBoostingFast': {'loss': 'log_loss', 'learning_rate': 0.1, 'max_iter': 100},
    'AdaBoost': {'n_estimators': 50, 'learning_rate': 1.0},
    'LightGBM': {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 1.0,
                 'min_child_samples': 20, 'max_depth': -1}    
}

# Models with common adjustable parameters
dtree   = DecisionTreeClassifier()
rforest = RandomForestClassifier(**common_params['RandomForest'])
svm     = SVC(**common_params['SVM'])
knn     = KNeighborsClassifier(**common_params['KNN'])
knn1     = KNeighborsClassifier(**common_params['KNN1'])
knn2     = KNeighborsClassifier(**common_params['KNN2'])
knn3     = KNeighborsClassifier(**common_params['KNN3'])
knn4     = KNeighborsClassifier(**common_params['KNN4'])
knn5     = KNeighborsClassifier(**common_params['KNN5'])
knn6     = KNeighborsClassifier(**common_params['KNN6'])
knn7     = KNeighborsClassifier(**common_params['KNN7'])
knn8     = KNeighborsClassifier(**common_params['KNN8'])
knn9     = KNeighborsClassifier(**common_params['KNN9'])
knn10     = KNeighborsClassifier(**common_params['KNN10'])
gboost  = GradientBoostingClassifier(**common_params['GradientBoosting'])
gb_fast = HistGradientBoostingClassifier(**common_params['GradientBoostingFast'])
adaBoost= AdaBoostClassifier(**common_params['AdaBoost'])
lightgbm=LGBMClassifier(**common_params['LightGBM'])

In [55]:
# def exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train, y_test):
#     start = time.time()
#     print(f'Model Name: {clf_model.__class__};\n Train set shape {X_train.shape}, num of class {y_train.unique().size}')
#     predictions = clf_model.fit(X_train, y_train).predict(X_test.values)
    
#     print(classification_report(y_test, predictions))
#     print(confusion_matrix(y_test, predictions))
    
#     precision, recall, f1score, support = precision_recall_fscore_support(y_test, predictions, average=None)
#     # TODO make all metrics into result dict for recording 
#     probabilities = clf_model.predict_proba(X_test.values)
#     print('prbabilities distribution: \n', pd.DataFrame(probabilities,columns=clf_model.classes_).describe())
#     print(f'Time taken: {round(time.time()-start,3)} seconds.\n')

In [56]:
import pickle

def exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train, y_test,verbose=True):
    start = time.time()
    
    clf_model.fit(X_train, y_train)
    predictions = clf_model.predict(X_test.values)
    
    # Calculate metrics
    report = classification_report(y_test, predictions, output_dict=True)
    metrics = {
        'accuracy': report['accuracy'],
        'precision': report['macro avg']['precision'],
        'recall': report['macro avg']['recall'],
        'f1-score': report['macro avg']['f1-score']
    }
    for class_name in report.keys():
        if class_name not in ['accuracy', 'macro avg', 'weighted avg']:
            metrics[class_name+'_precision'] = report[class_name]['precision']
            metrics[class_name+'_recall'] = report[class_name]['recall'],
            metrics[class_name+'_f1-score'] = report[class_name]['f1-score']
    
    feature_columns = list(X_train.columns)
    num_classes = y_train.nunique()
    class_names = list(y_train.unique())
    
    model_filename = f"./models/{clf_model.__class__.__name__}_model"
    model_filename += f"_{num_classes}cls_{len(feature_columns)}feat_{round(report['accuracy']*100)}acc.pkl"
    with open(model_filename, 'wb') as file:
        pickle.dump(clf_model, file)
    
    results = {**metrics,
        'num_classes': num_classes,
        'class_names': class_names,
        'model_filename': model_filename,
        'feature_columns': feature_columns,
    }
    
    if verbose:
        print(f"Model Name: {clf_model.__class__.__name__};\nTrain set shape {X_train.shape}, num of class {num_classes}")
        print(classification_report(y_test, predictions))
        print(confusion_matrix(y_test, predictions))
        probabilities = clf_model.predict_proba(X_test.values)
        print('Probabilities distribution:\n', pd.DataFrame(probabilities, columns=clf_model.classes_).describe())
    print(f"Model: {clf_model.__class__.__name__};Time taken: {round(time.time()-start, 3)} seconds.\n")

    return results, clf_model


### Sentiment 3-class Classifier Sample code

In [57]:
result, m_trained = exp_clf_with_feature_selected(knn, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.76      0.80      0.78       748
           0       0.49      0.54      0.52       183
           1       0.62      0.50      0.55       249

    accuracy                           0.69      1180
   macro avg       0.63      0.61      0.62      1180
weighted avg       0.69      0.69      0.69      1180

[[595  80  73]
 [ 80  99   4]
 [103  21 125]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.518220     0.211864     0.269915
std       0.423402     0.354343     0.384476
min       0.000000     0.000000     0.000000
25%       0.000000     0.000000     0.000000
50%       0.500000     0.000000     0.000000
75%       1.000000     0.500000     0.500000
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.756 seconds.



In [58]:
result, m_trained = exp_clf_with_feature_selected(knn1, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.80      0.69      0.74       748
           0       0.50      0.65      0.57       183
           1       0.53      0.64      0.58       249

    accuracy                           0.67      1180
   macro avg       0.61      0.66      0.63      1180
weighted avg       0.70      0.67      0.68      1180

[[514 101 133]
 [ 56 119   8]
 [ 71  18 160]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.516102     0.214689     0.269209
std       0.389796     0.329281     0.362367
min       0.000000     0.000000     0.000000
25%       0.000000     0.000000     0.000000
50%       0.666667     0.000000     0.000000
75%       1.000000     0.333333     0.666667
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.721 seconds.



In [59]:
result, m_trained = exp_clf_with_feature_selected(knn2, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.80      0.75      0.77       748
           0       0.48      0.60      0.54       183
           1       0.59      0.59      0.59       249

    accuracy                           0.69      1180
   macro avg       0.62      0.65      0.63      1180
weighted avg       0.70      0.69      0.70      1180

[[560  94  94]
 [ 64 110   9]
 [ 80  23 146]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.505085     0.219703     0.275212
std       0.367919     0.316334     0.346511
min       0.000000     0.000000     0.000000
25%       0.250000     0.000000     0.000000
50%       0.500000     0.000000     0.000000
75%       0.750000     0.500000     0.500000
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.722 seconds.



In [60]:
result, m_trained = exp_clf_with_feature_selected(knn3, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.79      0.69      0.74       748
           0       0.46      0.63      0.53       183
           1       0.57      0.63      0.60       249

    accuracy                           0.67      1180
   macro avg       0.61      0.65      0.62      1180
weighted avg       0.69      0.67      0.68      1180

[[517 117 114]
 [ 63 115   5]
 [ 72  20 157]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.500000     0.225254     0.274746
std       0.355688     0.308884     0.339044
min       0.000000     0.000000     0.000000
25%       0.200000     0.000000     0.000000
50%       0.400000     0.000000     0.200000
75%       0.800000     0.400000     0.400000
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.719 seconds.



In [61]:
result, m_trained = exp_clf_with_feature_selected(knn4, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.80      0.74      0.77       748
           0       0.51      0.66      0.57       183
           1       0.60      0.61      0.61       249

    accuracy                           0.70      1180
   macro avg       0.64      0.67      0.65      1180
weighted avg       0.71      0.70      0.70      1180

[[552  99  97]
 [ 59 120   4]
 [ 79  18 152]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.498870     0.228390     0.272740
std       0.346433     0.304258     0.329960
min       0.000000     0.000000     0.000000
25%       0.166667     0.000000     0.000000
50%       0.500000     0.000000     0.166667
75%       0.833333     0.333333     0.500000
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.722 seconds.



In [62]:
result, m_trained = exp_clf_with_feature_selected(knn5, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.81      0.73      0.76       748
           0       0.48      0.65      0.55       183
           1       0.61      0.63      0.62       249

    accuracy                           0.69      1180
   macro avg       0.63      0.67      0.64      1180
weighted avg       0.71      0.69      0.70      1180

[[543 108  97]
 [ 59 119   5]
 [ 70  22 157]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.494068     0.234534     0.271398
std       0.335144     0.299401     0.319642
min       0.000000     0.000000     0.000000
25%       0.250000     0.000000     0.000000
50%       0.500000     0.125000     0.125000
75%       0.750000     0.375000     0.500000
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.716 seconds.



In [63]:
result, m_trained = exp_clf_with_feature_selected(knn6, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.81      0.71      0.76       748
           0       0.47      0.68      0.55       183
           1       0.60      0.63      0.62       249

    accuracy                           0.69      1180
   macro avg       0.63      0.67      0.64      1180
weighted avg       0.71      0.69      0.70      1180

[[531 117 100]
 [ 55 124   4]
 [ 68  24 157]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.490254     0.238051     0.271695
std       0.323360     0.295056     0.313256
min       0.000000     0.000000     0.000000
25%       0.200000     0.000000     0.000000
50%       0.500000     0.100000     0.200000
75%       0.800000     0.400000     0.400000
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.737 seconds.



In [64]:
result, m_trained = exp_clf_with_feature_selected(knn7, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.81      0.70      0.75       748
           0       0.46      0.68      0.55       183
           1       0.59      0.63      0.61       249

    accuracy                           0.68      1180
   macro avg       0.62      0.67      0.64      1180
weighted avg       0.71      0.68      0.69      1180

[[522 120 106]
 [ 54 124   5]
 [ 68  24 157]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.488771     0.237006     0.274223
std       0.317473     0.289483     0.309161
min       0.000000     0.000000     0.000000
25%       0.250000     0.000000     0.000000
50%       0.500000     0.083333     0.166667
75%       0.750000     0.416667     0.416667
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.737 seconds.



In [65]:
result, m_trained = exp_clf_with_feature_selected(knn8, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.81      0.68      0.74       748
           0       0.47      0.71      0.57       183
           1       0.56      0.63      0.59       249

    accuracy                           0.67      1180
   macro avg       0.62      0.67      0.63      1180
weighted avg       0.71      0.67      0.68      1180

[[508 122 118]
 [ 48 130   5]
 [ 69  23 157]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.483107     0.241525     0.275367
std       0.306357     0.285424     0.303542
min       0.000000     0.000000     0.000000
25%       0.266667     0.000000     0.000000
50%       0.466667     0.133333     0.133333
75%       0.733333     0.400000     0.466667
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.756 seconds.



In [66]:
result, m_trained = exp_clf_with_feature_selected(knn9, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.81      0.67      0.73       748
           0       0.45      0.71      0.55       183
           1       0.57      0.62      0.59       249

    accuracy                           0.67      1180
   macro avg       0.61      0.67      0.63      1180
weighted avg       0.70      0.67      0.68      1180

[[502 134 112]
 [ 48 130   5]
 [ 71  24 154]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.476949     0.246737     0.276314
std       0.296649     0.280297     0.298413
min       0.000000     0.000000     0.000000
25%       0.250000     0.000000     0.000000
50%       0.450000     0.150000     0.150000
75%       0.700000     0.450000     0.450000
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.777 seconds.



In [67]:
result, m_trained = exp_clf_with_feature_selected(knn10, X_train, X_test, y_train_s, y_test_s)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 3
              precision    recall  f1-score   support

          -1       0.85      0.66      0.74       748
           0       0.45      0.78      0.57       183
           1       0.60      0.66      0.63       249

    accuracy                           0.68      1180
   macro avg       0.63      0.70      0.65      1180
weighted avg       0.73      0.68      0.69      1180

[[497 142 109]
 [ 39 143   1]
 [ 52  32 165]]
Probabilities distribution:
                 -1            0            1
count  1180.000000  1180.000000  1180.000000
mean      0.462938     0.253870     0.283192
std       0.282281     0.274405     0.292576
min       0.000000     0.000000     0.000000
25%       0.266667     0.000000     0.033333
50%       0.433333     0.166667     0.166667
75%       0.666667     0.466667     0.433333
max       1.000000     1.000000     1.000000
Model: KNeighborsClassifier;Time taken: 0.755 seconds.



In [68]:
# change model as the first parameter in the function 
# result, m_trained = exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_s, y_test_s)

In [69]:
# result, m_trained = exp_clf_with_feature_selected(adaBoost, X_train, X_test, y_train_s, y_test_s)

In [70]:
# result, m_trained = exp_clf_with_feature_selected(gboost, X_train, X_test, y_train_s, y_test_s)

In [71]:
# result, m_trained = exp_clf_with_feature_selected(knn, X_train, X_test, y_train_s, y_test_s)

In [72]:
# result, m_trained = exp_clf_with_feature_selected(svm, X_train, X_test, y_train_s, y_test_s)

In [73]:
# result, m_trained = exp_clf_with_feature_selected(rforest, X_train, X_test, y_train_s, y_test_s)

In [74]:
# result, m_trained = exp_clf_with_feature_selected(lightgbm, X_train, X_test, y_train_s, y_test_s)

In [75]:
# result, m_trained = exp_clf_with_feature_selected(dtree, X_train, X_test, y_train_s, y_test_s)

### How to save experiment metrics result

In [76]:
exp_results = []
for clf_model in [knn, knn1, knn2, knn3, knn4, knn5,knn6,knn7,knn8,knn9,knn10]:
    result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_s, y_test_s,verbose=False)
    exp_results.append(result)
pd.DataFrame(exp_results)

Model: KNeighborsClassifier;Time taken: 0.414 seconds.

Model: KNeighborsClassifier;Time taken: 0.434 seconds.

Model: KNeighborsClassifier;Time taken: 0.427 seconds.

Model: KNeighborsClassifier;Time taken: 0.446 seconds.

Model: KNeighborsClassifier;Time taken: 0.455 seconds.

Model: KNeighborsClassifier;Time taken: 0.445 seconds.

Model: KNeighborsClassifier;Time taken: 0.457 seconds.

Model: KNeighborsClassifier;Time taken: 0.45 seconds.

Model: KNeighborsClassifier;Time taken: 0.46 seconds.

Model: KNeighborsClassifier;Time taken: 0.468 seconds.

Model: KNeighborsClassifier;Time taken: 0.453 seconds.



Unnamed: 0,accuracy,precision,recall,f1-score,-1_precision,-1_recall,-1_f1-score,0_precision,0_recall,0_f1-score,1_precision,1_recall,1_f1-score,num_classes,class_names,model_filename,feature_columns
0,0.694068,0.626198,0.612815,0.617037,0.764781,"(0.7954545454545454,)",0.779817,0.495,"(0.5409836065573771,)",0.516971,0.618812,"(0.5020080321285141,)",0.554324,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
1,0.672034,0.611145,0.660003,0.62908,0.801872,"(0.6871657754010695,)",0.740101,0.5,"(0.6502732240437158,)",0.565321,0.531561,"(0.642570281124498,)",0.581818,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
2,0.691525,0.622127,0.645367,0.631427,0.795455,"(0.7486631016042781,)",0.77135,0.484581,"(0.6010928961748634,)",0.536585,0.586345,"(0.5863453815261044,)",0.586345,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
3,0.668644,0.606045,0.650038,0.621801,0.792945,"(0.6911764705882353,)",0.738571,0.456349,"(0.6284153005464481,)",0.528736,0.568841,"(0.6305220883534136,)",0.598095,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
4,0.698305,0.635707,0.668049,0.648246,0.8,"(0.7379679144385026,)",0.767733,0.506329,"(0.6557377049180327,)",0.571429,0.600791,"(0.6104417670682731,)",0.605578,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
5,0.694068,0.630708,0.66891,0.644608,0.808036,"(0.7259358288770054,)",0.764789,0.477912,"(0.6502732240437158,)",0.550926,0.606178,"(0.6305220883534136,)",0.61811,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
6,0.688136,0.627128,0.67267,0.642249,0.811927,"(0.7098930481283422,)",0.757489,0.467925,"(0.6775956284153005,)",0.553571,0.601533,"(0.6305220883534136,)",0.615686,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
7,0.680508,0.619689,0.66866,0.635746,0.810559,"(0.6978609625668449,)",0.75,0.462687,"(0.6775956284153005,)",0.549889,0.585821,"(0.6305220883534136,)",0.60735,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
8,0.673729,0.615414,0.67335,0.633748,0.8128,"(0.679144385026738,)",0.739985,0.472727,"(0.7103825136612022,)",0.567686,0.560714,"(0.6305220883534136,)",0.593573,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
9,0.666102,0.609343,0.66666,0.625902,0.808374,"(0.6711229946524064,)",0.733382,0.451389,"(0.7103825136612022,)",0.552017,0.568266,"(0.6184738955823293,)",0.592308,3,"[-1, 1, 0]",./models/KNeighborsClassifier_model_3cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."


In [77]:
# exp_results = []
# for clf_model in [rforest,adaBoost,gb_fast,gboost,knn,svm,lightgbm,dtree]:
#     result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_s, y_test_s,verbose=False)
#     exp_results.append(result)
# pd.DataFrame(exp_results)

In [78]:
pd.DataFrame(exp_results).to_excel("exp_result-0316-v4-aug-knn.xlsx")

### Emotion 8-class

In [79]:
exp_results = []
for clf_model in [knn, knn1, knn2, knn3, knn4, knn5,knn6,knn7,knn8,knn9,knn10]:
    result, m_trained = exp_clf_with_feature_selected(clf_model, X_train, X_test, y_train_e, y_test_e,verbose=False)
    exp_results.append(result)
pd.DataFrame(exp_results)

Model: KNeighborsClassifier;Time taken: 0.544 seconds.

Model: KNeighborsClassifier;Time taken: 0.517 seconds.

Model: KNeighborsClassifier;Time taken: 0.549 seconds.

Model: KNeighborsClassifier;Time taken: 0.534 seconds.

Model: KNeighborsClassifier;Time taken: 0.534 seconds.

Model: KNeighborsClassifier;Time taken: 0.582 seconds.

Model: KNeighborsClassifier;Time taken: 0.556 seconds.

Model: KNeighborsClassifier;Time taken: 0.547 seconds.

Model: KNeighborsClassifier;Time taken: 0.566 seconds.

Model: KNeighborsClassifier;Time taken: 0.569 seconds.

Model: KNeighborsClassifier;Time taken: 0.582 seconds.



Unnamed: 0,accuracy,precision,recall,f1-score,Anger_precision,Anger_recall,Anger_f1-score,Calmness_precision,Calmness_recall,Calmness_f1-score,...,Sadness_precision,Sadness_recall,Sadness_f1-score,Surprise_precision,Surprise_recall,Surprise_f1-score,num_classes,class_names,model_filename,feature_columns
0,0.509322,0.555103,0.553502,0.547137,0.569231,"(0.5935828877005348,)",0.581152,0.571429,"(0.75,)",0.648649,...,0.741379,"(0.45989304812834225,)",0.567657,0.796296,"(0.6935483870967742,)",0.741379,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
1,0.521186,0.553484,0.582699,0.562715,0.602273,"(0.5668449197860963,)",0.584022,0.666667,"(0.875,)",0.756757,...,0.666667,"(0.5561497326203209,)",0.606414,0.681159,"(0.7580645161290323,)",0.717557,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
2,0.517797,0.54535,0.573202,0.547283,0.623457,"(0.5401069518716578,)",0.578797,0.541667,"(0.8125,)",0.65,...,0.643312,"(0.5401069518716578,)",0.587209,0.681159,"(0.7580645161290323,)",0.717557,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
3,0.526271,0.566992,0.58711,0.562041,0.691275,"(0.5508021390374331,)",0.613095,0.583333,"(0.875,)",0.7,...,0.681818,"(0.5614973262032086,)",0.615836,0.701493,"(0.7580645161290323,)",0.728682,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
4,0.528814,0.565686,0.587061,0.557793,0.704698,"(0.5614973262032086,)",0.625,0.538462,"(0.875,)",0.666667,...,0.666667,"(0.5454545454545454,)",0.6,0.703125,"(0.7258064516129032,)",0.714286,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
5,0.541525,0.585885,0.596922,0.569869,0.771429,"(0.5775401069518716,)",0.66055,0.541667,"(0.8125,)",0.65,...,0.677019,"(0.5828877005347594,)",0.626437,0.704225,"(0.8064516129032258,)",0.75188,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
6,0.54661,0.59159,0.601413,0.569498,0.769784,"(0.5721925133689839,)",0.656442,0.541667,"(0.8125,)",0.65,...,0.6625,"(0.5668449197860963,)",0.610951,0.666667,"(0.8064516129032258,)",0.729927,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
7,0.552542,0.604342,0.606733,0.573986,0.773723,"(0.5668449197860963,)",0.654321,0.565217,"(0.8125,)",0.666667,...,0.698718,"(0.5828877005347594,)",0.635569,0.641026,"(0.8064516129032258,)",0.714286,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
8,0.559322,0.620059,0.623668,0.579317,0.787402,"(0.5347593582887701,)",0.636943,0.518519,"(0.875,)",0.651163,...,0.68323,"(0.5882352941176471,)",0.632184,0.654321,"(0.8548387096774194,)",0.741259,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."
9,0.538136,0.604324,0.606876,0.560963,0.806723,"(0.5133689839572193,)",0.627451,0.518519,"(0.875,)",0.651163,...,0.664596,"(0.5721925133689839,)",0.614943,0.654321,"(0.8548387096774194,)",0.741259,8,"[Disgust, Anger, Fear, Sadness, Happiness, Sur...",./models/KNeighborsClassifier_model_8cls_128fe...,"[mfcc_mean_1, mfcc_mean_2, mfcc_mean_3, mfcc_m..."


In [80]:
pd.DataFrame(exp_results).to_excel("exp_result-0316-v4-aug-knn-8.xlsx")

In [81]:
exp_clf_with_feature_selected(knn, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.57      0.59      0.58       187
    Calmness       0.57      0.75      0.65        16
     Disgust       0.45      0.42      0.43       187
        Fear       0.40      0.46      0.43       187
   Happiness       0.41      0.45      0.43       187
  Neutrality       0.50      0.60      0.55       167
     Sadness       0.74      0.46      0.57       187
    Surprise       0.80      0.69      0.74        62

    accuracy                           0.51      1180
   macro avg       0.56      0.55      0.55      1180
weighted avg       0.53      0.51      0.51      1180

[[111   0  17  23  30   1   1   4]
 [  0  12   0   0   0   1   3   0]
 [ 10   0  78  30  26  39   4   0]
 [ 20   0  22  86  23  20  11   5]
 [ 47   2  15  27  84  11   1   0]
 [  2   2  20  17  14 101  10   1]
 [  2   3  20  27  19  29  86   1]
 [  3   2   1   4   9   0

({'accuracy': 0.5093220338983051,
  'precision': 0.5551034069703833,
  'recall': 0.5535022438304347,
  'f1-score': 0.5471368092610358,
  'Anger_precision': 0.5692307692307692,
  'Anger_recall': (0.5935828877005348,),
  'Anger_f1-score': 0.5811518324607331,
  'Calmness_precision': 0.5714285714285714,
  'Calmness_recall': (0.75,),
  'Calmness_f1-score': 0.6486486486486486,
  'Disgust_precision': 0.4508670520231214,
  'Disgust_recall': (0.41711229946524064,),
  'Disgust_f1-score': 0.43333333333333335,
  'Fear_precision': 0.40186915887850466,
  'Fear_recall': (0.45989304812834225,),
  'Fear_f1-score': 0.428927680798005,
  'Happiness_precision': 0.4097560975609756,
  'Happiness_recall': (0.44919786096256686,),
  'Happiness_f1-score': 0.4285714285714286,
  'Neutrality_precision': 0.5,
  'Neutrality_recall': (0.6047904191616766,),
  'Neutrality_f1-score': 0.5474254742547425,
  'Sadness_precision': 0.7413793103448276,
  'Sadness_recall': (0.45989304812834225,),
  'Sadness_f1-score': 0.56765676

In [82]:
exp_clf_with_feature_selected(knn1, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.60      0.57      0.58       187
    Calmness       0.67      0.88      0.76        16
     Disgust       0.45      0.37      0.41       187
        Fear       0.44      0.37      0.40       187
   Happiness       0.45      0.55      0.49       187
  Neutrality       0.47      0.62      0.53       167
     Sadness       0.67      0.56      0.61       187
    Surprise       0.68      0.76      0.72        62

    accuracy                           0.52      1180
   macro avg       0.55      0.58      0.56      1180
weighted avg       0.52      0.52      0.52      1180

[[106   0  11  17  41   1   1  10]
 [  0  14   0   0   0   1   0   1]
 [ 13   0  69  22  27  41  14   1]
 [ 17   0  18  70  31  28  17   6]
 [ 31   1  15  18 102  15   4   1]
 [  3   3  18  13  10 103  16   1]
 [  3   1  20  18   9  30 104   2]
 [  3   2   1   2   7   0

({'accuracy': 0.5211864406779662,
  'precision': 0.5534839613835778,
  'recall': 0.5826994611346568,
  'f1-score': 0.5627150350426271,
  'Anger_precision': 0.6022727272727273,
  'Anger_recall': (0.5668449197860963,),
  'Anger_f1-score': 0.5840220385674931,
  'Calmness_precision': 0.6666666666666666,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.7567567567567567,
  'Disgust_precision': 0.45394736842105265,
  'Disgust_recall': (0.3689839572192513,),
  'Disgust_f1-score': 0.40707964601769914,
  'Fear_precision': 0.4375,
  'Fear_recall': (0.37433155080213903,),
  'Fear_f1-score': 0.4034582132564842,
  'Happiness_precision': 0.44933920704845814,
  'Happiness_recall': (0.5454545454545454,),
  'Happiness_f1-score': 0.4927536231884058,
  'Neutrality_precision': 0.4703196347031963,
  'Neutrality_recall': (0.6167664670658682,),
  'Neutrality_f1-score': 0.533678756476684,
  'Sadness_precision': 0.6666666666666666,
  'Sadness_recall': (0.5561497326203209,),
  'Sadness_f1-score': 0.606413

In [83]:
exp_clf_with_feature_selected(knn2, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.62      0.54      0.58       187
    Calmness       0.54      0.81      0.65        16
     Disgust       0.49      0.34      0.40       187
        Fear       0.53      0.39      0.45       187
   Happiness       0.42      0.57      0.48       187
  Neutrality       0.43      0.64      0.51       167
     Sadness       0.64      0.54      0.59       187
    Surprise       0.68      0.76      0.72        62

    accuracy                           0.52      1180
   macro avg       0.55      0.57      0.55      1180
weighted avg       0.53      0.52      0.51      1180

[[101   0   7  18  48   3   0  10]
 [  0  13   0   0   0   0   1   2]
 [ 11   0  63  11  36  51  14   1]
 [ 14   0  12  72  32  31  20   6]
 [ 27   2  10  16 107  18   5   2]
 [  3   5  20   7   9 107  16   0]
 [  3   2  15  11  15  39 101   1]
 [  3   2   1   0   9   0

({'accuracy': 0.5177966101694915,
  'precision': 0.5453504297282685,
  'recall': 0.5732018287256778,
  'f1-score': 0.5472826423915469,
  'Anger_precision': 0.6234567901234568,
  'Anger_recall': (0.5401069518716578,),
  'Anger_f1-score': 0.5787965616045845,
  'Calmness_precision': 0.5416666666666666,
  'Calmness_recall': (0.8125,),
  'Calmness_f1-score': 0.65,
  'Disgust_precision': 0.4921875,
  'Disgust_recall': (0.33689839572192515,),
  'Disgust_f1-score': 0.4,
  'Fear_precision': 0.5333333333333333,
  'Fear_recall': (0.3850267379679144,),
  'Fear_f1-score': 0.44720496894409933,
  'Happiness_precision': 0.41796875,
  'Happiness_recall': (0.5721925133689839,),
  'Happiness_f1-score': 0.48306997742663654,
  'Neutrality_precision': 0.42971887550200805,
  'Neutrality_recall': (0.6407185628742516,),
  'Neutrality_f1-score': 0.5144230769230769,
  'Sadness_precision': 0.643312101910828,
  'Sadness_recall': (0.5401069518716578,),
  'Sadness_f1-score': 0.5872093023255814,
  'Surprise_precision

In [84]:
exp_clf_with_feature_selected(knn3, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.69      0.55      0.61       187
    Calmness       0.58      0.88      0.70        16
     Disgust       0.53      0.33      0.40       187
        Fear       0.53      0.40      0.46       187
   Happiness       0.42      0.58      0.49       187
  Neutrality       0.40      0.65      0.49       167
     Sadness       0.68      0.56      0.62       187
    Surprise       0.70      0.76      0.73        62

    accuracy                           0.53      1180
   macro avg       0.57      0.59      0.56      1180
weighted avg       0.55      0.53      0.53      1180

[[103   0   5  18  47   5   0   9]
 [  0  14   0   0   0   0   1   1]
 [  7   0  61  12  37  60   9   1]
 [ 12   0  10  75  27  36  20   7]
 [ 24   1  11  15 108  24   3   1]
 [  1   6  15   9  12 108  16   0]
 [  0   1  12  11  18  39 105   1]
 [  2   2   1   1   9   0

({'accuracy': 0.5262711864406779,
  'precision': 0.5669915463960128,
  'recall': 0.5871104253025775,
  'f1-score': 0.5620405452684228,
  'Anger_precision': 0.6912751677852349,
  'Anger_recall': (0.5508021390374331,),
  'Anger_f1-score': 0.6130952380952381,
  'Calmness_precision': 0.5833333333333334,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.7000000000000001,
  'Disgust_precision': 0.5304347826086957,
  'Disgust_recall': (0.32620320855614976,),
  'Disgust_f1-score': 0.4039735099337749,
  'Fear_precision': 0.5319148936170213,
  'Fear_recall': (0.40106951871657753,),
  'Fear_f1-score': 0.45731707317073167,
  'Happiness_precision': 0.4186046511627907,
  'Happiness_recall': (0.5775401069518716,),
  'Happiness_f1-score': 0.4853932584269663,
  'Neutrality_precision': 0.39705882352941174,
  'Neutrality_recall': (0.6467065868263473,),
  'Neutrality_f1-score': 0.4920273348519363,
  'Sadness_precision': 0.6818181818181818,
  'Sadness_recall': (0.5614973262032086,),
  'Sadness_f1-sco

In [85]:
exp_clf_with_feature_selected(knn4, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.70      0.56      0.62       187
    Calmness       0.54      0.88      0.67        16
     Disgust       0.53      0.30      0.39       187
        Fear       0.55      0.40      0.46       187
   Happiness       0.42      0.59      0.49       187
  Neutrality       0.41      0.69      0.52       167
     Sadness       0.67      0.55      0.60       187
    Surprise       0.70      0.73      0.71        62

    accuracy                           0.53      1180
   macro avg       0.57      0.59      0.56      1180
weighted avg       0.56      0.53      0.53      1180

[[105   1   3  13  54   2   1   8]
 [  0  14   0   0   0   1   1   0]
 [  6   0  57  13  32  63  15   1]
 [ 12   0   8  74  31  37  19   6]
 [ 23   2  12  12 111  22   3   2]
 [  1   5  14   7  11 116  12   1]
 [  0   2  12  15  15  40 102   1]
 [  2   2   1   1  11   0

({'accuracy': 0.5288135593220339,
  'precision': 0.565686116582003,
  'recall': 0.5870608435965743,
  'f1-score': 0.5577927996900953,
  'Anger_precision': 0.7046979865771812,
  'Anger_recall': (0.5614973262032086,),
  'Anger_f1-score': 0.625,
  'Calmness_precision': 0.5384615384615384,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.6666666666666667,
  'Disgust_precision': 0.5327102803738317,
  'Disgust_recall': (0.3048128342245989,),
  'Disgust_f1-score': 0.3877551020408163,
  'Fear_precision': 0.5481481481481482,
  'Fear_recall': (0.39572192513368987,),
  'Fear_f1-score': 0.4596273291925466,
  'Happiness_precision': 0.4188679245283019,
  'Happiness_recall': (0.5935828877005348,),
  'Happiness_f1-score': 0.49115044247787615,
  'Neutrality_precision': 0.4128113879003559,
  'Neutrality_recall': (0.6946107784431138,),
  'Neutrality_f1-score': 0.5178571428571428,
  'Sadness_precision': 0.6666666666666666,
  'Sadness_recall': (0.5454545454545454,),
  'Sadness_f1-score': 0.6,
  'Sur

In [86]:
exp_clf_with_feature_selected(knn5, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.77      0.58      0.66       187
    Calmness       0.54      0.81      0.65        16
     Disgust       0.56      0.31      0.40       187
        Fear       0.61      0.38      0.47       187
   Happiness       0.44      0.59      0.51       187
  Neutrality       0.38      0.71      0.49       167
     Sadness       0.68      0.58      0.63       187
    Surprise       0.70      0.81      0.75        62

    accuracy                           0.54      1180
   macro avg       0.59      0.60      0.57      1180
weighted avg       0.58      0.54      0.54      1180

[[108   1   2  12  47   6   0  11]
 [  0  13   0   0   0   1   2   0]
 [  1   0  58  12  33  67  14   2]
 [  8   0  10  71  32  43  19   4]
 [ 19   2  13  10 111  26   4   2]
 [  1   5  16   3   9 119  13   1]
 [  2   2   4   7  11  51 109   1]
 [  1   1   1   1   7   1

({'accuracy': 0.5415254237288135,
  'precision': 0.5858851735846976,
  'recall': 0.5969220913227883,
  'f1-score': 0.5698688602416522,
  'Anger_precision': 0.7714285714285715,
  'Anger_recall': (0.5775401069518716,),
  'Anger_f1-score': 0.6605504587155964,
  'Calmness_precision': 0.5416666666666666,
  'Calmness_recall': (0.8125,),
  'Calmness_f1-score': 0.65,
  'Disgust_precision': 0.5576923076923077,
  'Disgust_recall': (0.31016042780748665,),
  'Disgust_f1-score': 0.39862542955326463,
  'Fear_precision': 0.6120689655172413,
  'Fear_recall': (0.37967914438502676,),
  'Fear_f1-score': 0.46864686468646866,
  'Happiness_precision': 0.444,
  'Happiness_recall': (0.5935828877005348,),
  'Happiness_f1-score': 0.5080091533180778,
  'Neutrality_precision': 0.37898089171974525,
  'Neutrality_recall': (0.7125748502994012,),
  'Neutrality_f1-score': 0.4948024948024948,
  'Sadness_precision': 0.6770186335403726,
  'Sadness_recall': (0.5828877005347594,),
  'Sadness_f1-score': 0.6264367816091955,


In [87]:
exp_clf_with_feature_selected(knn6, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.77      0.57      0.66       187
    Calmness       0.54      0.81      0.65        16
     Disgust       0.61      0.28      0.38       187
        Fear       0.63      0.39      0.48       187
   Happiness       0.47      0.64      0.55       187
  Neutrality       0.38      0.75      0.50       167
     Sadness       0.66      0.57      0.61       187
    Surprise       0.67      0.81      0.73        62

    accuracy                           0.55      1180
   macro avg       0.59      0.60      0.57      1180
weighted avg       0.59      0.55      0.54      1180

[[107   1   1  14  46   5   1  12]
 [  0  13   0   0   0   1   2   0]
 [  3   0  52  10  36  70  14   2]
 [ 10   0   5  72  26  47  20   7]
 [ 15   1   9   9 120  27   4   2]
 [  1   6  13   2   6 125  13   1]
 [  2   2   4   7  12  53 106   1]
 [  1   1   1   1   7   1

({'accuracy': 0.5466101694915254,
  'precision': 0.5915895848151245,
  'recall': 0.6014131092868601,
  'f1-score': 0.5694975838105908,
  'Anger_precision': 0.7697841726618705,
  'Anger_recall': (0.5721925133689839,),
  'Anger_f1-score': 0.6564417177914109,
  'Calmness_precision': 0.5416666666666666,
  'Calmness_recall': (0.8125,),
  'Calmness_f1-score': 0.65,
  'Disgust_precision': 0.611764705882353,
  'Disgust_recall': (0.27807486631016043,),
  'Disgust_f1-score': 0.3823529411764706,
  'Fear_precision': 0.6260869565217392,
  'Fear_recall': (0.3850267379679144,),
  'Fear_f1-score': 0.47682119205298007,
  'Happiness_precision': 0.4743083003952569,
  'Happiness_recall': (0.6417112299465241,),
  'Happiness_f1-score': 0.5454545454545454,
  'Neutrality_precision': 0.3799392097264438,
  'Neutrality_recall': (0.7485029940119761,),
  'Neutrality_f1-score': 0.5040322580645161,
  'Sadness_precision': 0.6625,
  'Sadness_recall': (0.5668449197860963,),
  'Sadness_f1-score': 0.6109510086455331,
  '

In [88]:
exp_clf_with_feature_selected(knn7, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.77      0.57      0.65       187
    Calmness       0.57      0.81      0.67        16
     Disgust       0.63      0.28      0.39       187
        Fear       0.68      0.36      0.47       187
   Happiness       0.45      0.65      0.53       187
  Neutrality       0.39      0.80      0.53       167
     Sadness       0.70      0.58      0.64       187
    Surprise       0.64      0.81      0.71        62

    accuracy                           0.55      1180
   macro avg       0.60      0.61      0.57      1180
weighted avg       0.61      0.55      0.55      1180

[[106   1   0   8  56   5   0  11]
 [  0  13   0   0   0   2   1   0]
 [  2   0  52  11  36  71  12   3]
 [  9   0   6  68  30  51  16   7]
 [ 17   2  10   7 121  23   4   3]
 [  1   4   8   2   5 133  13   1]
 [  1   2   6   4  11  51 109   3]
 [  1   1   1   0   7   1

({'accuracy': 0.5525423728813559,
  'precision': 0.6043419505794827,
  'recall': 0.6067326840410949,
  'f1-score': 0.5739860970061066,
  'Anger_precision': 0.7737226277372263,
  'Anger_recall': (0.5668449197860963,),
  'Anger_f1-score': 0.654320987654321,
  'Calmness_precision': 0.5652173913043478,
  'Calmness_recall': (0.8125,),
  'Calmness_f1-score': 0.6666666666666667,
  'Disgust_precision': 0.6265060240963856,
  'Disgust_recall': (0.27807486631016043,),
  'Disgust_f1-score': 0.3851851851851852,
  'Fear_precision': 0.68,
  'Fear_recall': (0.36363636363636365,),
  'Fear_f1-score': 0.4738675958188154,
  'Happiness_precision': 0.4548872180451128,
  'Happiness_recall': (0.6470588235294118,),
  'Happiness_f1-score': 0.5342163355408389,
  'Neutrality_precision': 0.39465875370919884,
  'Neutrality_recall': (0.7964071856287425,),
  'Neutrality_f1-score': 0.5277777777777778,
  'Sadness_precision': 0.6987179487179487,
  'Sadness_recall': (0.5828877005347594,),
  'Sadness_f1-score': 0.63556851

In [89]:
exp_clf_with_feature_selected(knn8, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.79      0.53      0.64       187
    Calmness       0.52      0.88      0.65        16
     Disgust       0.72      0.29      0.41       187
        Fear       0.74      0.36      0.48       187
   Happiness       0.45      0.66      0.54       187
  Neutrality       0.40      0.83      0.54       167
     Sadness       0.68      0.59      0.63       187
    Surprise       0.65      0.85      0.74        62

    accuracy                           0.56      1180
   macro avg       0.62      0.62      0.58      1180
weighted avg       0.63      0.56      0.55      1180

[[100   1   0   8  60   5   1  12]
 [  0  14   0   0   0   2   0   0]
 [  3   0  54   5  37  74  11   3]
 [  5   0   2  67  35  49  22   7]
 [ 16   3   9   5 124  24   4   2]
 [  1   4   5   1   5 138  12   1]
 [  1   4   4   3  10  52 110   3]
 [  1   1   1   1   3   1

({'accuracy': 0.559322033898305,
  'precision': 0.6200587604513221,
  'recall': 0.6236676369100681,
  'f1-score': 0.5793174778377347,
  'Anger_precision': 0.7874015748031497,
  'Anger_recall': (0.5347593582887701,),
  'Anger_f1-score': 0.6369426751592357,
  'Calmness_precision': 0.5185185185185185,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.6511627906976744,
  'Disgust_precision': 0.72,
  'Disgust_recall': (0.2887700534759358,),
  'Disgust_f1-score': 0.41221374045801523,
  'Fear_precision': 0.7444444444444445,
  'Fear_recall': (0.3582887700534759,),
  'Fear_f1-score': 0.48375451263537905,
  'Happiness_precision': 0.45255474452554745,
  'Happiness_recall': (0.6631016042780749,),
  'Happiness_f1-score': 0.5379609544468547,
  'Neutrality_precision': 0.4,
  'Neutrality_recall': (0.8263473053892215,),
  'Neutrality_f1-score': 0.5390625,
  'Sadness_precision': 0.6832298136645962,
  'Sadness_recall': (0.5882352941176471,),
  'Sadness_f1-score': 0.6321839080459771,
  'Surprise_pre

In [90]:
exp_clf_with_feature_selected(knn9, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.81      0.51      0.63       187
    Calmness       0.52      0.88      0.65        16
     Disgust       0.70      0.26      0.38       187
        Fear       0.68      0.33      0.44       187
   Happiness       0.44      0.64      0.52       187
  Neutrality       0.38      0.82      0.52       167
     Sadness       0.66      0.57      0.61       187
    Surprise       0.65      0.85      0.74        62

    accuracy                           0.54      1180
   macro avg       0.60      0.61      0.56      1180
weighted avg       0.62      0.54      0.53      1180

[[ 96   1   0   9  63   5   1  12]
 [  0  14   0   0   0   2   0   0]
 [  2   0  48   8  37  77  12   3]
 [  7   0   3  61  31  54  23   8]
 [ 13   3   9  10 119  28   3   2]
 [  0   5   4   0   7 137  14   0]
 [  0   3   4   2  11  57 107   3]
 [  1   1   1   0   4   1

({'accuracy': 0.538135593220339,
  'precision': 0.604323725659096,
  'recall': 0.6068763531673931,
  'f1-score': 0.5609632708173133,
  'Anger_precision': 0.8067226890756303,
  'Anger_recall': (0.5133689839572193,),
  'Anger_f1-score': 0.6274509803921569,
  'Calmness_precision': 0.5185185185185185,
  'Calmness_recall': (0.875,),
  'Calmness_f1-score': 0.6511627906976744,
  'Disgust_precision': 0.6956521739130435,
  'Disgust_recall': (0.25668449197860965,),
  'Disgust_f1-score': 0.37500000000000006,
  'Fear_precision': 0.6777777777777778,
  'Fear_recall': (0.32620320855614976,),
  'Fear_f1-score': 0.44043321299638993,
  'Happiness_precision': 0.4375,
  'Happiness_recall': (0.6363636363636364,),
  'Happiness_f1-score': 0.5185185185185185,
  'Neutrality_precision': 0.37950138504155123,
  'Neutrality_recall': (0.8203592814371258,),
  'Neutrality_f1-score': 0.5189393939393939,
  'Sadness_precision': 0.6645962732919255,
  'Sadness_recall': (0.5721925133689839,),
  'Sadness_f1-score': 0.614942

In [91]:
exp_clf_with_feature_selected(knn10, X_train, X_test, y_train_e, y_test_e)

Model Name: KNeighborsClassifier;
Train set shape (24885, 128), num of class 8
              precision    recall  f1-score   support

       Anger       0.83      0.49      0.61       187
    Calmness       0.56      0.94      0.70        16
     Disgust       0.75      0.24      0.36       187
        Fear       0.76      0.31      0.44       187
   Happiness       0.43      0.65      0.52       187
  Neutrality       0.37      0.90      0.53       167
     Sadness       0.71      0.53      0.61       187
    Surprise       0.65      0.84      0.73        62

    accuracy                           0.54      1180
   macro avg       0.63      0.61      0.56      1180
weighted avg       0.65      0.54      0.53      1180

[[ 91   1   1   6  69   7   0  12]
 [  0  15   0   0   0   1   0   0]
 [  2   0  45   3  39  87   9   2]
 [  6   0   1  58  32  63  18   9]
 [ 10   3   8   6 121  33   4   2]
 [  0   4   0   1   2 151   9   0]
 [  0   3   4   2  10  65 100   3]
 [  1   1   1   0   7   0

({'accuracy': 0.5364406779661017,
  'precision': 0.6329277650001335,
  'recall': 0.6124565788855272,
  'f1-score': 0.5630318737625171,
  'Anger_precision': 0.8272727272727273,
  'Anger_recall': (0.48663101604278075,),
  'Anger_f1-score': 0.6127946127946128,
  'Calmness_precision': 0.5555555555555556,
  'Calmness_recall': (0.9375,),
  'Calmness_f1-score': 0.6976744186046512,
  'Disgust_precision': 0.75,
  'Disgust_recall': (0.24064171122994651,),
  'Disgust_f1-score': 0.3643724696356275,
  'Fear_precision': 0.7631578947368421,
  'Fear_recall': (0.31016042780748665,),
  'Fear_f1-score': 0.44106463878326996,
  'Happiness_precision': 0.43214285714285716,
  'Happiness_recall': (0.6470588235294118,),
  'Happiness_f1-score': 0.5182012847965738,
  'Neutrality_precision': 0.371007371007371,
  'Neutrality_recall': (0.9041916167664671,),
  'Neutrality_f1-score': 0.5261324041811847,
  'Sadness_precision': 0.7142857142857143,
  'Sadness_recall': (0.5347593582887701,),
  'Sadness_f1-score': 0.611620

In [92]:
# change y_lable into emo
# exp_clf_with_feature_selected(gb_fast, X_train, X_test, y_train_e, y_test_e)

In [93]:
# check how long on normal gradient boosting
# exp_clf_with_feature_selected(gboost, X_train, X_test, y_train_e, y_test_e)

### Threshold tuning

In [94]:
# Calculate accuracy for the given threshold

probabilities = knn.predict_proba(X_test)

def calc_acc_by_thres(probabilities, threshold, y_test):
    predictions_adj = []
    # Loop through each sample's probabilities
    for probs in probabilities:
        if probs[0] > threshold:
            pred_class = -1
        elif probs[1] > probs[2]:
            pred_class = 0
        else:
            pred_class = 1
        predictions_adj.append(pred_class)
    accuracy = np.mean(predictions_adj == y_test)
    precision, recall, f1score, _ = precision_recall_fscore_support(y_test_s, predictions_adj, average=None)
    return accuracy, min(f1score), np.var(f1score)

best_threshold = None
best_accuracy = 0.0
best_f1score = 0.0
# best_f1s_var = 10

# Define a range of threshold values to try
threshold_range = np.linspace(0.25, 0.75, 100)
for threshold in threshold_range:
    accuracy, min_f1_score, var_f1_score = calc_acc_by_thres(probabilities, threshold, y_test_s)
    # if accuracy > best_accuracy:
    #     best_accuracy = accuracy
    #     best_threshold = threshold
    if min_f1_score > best_f1score:
        best_f1score = min_f1_score
        best_threshold = threshold
        best_accuracy = accuracy
    # if var_f1_score < best_f1s_var:
    #     best_f1score = min_f1_score
    #     best_threshold = threshold
    #     best_accuracy = accuracy
    #     best_f1s_var = var_f1_score
        

print("Best Threshold:", best_threshold)
print("Best Accuracy:", best_accuracy)
print("Best min f1 score:", best_f1score)
# print("Best var f1 score:", best_f1s_var)

AttributeError: 'Flags' object has no attribute 'c_contiguous'

In [None]:
# if you want to adjust the threshold; Predict probabilities for each class label;
# best threshold and best_threshold x 110% 120% 130% to see trend
probabilities = knn.predict_proba(X_test)

threshold = best_threshold
print('BEST Threshold:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.1
print('Threshold-2:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.2
print('Threshold-3:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))

threshold = best_threshold*1.3
print('Threshold-4:', threshold) 
predictions_adj = []
for probs in probabilities:
    if probs[0] > threshold:
        pred_class = -1
    elif probs[1] > probs[2]:
        pred_class = 0
    else:
        pred_class = 1
    predictions_adj.append(pred_class)
print(classification_report(y_test_s, predictions_adj))
print(confusion_matrix(y_test_s, predictions_adj))
print("Accuracy:", np.mean(predictions_adj == y_test_s))