In [1]:
import librosa
import librosa.display
from librosa.core import load
import numpy as np
import pandas as pd
import glob
import os, sys
import matplotlib.pyplot as plt
import seaborn as sns
import time

### Load Data

In [2]:
VERSION = 4 
df_joint_train = pd.read_csv(f'./features/cache_all_features_train_V{VERSION}.csv')
feature_column_names = [i for i in df_joint_train.columns \
                        if i not in ['file_path','renamed_file_path','split','sentiment_value','emotional_category']]

df_joint_test = pd.read_csv(f'./features/cache_all_features_test_V{VERSION}.csv')
print("df_joint_train shape: ", df_joint_train.shape)
print("df_joint_test shape: ", df_joint_test.shape)

feat_rank_emo = pd.read_csv('./features/relationship_stats_test_result_emotion_all_V4.csv')
feat_rank_sti = pd.read_csv('./features/relationship_stats_test_result_sentiment_all_V4.csv')
feat_rank_emo.correlation_coeff = abs(feat_rank_emo.correlation_coeff)
feat_rank_sti.correlation_coeff = abs(feat_rank_sti.correlation_coeff)
feature_types = ['mfcc','chroma','mel','zcr','rms','Intensity','Pitch','GNE','HNR','Spectrum','Formant']

df_joint_train shape:  (10981, 1550)
df_joint_test shape:  (1180, 1550)


In [3]:
X_train = df_joint_train[feature_column_names]
y_train_s = df_joint_train['sentiment_value']
y_train_e = df_joint_train['emotional_category']

from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_e_num = label_encoder.fit_transform(y_train_e)

X_test = df_joint_test[feature_column_names]
y_test_s = df_joint_test['sentiment_value']
y_test_e = df_joint_test['emotional_category']

from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_test_e_num = label_encoder.fit_transform(y_test_e)

### Check feature ranking

In [4]:
feat_rank_emo.describe()

Unnamed: 0,p_value,is_significant,f_score,correlation_coeff,mutual_info_score
count,1546.0,1546.0,1546.0,1546.0,1546.0
mean,0.00013,0.999353,79.626271,0.113689,0.145012
std,0.00216,0.025433,83.84885,0.064382,0.064789
min,0.0,0.0,1.9891,0.0003,0.0043
25%,0.0,1.0,28.025975,0.0589,0.09405
50%,0.0,1.0,60.00225,0.1133,0.15175
75%,0.0,1.0,105.0349,0.1602,0.2043
max,0.0527,1.0,876.182,0.4003,0.3074


In [5]:
features_score_above_p75 = feat_rank_sti[(feat_rank_sti.f_score>52) & 
(feat_rank_sti.correlation_coeff>0.058) & 
(feat_rank_sti.mutual_info_score>0.05)]['feature'].values 

print("features with score above p75 from Sentiment label: ", len(features_score_above_p75))
print(features_score_above_p75)

features with score above p75 from Sentiment label:  68
['Pitch_pitch_slope_without_octave_jumps' 'Pitch_stddev_pitch'
 'Pitch_mean_absolute_pitch_slope' 'Pitch_q3_pitch' 'mfcc_median_33'
 'Pitch_mean_pitch' 'mfcc_mean_33' 'mfcc_median_34' 'mfcc_median_32'
 'Pitch_max_pitch' 'Pitch_median_intensity' 'mfcc_std_21' 'mfcc_mean_32'
 'mfcc_mean_34' 'mfcc_std_22' 'mfcc_std_27' 'mfcc_std_24' 'mfcc_std_23'
 'mfcc_std_20' 'mfcc_p90_22' 'mfcc_std_17' 'mfcc_std_19' 'mfcc_p90_33'
 'Intensity_q3_intensity' 'mfcc_p90_25' 'mfcc_std_18' 'mfcc_std_1'
 'Pitch_min_pitch' 'mel32_std_22' 'mel32_std_23' 'Local Jitter'
 'mel64_std_43' 'mel32_mean_22' 'mel32_mean_23' 'mel64_mean_46'
 'mel64_p10_3' 'mel32_mean_24' 'mel32_std_12' 'mel64_mean_43'
 'mel64_std_42' 'mel64_std_44' 'mel64_mean_42' 'mel32_std_9'
 'mel128_mean_91' 'mel64_mean_44' 'mel32_p90_24' 'mel128_std_84'
 'mel128_mean_84' 'mel64_mean_45' 'mel64_mean_48' 'mel64_std_45'
 'mel128_mean_99' 'mel32_std_11' 'mel32_p90_23' 'mel64_mean_47'
 'mel32_p90_22'

### import Model

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score, precision_recall_fscore_support

dt_clf= DecisionTreeClassifier()
rf_clf = RandomForestClassifier()

def try_clf_with_feature_selected(clf_model, feature_names,y_train,y_test):
    X_train = df_joint_train[feature_names]
    X_test = df_joint_test[feature_names]
    predictions = clf_model.fit(X_train, y_train).predict(X_test)
    print(classification_report(y_test, predictions ))
    print(confusion_matrix(y_test, predictions))

In [7]:
try_clf_with_feature_selected(dt_clf,features_score_above_p75,y_train_s,y_test_s)

              precision    recall  f1-score   support

          -1       0.76      0.81      0.79       748
           0       0.54      0.50      0.52       183
           1       0.59      0.52      0.55       249

    accuracy                           0.70      1180
   macro avg       0.63      0.61      0.62      1180
weighted avg       0.69      0.70      0.70      1180

[[607  61  80]
 [ 83  91   9]
 [105  15 129]]


## Forward Selection by category
- select certain category features
- remove low related features (manual study results in selected_<group>) 
- apply auto feature selection to determin how many feature chosen
- OPTIONAL experiment how much performance drop from full -> selected -> less -> 2PCs?

- Alternative is too do prosody features together AUTO

### Prodosy Features List
Total 56   
['Intensity_min_intensity', 'Intensity_relative_min_intensity_time', 'Intensity_max_intensity', 'Intensity_relative_max_intensity_time', 'Intensity_mean_intensity', 'Intensity_stddev_intensity', 'Intensity_q1_intensity', 'Intensity_median_intensity', 'Intensity_q3_intensity', 'Pitch_voiced_fraction', 'Pitch_min_pitch', 'Pitch_relative_min_pitch_time', 'Pitch_max_pitch', 'Pitch_relative_max_pitch_time', 'Pitch_mean_pitch', 'Pitch_stddev_pitch', 'Pitch_q1_pitch', 'Pitch_median_intensity', 'Pitch_q3_pitch', 'Pitch_mean_absolute_pitch_slope', 'Pitch_pitch_slope_without_octave_jumps', 'GNE_max_gne', 'GNE_mean_gne', 'GNE_stddev_gne', 'GNE_sum_gne', 'HNR_min_hnr', 'HNR_relative_min_hnr_time', 'HNR_max_hnr', 'HNR_relative_max_hnr_time', 'HNR_mean_hnr', 'HNR_stddev_hnr', 'Spectrum_band_energy', 'Spectrum_band_density', 'Spectrum_band_energy_difference', 'Spectrum_band_density_difference', 'Spectrum_center_of_gravity_spectrum', 'Spectrum_stddev_spectrum', 'Spectrum_skewness_spectrum', 'Spectrum_kurtosis_spectrum', 'Spectrum_central_moment_spectrum', 'Formant_f1_mean', 'Formant_f2_mean', 'Formant_f3_mean', 'Formant_f4_mean', 'Formant_f1_median', 'Formant_f2_median', 'Formant_f3_median', 'Formant_f4_median', 'Formant_formant_dispersion', 'Formant_average_formant', 'Formant_mff', 'Formant_fitch_vtl', 'Formant_delta_f', 'Formant_vtl_delta_f', 'Local Jitter', 'Local Shimmer']

In [8]:
feature_names_prosody = ['Intensity_min_intensity', 'Intensity_relative_min_intensity_time', 'Intensity_max_intensity', 'Intensity_relative_max_intensity_time', 'Intensity_mean_intensity', 'Intensity_stddev_intensity', 'Intensity_q1_intensity', 'Intensity_median_intensity', 'Intensity_q3_intensity', 
                         'Pitch_voiced_fraction', 'Pitch_min_pitch', 'Pitch_relative_min_pitch_time', 'Pitch_max_pitch', 'Pitch_relative_max_pitch_time', 'Pitch_mean_pitch', 'Pitch_stddev_pitch', 'Pitch_q1_pitch', 'Pitch_median_intensity', 'Pitch_q3_pitch', 'Pitch_mean_absolute_pitch_slope', 'Pitch_pitch_slope_without_octave_jumps', 
                         'GNE_max_gne', 'GNE_mean_gne', 'GNE_stddev_gne', 'GNE_sum_gne', 'HNR_min_hnr', 'HNR_relative_min_hnr_time', 'HNR_max_hnr', 'HNR_relative_max_hnr_time', 'HNR_mean_hnr', 'HNR_stddev_hnr', 
                         'Spectrum_band_energy', 'Spectrum_band_density', 'Spectrum_band_energy_difference', 'Spectrum_band_density_difference', 'Spectrum_center_of_gravity_spectrum', 'Spectrum_stddev_spectrum', 'Spectrum_skewness_spectrum', 'Spectrum_kurtosis_spectrum', 'Spectrum_central_moment_spectrum', 'Formant_f1_mean', 'Formant_f2_mean', 'Formant_f3_mean', 'Formant_f4_mean', 'Formant_f1_median', 'Formant_f2_median', 'Formant_f3_median', 'Formant_f4_median', 'Formant_formant_dispersion', 'Formant_average_formant', 'Formant_mff', 'Formant_fitch_vtl', 'Formant_delta_f', 'Formant_vtl_delta_f', 
                         'Local Jitter', 'Local Shimmer']
print(len(feature_names_prosody))
# feature_names_prosody_selected = [ name for name in feature_names_prosody if name in features_score_above_p50 ]
selected_spect = ['Spectrum_band_energy_difference','Spectrum_band_density_difference','Spectrum_center_of_gravity_spectrum','Spectrum_skewness_spectrum','Spectrum_kurtosis_spectrum', 'Spectrum_stddev_spectrum','Spectrum_band_density', 'Spectrum_band_energy']
selected_formant = ['Formant_f1_mean','Formant_f1_median','Formant_f3_mean','Formant_fitch_vtl','Formant_mff','Formant_formant_dispersion']
selected_GNE = ['GNE_max_gne', 'GNE_stddev_gne', 'GNE_mean_gne', 'GNE_sum_gne']
selected_pitch = ['Pitch_pitch_slope_without_octave_jumps', 'Pitch_q3_pitch','Pitch_stddev_pitch', 'Pitch_mean_absolute_pitch_slope','Pitch_mean_pitch', 'Pitch_max_pitch', 'Pitch_q1_pitch', 'Pitch_min_pitch']
selected_intensity = ['Intensity_max_intensity', 'Intensity_q3_intensity','Intensity_median_intensity', 'Intensity_mean_intensity', 'Intensity_stddev_intensity','Intensity_relative_max_intensity_time']
selected_HNR = ['HNR_stddev_hnr', 'HNR_mean_hnr','HNR_relative_min_hnr_time','HNR_max_hnr']
feature_names_prosody_selected = selected_spect + selected_formant + selected_HNR + selected_intensity + selected_pitch + ['Local Jitter','Local Shimmer']
print(len(feature_names_prosody_selected))

56
34


In [11]:
feat_rank_emo[feat_rank_emo.feature.str.contains('Pitch')]

Unnamed: 0,feature,p_value,is_significant,f_score,correlation_coeff,mutual_info_score
2,Pitch_pitch_slope_without_octave_jumps,0.0,1,756.2722,0.061,0.1888
9,Pitch_q3_pitch,0.0,1,486.9693,0.0804,0.2173
10,Pitch_mean_pitch,0.0,1,474.9486,0.1277,0.218
12,Pitch_mean_absolute_pitch_slope,0.0,1,458.2618,0.0704,0.1425
18,Pitch_median_intensity,0.0,1,392.1258,0.1407,0.1977
20,Pitch_q1_pitch,0.0,1,369.649,0.1797,0.2066
31,Pitch_stddev_pitch,0.0,1,316.386,0.0627,0.1463
42,Pitch_min_pitch,0.0,1,288.217,0.1438,0.1891
64,Pitch_max_pitch,0.0,1,237.3622,0.0223,0.1799
558,Pitch_voiced_fraction,0.0,1,79.7698,0.0842,0.0593


## simple filter based on ranking 

In [32]:
all_pitch = ['Pitch_voiced_fraction', 'Pitch_min_pitch', 'Pitch_relative_min_pitch_time', 'Pitch_max_pitch', 'Pitch_relative_max_pitch_time', 'Pitch_mean_pitch', 'Pitch_stddev_pitch', 'Pitch_q1_pitch', 'Pitch_median_intensity', 'Pitch_q3_pitch', 'Pitch_mean_absolute_pitch_slope', 'Pitch_pitch_slope_without_octave_jumps']                          
selected_pitch = ['Pitch_pitch_slope_without_octave_jumps', 'Pitch_q3_pitch','Pitch_stddev_pitch', 'Pitch_mean_absolute_pitch_slope','Pitch_mean_pitch', 'Pitch_max_pitch', 'Pitch_q1_pitch', 'Pitch_min_pitch']
try_clf_with_feature_selected(rf_clf,all_pitch,y_train_s,y_test_s)
try_clf_with_feature_selected(rf_clf,selected_pitch,y_train_s,y_test_s)
len(all_pitch), len(selected_pitch)

              precision    recall  f1-score   support

          -1       0.73      0.95      0.82       748
           0       0.73      0.31      0.44       183
           1       0.81      0.42      0.55       249

    accuracy                           0.74      1180
   macro avg       0.76      0.56      0.60      1180
weighted avg       0.75      0.74      0.71      1180

[[708  17  23]
 [124  57   2]
 [140   4 105]]
              precision    recall  f1-score   support

          -1       0.73      0.93      0.82       748
           0       0.75      0.30      0.43       183
           1       0.75      0.44      0.55       249

    accuracy                           0.73      1180
   macro avg       0.74      0.56      0.60      1180
weighted avg       0.74      0.73      0.70      1180

[[698  16  34]
 [126  55   2]
 [138   2 109]]


(12, 8)

In [13]:
feat_rank_emo[feat_rank_emo.feature.str.contains('Intensity')]

Unnamed: 0,feature,p_value,is_significant,f_score,correlation_coeff,mutual_info_score
1,Intensity_max_intensity,0.0,1,832.9873,0.368,0.2508
3,Intensity_q3_intensity,0.0,1,708.2566,0.2955,0.2374
45,Intensity_median_intensity,0.0,1,282.6895,0.1458,0.1997
46,Intensity_mean_intensity,0.0,1,282.6173,0.1004,0.261
115,Intensity_q1_intensity,0.0,1,178.0872,0.0342,0.1477
145,Intensity_min_intensity,0.0,1,158.0974,0.0074,0.0873
208,Intensity_stddev_intensity,0.0,1,139.5215,0.0473,0.2151
756,Intensity_relative_max_intensity_time,0.0,1,60.9731,0.0311,0.0674
1352,Intensity_relative_min_intensity_time,0.0,1,12.8807,0.0148,0.0698


In [33]:
all_intensity =  ['Intensity_min_intensity', 'Intensity_relative_min_intensity_time', 'Intensity_max_intensity', 
                  'Intensity_relative_max_intensity_time', 'Intensity_mean_intensity', 'Intensity_stddev_intensity', 
                  'Intensity_q1_intensity', 'Intensity_median_intensity', 'Intensity_q3_intensity']
selected_intensity = ['Intensity_max_intensity', 'Intensity_q3_intensity','Intensity_median_intensity', 
                      'Intensity_mean_intensity', 'Intensity_stddev_intensity','Intensity_relative_max_intensity_time']
try_clf_with_feature_selected(rf_clf,all_intensity,y_train_s,y_test_s)
try_clf_with_feature_selected(rf_clf,selected_intensity,y_train_s,y_test_s)
len(all_intensity), len(selected_intensity)

              precision    recall  f1-score   support

          -1       0.72      0.91      0.80       748
           0       0.65      0.42      0.51       183
           1       0.69      0.32      0.44       249

    accuracy                           0.71      1180
   macro avg       0.69      0.55      0.58      1180
weighted avg       0.70      0.71      0.68      1180

[[681  35  32]
 [102  77   4]
 [163   6  80]]
              precision    recall  f1-score   support

          -1       0.71      0.91      0.80       748
           0       0.70      0.44      0.54       183
           1       0.58      0.26      0.36       249

    accuracy                           0.70      1180
   macro avg       0.66      0.53      0.56      1180
weighted avg       0.68      0.70      0.66      1180

[[677  28  43]
 [ 98  81   4]
 [178   7  64]]


(9, 6)

In [20]:
all_spect  =  ['Spectrum_band_energy', 'Spectrum_band_density', 'Spectrum_band_energy_difference', 
               'Spectrum_band_density_difference', 'Spectrum_center_of_gravity_spectrum', 
               'Spectrum_stddev_spectrum', 'Spectrum_skewness_spectrum', 'Spectrum_kurtosis_spectrum', 
               'Spectrum_central_moment_spectrum',]
selected_spect = ['Spectrum_band_energy_difference','Spectrum_band_density_difference','Spectrum_center_of_gravity_spectrum',
                  'Spectrum_skewness_spectrum','Spectrum_kurtosis_spectrum', 'Spectrum_stddev_spectrum',
                  'Spectrum_band_density', 'Spectrum_band_energy']
try_clf_with_feature_selected(dt_clf,all_spect,y_train_s,y_test_s)
try_clf_with_feature_selected(dt_clf,selected_spect,y_train_s,y_test_s)
len(all_spect), len(selected_spect)

              precision    recall  f1-score   support

          -1       0.73      0.76      0.74       748
           0       0.39      0.38      0.39       183
           1       0.42      0.37      0.39       249

    accuracy                           0.62      1180
   macro avg       0.51      0.50      0.51      1180
weighted avg       0.61      0.62      0.61      1180

[[570  78 100]
 [ 87  70  26]
 [128  30  91]]
              precision    recall  f1-score   support

          -1       0.71      0.76      0.74       748
           0       0.40      0.36      0.38       183
           1       0.42      0.37      0.40       249

    accuracy                           0.62      1180
   macro avg       0.51      0.50      0.50      1180
weighted avg       0.60      0.62      0.61      1180

[[571  74 103]
 [ 94  65  24]
 [134  22  93]]


(9, 8)

In [21]:
feat_rank_emo[feat_rank_emo.feature.str.contains('Formant')]

Unnamed: 0,feature,p_value,is_significant,f_score,correlation_coeff,mutual_info_score
16,Formant_f1_median,0.0,1,408.1099,0.3,0.1507
17,Formant_f1_mean,0.0,1,395.9877,0.2928,0.1377
80,Formant_formant_dispersion,0.0,1,213.0383,0.261,0.0921
260,Formant_fitch_vtl,0.0,1,126.4983,0.1724,0.0547
350,Formant_mff,0.0,1,110.9792,0.1648,0.0477
625,Formant_f4_mean,0.0,1,72.2518,0.1586,0.0446
669,Formant_f4_median,0.0,1,68.169,0.1651,0.0446
1089,Formant_delta_f,0.0,1,33.8022,0.0959,0.0247
1110,Formant_vtl_delta_f,0.0,1,31.7923,0.0918,0.0247
1113,Formant_f3_mean,0.0,1,31.6357,0.0201,0.0173


In [34]:
selected_formant = ['Formant_f1_mean','Formant_f1_median','Formant_f3_mean','Formant_fitch_vtl','Formant_mff','Formant_formant_dispersion']
all_formant = ['Formant_f1_mean', 'Formant_f2_mean', 'Formant_f3_mean', 'Formant_f4_mean', 'Formant_f1_median', 'Formant_f2_median', 
               'Formant_f3_median', 'Formant_f4_median', 'Formant_formant_dispersion', 'Formant_average_formant', 'Formant_mff', 'Formant_fitch_vtl', 'Formant_delta_f', 'Formant_vtl_delta_f']
try_clf_with_feature_selected(rf_clf,all_formant,y_train_s,y_test_s)
try_clf_with_feature_selected(rf_clf,selected_formant,y_train_s,y_test_s)
len(all_spect), len(selected_spect)

              precision    recall  f1-score   support

          -1       0.67      0.92      0.78       748
           0       0.61      0.16      0.26       183
           1       0.51      0.22      0.31       249

    accuracy                           0.66      1180
   macro avg       0.60      0.44      0.45      1180
weighted avg       0.63      0.66      0.60      1180

[[689  14  45]
 [145  30   8]
 [188   5  56]]
              precision    recall  f1-score   support

          -1       0.67      0.90      0.76       748
           0       0.51      0.14      0.22       183
           1       0.40      0.20      0.27       249

    accuracy                           0.63      1180
   macro avg       0.53      0.41      0.42      1180
weighted avg       0.59      0.63      0.57      1180

[[670  12  66]
 [150  25   8]
 [187  12  50]]


(9, 8)

In [26]:
feat_rank_emo[feat_rank_emo.feature.str.contains('HNR')]

Unnamed: 0,feature,p_value,is_significant,f_score,correlation_coeff,mutual_info_score
468,HNR_stddev_hnr,0.0,1,91.3664,0.1038,0.104
655,HNR_mean_hnr,0.0,1,70.2472,0.0396,0.1416
852,HNR_max_hnr,0.0,1,52.3718,0.0282,0.0245
1094,HNR_relative_min_hnr_time,0.0,1,33.2682,0.1217,0.0787
1205,HNR_relative_max_hnr_time,0.0,1,22.4997,0.1002,0.0499
1325,HNR_min_hnr,0.0,1,14.1812,0.0367,0.0494


In [27]:
selected_pitch = ['Pitch_pitch_slope_without_octave_jumps', 'Pitch_q3_pitch','Pitch_stddev_pitch', 
                  'Pitch_mean_absolute_pitch_slope','Pitch_mean_pitch', 'Pitch_max_pitch', 'Pitch_q1_pitch', 'Pitch_min_pitch']
selected_intensity = ['Intensity_max_intensity', 'Intensity_q3_intensity','Intensity_median_intensity', 
                      'Intensity_mean_intensity', 'Intensity_stddev_intensity','Intensity_relative_max_intensity_time']
selected_spect = ['Spectrum_band_energy_difference','Spectrum_band_density_difference','Spectrum_center_of_gravity_spectrum',
                  'Spectrum_skewness_spectrum','Spectrum_kurtosis_spectrum', 'Spectrum_stddev_spectrum','Spectrum_band_density', 'Spectrum_band_energy']
selected_formant = ['Formant_f1_mean','Formant_f1_median','Formant_f3_mean','Formant_fitch_vtl','Formant_mff','Formant_formant_dispersion']
selected_HNR = ['HNR_stddev_hnr', 'HNR_mean_hnr','HNR_relative_min_hnr_time','HNR_max_hnr']
feature_names_prosody_filtered= selected_spect + selected_formant + selected_HNR + selected_intensity + selected_pitch + ['Local Jitter','Local Shimmer']
len(feature_names_prosody_filtered)

34

In [35]:
try_clf_with_feature_selected(rf_clf,feature_names_prosody,y_train_s,y_test_s)
try_clf_with_feature_selected(rf_clf,feature_names_prosody_filtered,y_train_s,y_test_s)

              precision    recall  f1-score   support

          -1       0.75      0.96      0.85       748
           0       0.81      0.47      0.60       183
           1       0.91      0.43      0.58       249

    accuracy                           0.77      1180
   macro avg       0.83      0.62      0.67      1180
weighted avg       0.80      0.77      0.75      1180

[[721  18   9]
 [ 96  86   1]
 [141   2 106]]
              precision    recall  f1-score   support

          -1       0.76      0.96      0.85       748
           0       0.79      0.48      0.60       183
           1       0.88      0.43      0.58       249

    accuracy                           0.77      1180
   macro avg       0.81      0.62      0.67      1180
weighted avg       0.79      0.77      0.75      1180

[[716  20  12]
 [ 92  88   3]
 [138   3 108]]


In [36]:
try_clf_with_feature_selected(rf_clf,feature_names_prosody,y_train_e,y_test_e)
try_clf_with_feature_selected(rf_clf,feature_names_prosody_filtered,y_train_e,y_test_e)

              precision    recall  f1-score   support

       Anger       0.69      0.75      0.72       187
    Calmness       0.54      0.81      0.65        16
     Disgust       0.51      0.40      0.45       187
        Fear       0.60      0.49      0.54       187
   Happiness       0.63      0.49      0.55       187
  Neutrality       0.59      0.65      0.62       167
     Sadness       0.52      0.75      0.61       187
    Surprise       0.87      0.77      0.82        62

    accuracy                           0.60      1180
   macro avg       0.62      0.64      0.62      1180
weighted avg       0.61      0.60      0.60      1180

[[141   0  14  10  18   3   1   0]
 [  0  13   0   0   0   0   3   0]
 [ 12   0  75  14  14  31  40   1]
 [ 16   0  18  91   6   9  46   1]
 [ 35   1  19  11  92  15  11   3]
 [  0   3  11  10   7 108  28   0]
 [  0   6   6  11   5  16 141   2]
 [  0   1   4   4   3   0   2  48]]
              precision    recall  f1-score   support

       Anger 

In [31]:
len(feature_names_prosody), len(feature_names_prosody_filtered)

(56, 34)

# End

### Feature Tables Summary Part 2 

| feature category   | num of sub attributes  | num of selected   |
| ------------------ | ---------------------- | ----------------- |
|Intensity           |    9                   |    6              |
|pitch               |   12                   |    8              |
|HNR                 |    6                   |    4              |
|GNE                 |    5                   |    0              |
|Local Jitter        |    1                   |    1              |
|Local Shimmer       |    1                   |    1              |
|Spectrum            |    9                   |    8              |
|formant             |   14                   |    6              |
|====================|========================|======================|
| S accuracy - RF   | 77% (min recall 0.43)  |  77%(min recall 0.43) |
| E accuracy - RF   | 60% (min recall 0.40)  |  62%(min recall 0.43) |