In [1]:
import librosa
import librosa.display
from librosa.core import load
import numpy as np
import pandas as pd
import glob
import os, sys
import matplotlib.pyplot as plt

from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

### Load Data

In [2]:
# Version tag shared by the cached feature tables read below.
VERSION = 4

# Read the pre-computed feature tables for both splits.
df_joint_train = pd.read_csv(f'./features/cache_all_features_train_V{VERSION}.csv')
df_joint_test = pd.read_csv(f'./features/cache_all_features_test_V{VERSION}.csv')

# Every training column that is not a path/split/label column is used as a model input.
feature_column_names = [
    column
    for column in df_joint_train.columns
    if column not in ('file_path', 'renamed_file_path', 'split', 'sentiment_value', 'emotional_category')
]

print("shape of train set: ", df_joint_train.shape)
print("shape of test  set: ", df_joint_test.shape)

shape of train set:  (10981, 1550)
shape of test  set:  (1180, 1550)


In [3]:
from sklearn.preprocessing import LabelEncoder

# Model inputs and the two label columns (sentiment / emotion) for the training split.
X_train = df_joint_train[feature_column_names]
y_train_s = df_joint_train['sentiment_value']
y_train_e = df_joint_train['emotional_category']

# Same column selection for the held-out test split.
X_test = df_joint_test[feature_column_names]
y_test_s = df_joint_test['sentiment_value']
y_test_e = df_joint_test['emotional_category']

# Fit the emotion encoder on the TRAINING labels only and reuse that mapping for
# the test labels. Fitting a second encoder on the test labels would derive the
# integer codes from the test set's own classes, so the same emotion could get a
# different code in each split. `transform` raises ValueError if the test split
# contains a label that never occurs in training, which surfaces the mismatch
# instead of silently mis-coding it.
label_encoder = LabelEncoder()
y_e_num = label_encoder.fit_transform(y_train_e)
y_test_e_num = label_encoder.transform(y_test_e)

In [4]:
# Per-feature relationship statistics for the emotion and sentiment targets.
# The file version follows VERSION (same tag as the cached feature tables) so the
# two sets of files cannot drift apart when the version is bumped.
feat_rank_emo = pd.read_csv(f'./features/relationship_stats_test_result_emotion_all_V{VERSION}.csv')
feat_rank_s   = pd.read_csv(f'./features/relationship_stats_test_result_sentiment_all_V{VERSION}.csv')

# Keep only the magnitude of the correlation. Bracket indexing is used for the
# assignment because attribute-style assignment only updates a column when it
# already exists; otherwise it sets a plain Python attribute on the frame.
feat_rank_emo['correlation_coeff'] = feat_rank_emo['correlation_coeff'].abs()
feat_rank_s['correlation_coeff'] = feat_rank_s['correlation_coeff'].abs()

In [5]:
# Hand-picked feature names, one list per group.
feature_intensity = [
    'Intensity_max_intensity',
    'Intensity_q3_intensity',
    'Intensity_median_intensity',
    'Intensity_mean_intensity',
    'Intensity_stddev_intensity',
    'Intensity_relative_max_intensity_time',
]
feature_pitch = [
    'Pitch_pitch_slope_without_octave_jumps',
    'Pitch_q3_pitch',
    'Pitch_stddev_pitch',
    'Pitch_mean_absolute_pitch_slope',
    'Pitch_mean_pitch',
    'Pitch_max_pitch',
    'Pitch_q1_pitch',
    'Pitch_min_pitch',
]
feature_HNR = [
    'HNR_stddev_hnr',
    'HNR_mean_hnr',
    'HNR_relative_min_hnr_time',
    'HNR_max_hnr',
]
feature_spect = [
    'Spectrum_band_energy_difference',
    'Spectrum_band_density_difference',
    'Spectrum_center_of_gravity_spectrum',
    'Spectrum_skewness_spectrum',
    'Spectrum_kurtosis_spectrum',
    'Spectrum_stddev_spectrum',
    'Spectrum_band_density',
    'Spectrum_band_energy',
]
feature_formant = [
    'Formant_f1_mean',
    'Formant_f1_median',
    'Formant_f3_mean',
    'Formant_fitch_vtl',
    'Formant_mff',
    'Formant_formant_dispersion',
]
feature_local = ['Local Jitter', 'Local Shimmer']

# All groups concatenated in a fixed order: intensity, pitch, HNR, spectrum, formant, local.
feature_names_prosody = [
    *feature_intensity,
    *feature_pitch,
    *feature_HNR,
    *feature_spect,
    *feature_formant,
    *feature_local,
]
print(len(feature_names_prosody))

34


In [6]:
from utils import generate_selected_features_by_type

# Named feature groups, in the order they are later concatenated.
#
# For the three spectral families (MFCC20, Mel32, chroma) one group is created per
# summary statistic. The groups labelled "min" and "max" are requested from the
# helper with the "p10" and "p90" statistics respectively. The trailing number
# (20 / 32 / 12) is passed to the helper unchanged.
feature_groups = {
    f"{prefix}_{label}": generate_selected_features_by_type(feature_column_names, family, statistic, count)
    for prefix, family, count in (("MFCC20", "mfcc", 20), ("Mel32", "mel32", 32), ("chroma", "chroma", 12))
    for label, statistic in (("mean", "mean"), ("median", "median"), ("std", "std"), ("min", "p10"), ("max", "p90"))
}

# Remaining groups: two more helper-selected groups followed by the hand-picked lists.
# Note that the "HNR ", "spect " and "formant " keys carry a trailing space.
feature_groups.update({
    "zcr": generate_selected_features_by_type(feature_column_names, "zcr", "", 5),
    "rms": generate_selected_features_by_type(feature_column_names, "rms", "", 5),
    "intensity": feature_intensity,
    "pitch": feature_pitch,
    # "GNE": feature_GNE,  (group currently disabled)
    "HNR ": feature_HNR,
    "spect ": feature_spect,
    "formant ": feature_formant,
    "local": feature_local,
})

## Feature Selection by Group - Recursive Feature Elimination

In [7]:
# Flatten every group into one ordered column list and record, per group, which
# positions of that list it occupies and how many columns it contributes.
total_features, feature_group_indices, feature_group_dim = [], {}, {}
index_counter = 0
for group_name, group_features in feature_groups.items():
    group_size = len(group_features)
    group_indices = [index_counter + offset for offset in range(group_size)]
    total_features += group_features
    feature_group_indices[group_name] = group_indices
    feature_group_dim[group_name] = group_size
    index_counter += group_size

In [9]:
# Baseline forest trained on every grouped feature against the emotion labels.
# random_state is fixed so the importance ranking below is reproducible across
# runs (the forest used in the pruning loop is seeded the same way).
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train[total_features], y_train_e)
importances = model.feature_importances_

# Average the per-feature importances inside each group. A mean (not a sum) is
# used so groups of different sizes are compared on the same scale.
group_importance_mean = {}
for group_name, group_indices in feature_group_indices.items():
    group_importance_mean[group_name] = np.mean(importances[group_indices])

# The label says "Mean" because that is what is computed above.
print("Mean of feature importances for each group:")
sorted_group_importance_mean = dict(sorted(group_importance_mean.items(), key=lambda x: x[1], reverse=True))
sorted_group_importance_mean

Sum of feature importances for each group:


{'pitch': 0.007695264339322416,
 'local': 0.0050436193228019805,
 'intensity': 0.004041294418109032,
 'rms': 0.0033659617391680343,
 'MFCC20_std': 0.003260523119383899,
 'Mel32_mean': 0.0031613356352446194,
 'Mel32_std': 0.00295502037286027,
 'spect ': 0.002919362019581437,
 'MFCC20_max': 0.002914518023949884,
 'HNR ': 0.002904043387724187,
 'Mel32_max': 0.0028816507676728293,
 'MFCC20_mean': 0.0027373472125798767,
 'MFCC20_median': 0.0025296430047456126,
 'chroma_mean': 0.0025217427083971122,
 'chroma_median': 0.0025197145469124417,
 'chroma_min': 0.0025184172857359165,
 'formant ': 0.002505008258833124,
 'MFCC20_min': 0.002485473855571685,
 'zcr': 0.0024501421741246637,
 'Mel32_median': 0.0023813666721814626,
 'chroma_std': 0.0019142886478925355,
 'chroma_max': 0.0015860496802866059,
 'Mel32_min': 0.0015502697566919857}

In [10]:
# Pruning stops as soon as the remaining feature count drops below this value.
TARGET_FEATURE_DIM = 100

# Seed the loop with the weakest group of the ranking that exists when this cell starts.
# NOTE(review): the loop below ranks groups against the sentiment labels (y_train_s);
# confirm that the seed ranking is meant to come from the same target.
last_feature_group_name = list(sorted_group_importance_mean.keys())[-1]

# Backward elimination over whole feature groups: drop the weakest group, refit,
# re-rank, repeat. The only exit is the dimension check below, so the loop is
# written as `while True` rather than testing a condition that never changes.
while True:
    # Mutates feature_groups in place; the removed column names are kept for inspection.
    removed_group_features = feature_groups.pop(last_feature_group_name)

    # Rebuild the flat column list and each surviving group's positions in it.
    total_features = []
    feature_group_indices = {}
    index_counter = 0
    for group_name, group_features in feature_groups.items():
        total_features.extend(group_features)
        group_indices = list(range(index_counter, index_counter + len(group_features)))
        feature_group_indices[group_name] = group_indices
        index_counter += len(group_features)

    if len(total_features) < TARGET_FEATURE_DIM:
        print(f"reach the ideal feature dimension - {TARGET_FEATURE_DIM} at {len(total_features)} ")
        break
    print(f"current feature dimension - {len(total_features)}")

    # Refit on the surviving columns and read the per-feature importances.
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train[total_features], y_train_s)
    importances = rf_classifier.feature_importances_

    # Rank groups on the UNROUNDED mean importance; rounding first would let two
    # nearly-equal groups tie, leaving the choice of which one to drop to dict order.
    raw_group_importance_mean = {
        group_name: np.mean(importances[group_indices])
        for group_name, group_indices in feature_group_indices.items()
    }
    ranked_groups = sorted(raw_group_importance_mean.items(), key=lambda x: x[1], reverse=True)

    # Values are rounded to 5 decimals for display only; order is the raw-value ranking.
    sorted_group_importance_mean = {group_name: round(mean, 5) for group_name, mean in ranked_groups}
    print("Average of feature importances for each group (sorted):", sorted_group_importance_mean)

    last_feature_group_name = ranked_groups[-1][0]
    print("To remove least importance feature group:", last_feature_group_name)


current feature dimension - 332
Average of feature importances for each group (sorted): {'pitch': 0.01343, 'local': 0.00595, 'intensity': 0.00444, 'MFCC20_std': 0.00357, 'HNR ': 0.00327, 'rms': 0.00319, 'chroma_median': 0.00298, 'MFCC20_mean': 0.00295, 'MFCC20_min': 0.00284, 'MFCC20_max': 0.00284, 'chroma_mean': 0.00281, 'Mel32_mean': 0.00274, 'formant ': 0.00273, 'Mel32_max': 0.00269, 'chroma_min': 0.00263, 'MFCC20_median': 0.00261, 'Mel32_std': 0.00256, 'spect ': 0.00254, 'zcr': 0.00246, 'Mel32_median': 0.00244, 'chroma_std': 0.00213, 'chroma_max': 0.00162}
To remove least importance feature group: chroma_max
current feature dimension - 320
Average of feature importances for each group (sorted): {'pitch': 0.01357, 'local': 0.00589, 'intensity': 0.00485, 'MFCC20_std': 0.00359, 'HNR ': 0.00314, 'rms': 0.00312, 'chroma_mean': 0.00306, 'chroma_median': 0.00306, 'MFCC20_max': 0.003, 'MFCC20_mean': 0.00298, 'Mel32_max': 0.00286, 'MFCC20_min': 0.00284, 'formant ': 0.00276, 'Mel32_mean': 0.0

In [11]:
# Refit the shared forest on the pruned feature set against the sentiment labels,
# then score it on the held-out split.
predictions = model.fit(X_train[total_features], y_train_s).predict(X_test[total_features])
print(classification_report(y_true=y_test_s, y_pred=predictions))
print(confusion_matrix(y_true=y_test_s, y_pred=predictions))

              precision    recall  f1-score   support

          -1       0.75      0.97      0.85       748
           0       0.85      0.44      0.58       183
           1       0.92      0.43      0.58       249

    accuracy                           0.77      1180
   macro avg       0.84      0.61      0.67      1180
weighted avg       0.80      0.77      0.75      1180

[[726  13   9]
 [102  81   0]
 [142   1 106]]


In [12]:
# Refit the shared forest on the pruned feature set against the emotion labels,
# then score it on the held-out split.
predictions = model.fit(X_train[total_features], y_train_e).predict(X_test[total_features])
print(classification_report(y_true=y_test_e, y_pred=predictions))
print(confusion_matrix(y_true=y_test_e, y_pred=predictions))

              precision    recall  f1-score   support

       Anger       0.76      0.85      0.80       187
    Calmness       0.54      0.94      0.68        16
     Disgust       0.54      0.42      0.47       187
        Fear       0.64      0.53      0.58       187
   Happiness       0.69      0.48      0.57       187
  Neutrality       0.63      0.71      0.67       167
     Sadness       0.54      0.76      0.63       187
    Surprise       0.89      0.76      0.82        62

    accuracy                           0.64      1180
   macro avg       0.65      0.68      0.65      1180
weighted avg       0.64      0.64      0.63      1180

[[159   0  12   4   8   2   2   0]
 [  0  15   0   0   0   0   1   0]
 [ 10   2  79  15  15  29  37   0]
 [  8   0  13  99   6  11  48   2]
 [ 33   2  22  15  90  11  12   2]
 [  0   4  12   7   3 119  22   0]
 [  0   5   4   9   6  18 143   2]
 [  0   0   5   5   3   0   2  47]]


### Feature Table Summary (Combined)

| Feature category    | Num. of sub-attributes | Num. selected        | Remain |
| ------------------- | ---------------------- | -------------------- | ------ |
| Intensity           | 9                      | 6                    | True   |
| Pitch               | 12                     | 8                    | True   |
| HNR                 | 6                      | 4                    | True   |
| Local Jitter        | 1                      | 1                    | True   |
| Local Shimmer       | 1                      | 1                    | True   |
| Spectrum            | 9                      | 8                    | False  |
| Formant             | 14                     | 6                    | False  |
| RMS                 | 5                      | 5                    | True   |
| Mel                 | 32/64/128 × 5          | 32 (mean)            | True   |
| MFCC                | 20/40 × 5              | 20 × 2 (mean, std)   | True   |
| Chroma              | 12 × 5                 | 0                    | False  |
|                     |                        |                      |        |
| **S accuracy - RF** |                        |                      | 77% (min recall 0.43) |
| **E accuracy - RF** |                        |                      | 64% (min recall 0.42) |