# MER algorithm for static features

### Load data

In [2]:
import pandas as pd
import numpy as np

In [13]:
audio_features_dir = '/Users/gioelepozzi/Desktop/MasterThesis/code/features_extraction/dataframe.csv'
audio_features_df = pd.read_csv(audio_features_dir, index_col = 0)

music_ID = audio_features_df['music_ID']
valence_mean = audio_features_df['valence(mean)']
arousal_mean = audio_features_df['arousal(mean)']
valence_std = audio_features_df['valence(std)']
arousal_std = audio_features_df['arousal(std)']

audio_features = audio_features_df.iloc[:,5:-1]
audio_features_v_mean = audio_features.join(valence_mean).dropna()
audio_features_a_mean = audio_features.join(arousal_mean).dropna()
audio_features_v_std = audio_features.join(valence_std).dropna()
audio_features_a_std = audio_features.join(arousal_std).dropna()

audio_features_with_music_ID = audio_features.join(music_ID).dropna()

eda_features_dir = '/Users/gioelepozzi/Desktop/MasterThesis/code/eda_feature_extraction/dataframe_EDA.csv'
eda_features_df = pd.read_csv(eda_features_dir, index_col= 0)
eda_dataset = eda_features_df.groupby(by=['music_ID'], as_index=False).mean()

eda_features = eda_dataset.iloc[:,6:-1]
eda_features_v_mean = eda_features.join(valence_mean).dropna()
eda_features_a_mean = eda_features.join(arousal_mean).dropna()
eda_features_v_std = eda_features.join(valence_std).dropna()
eda_features_a_std = eda_features.join(arousal_std).dropna()

In [4]:
def load_audio_dataset(data):
    features = data[data.columns[:-1]].values
    labels = data[data.columns[-1]].values
    #scaler = StandardScaler(copy=False)
    #scaler.fit_transform(features)
    return features, labels

### Regressors

In [5]:
from sklearn.linear_model import Lasso, ElasticNet, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [6]:
def rmse(y, y_pred):
    return sqrt(mean_squared_error(y, y_pred))

regressors = {
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'Ridge': Ridge(),
    'kNN': KNeighborsRegressor(),
    'SVRrbf': SVR(kernel='rbf', gamma='scale'),
    'SVRpoly': SVR(kernel='poly', gamma='scale'),
    'SVRlinear': SVR(kernel='linear', gamma='scale'),
    'DT': DecisionTreeRegressor(max_depth=5),
    'RF': RandomForestRegressor(max_depth=5, n_estimators=10, max_features=1),
}

In [7]:
from tqdm import tqdm

In [29]:
def cross_val_regression(regressors, features, labels, preprocessfunc):
    columns = list(regressors.keys())
    scores = pd.DataFrame(columns=columns, index=['RMSE'])

    for reg_name, reg in regressors.items(): #for reg_name, reg in tqdm(regressors.items(), desc='regressors'):
        scorer = {'rmse': make_scorer(rmse)}
        reg = make_pipeline(*preprocessfunc, reg)
        reg_score = cross_validate(reg, features, labels, scoring=scorer, cv=10, return_train_score=False) 
        scores.loc['RMSE', reg_name] = reg_score['test_rmse'].mean()
        #scores.loc['R', reg_name] = reg_score['test_r'].mean()

    mean_rmse = scores.mean(axis=1)
    std_rmse = scores.std(axis=1)
    
    scores['Mean'] = mean_rmse
    scores['std'] = std_rmse
    return scores

def format_scores(scores):
    def highlight(s):
        is_min = s == min(s)
#         is_max = s == max(s)
#         is_max_or_min = (is_min | is_max)
        return ['background-color: yellow' if v else '' for v in is_min]
    scores = scores.style.apply(highlight, axis=1, subset=pd.IndexSlice[:, :scores.columns[-2]])
    return scores.format('{:.3f}')

### Multiple regressors on audio features

In [27]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate

from math import sqrt

import IPython.display as ipd

In [30]:
prefunc = [StandardScaler()]

print('Audio Features:\n')

print('In Arousal (mean) dimension...')
features_a_mean, labels_a_mean = load_audio_dataset(audio_features_a_mean)
scores_a_a_mean = cross_val_regression(regressors, features_a_mean, labels_a_mean, prefunc)
ipd.display(format_scores(scores_a_a_mean))

print('In Valence (mean) dimension ...')
features_v_mean, labels_v_mean = load_audio_dataset(audio_features_v_mean)
scores_a_v_mean = cross_val_regression(regressors, features_v_mean, labels_v_mean, prefunc)
ipd.display(format_scores(scores_a_v_mean))

print('In Arousal (std) dimension...')
features_a_std, labels_a_std = load_audio_dataset(audio_features_a_std)
scores_a_a_std = cross_val_regression(regressors, features_a_std, labels_a_std, prefunc)
ipd.display(format_scores(scores_a_a_std))

print('In Valence (std) dimension...')
features_v_std, labels_v_std = load_audio_dataset(audio_features_v_std)
scores_a_v_std = cross_val_regression(regressors, features_v_std, labels_v_std, prefunc)
ipd.display(format_scores(scores_a_v_std))

Audio Features:

In Arousal (mean) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.184,0.184,0.107,0.126,0.118,0.205,0.107,0.121,0.122,0.142,0.038


In Valence (mean) dimension ...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.162,0.162,0.12,0.132,0.126,0.163,0.122,0.149,0.127,0.14,0.019


In Arousal (std) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.047,0.047,0.045,0.046,0.05,0.053,0.051,0.049,0.044,0.048,0.003


In Valence (std) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.046,0.046,0.046,0.049,0.047,0.05,0.049,0.05,0.046,0.048,0.002


### Multiple regressors on EDA features

In [11]:
prefunc = [StandardScaler()]

print('EDA Features:\n')

print('In Arousal (mean) dimension...')
scores_eda_a_mean = cross_val_regression(regressors, eda_features_a_mean, arousal_mean, prefunc)
ipd.display(format_scores(scores_eda_a_mean))

print('In Valence (mean) dimension...')
scores_eda_v_mean = cross_val_regression(regressors, eda_features_v_mean, valence_mean, prefunc)
ipd.display(format_scores(scores_eda_v_mean))

print('In Arousal (std) dimension...')
scores_eda_a_std = cross_val_regression(regressors, eda_features_a_std, arousal_std, prefunc)
ipd.display(format_scores(scores_eda_a_std))

print('In Valence (std) dimension...')
scores_eda_v_std = cross_val_regression(regressors, eda_features_v_std, valence_std, prefunc)
ipd.display(format_scores(scores_eda_v_std))

EDA Features:

In Arousal (mean) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.184,0.184,0.0,0.097,0.062,0.372,0.052,0.008,0.172,0.126,0.117


In Valence (mean) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.162,0.162,0.0,0.084,0.061,0.241,0.051,0.007,0.148,0.102,0.081


In Arousal (std) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.047,0.047,0.0,0.026,0.047,0.048,0.044,0.002,0.043,0.034,0.02


In Valence (std) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.046,0.046,0.0,0.026,0.043,0.045,0.042,0.002,0.041,0.032,0.019


## Multimodal Emotion Recognition Based on Fusion Featrues

Evaluating multimodal fusion methods using early-fusion-by-feature-concatenation (EFFC), which means concatenate the audio and text features to a single feature vector and train a single classification model.

### Multiple regressors on audio + EDA features

In [14]:
fusion_dataset = pd.merge(eda_dataset, audio_features_with_music_ID, on=['music_ID']).dropna()
fusion_features = fusion_dataset.drop(columns=['music_ID','subject_ID','valence(mean)','arousal(mean)','valence(std)','arousal(std)'])

In [15]:
prefunc = [StandardScaler()]

print('Audio + EDA Features:\n')

print('In Arousal (mean) dimension...')
scores_f_a_mean = cross_val_regression(regressors, fusion_features, arousal_mean, prefunc)
ipd.display(format_scores(scores_f_a_mean))

print('In Valence (mean) dimension...')
scores_f_v_mean = cross_val_regression(regressors, fusion_features, valence_mean, prefunc)
ipd.display(format_scores(scores_f_v_mean))

print('In Arousal (std) dimension...')
scores_f_a_std = cross_val_regression(regressors, fusion_features, arousal_std, prefunc)
ipd.display(format_scores(scores_f_a_std))

print('In Valence (std) dimension...')
scores_f_v_std = cross_val_regression(regressors, fusion_features, valence_std, prefunc)
ipd.display(format_scores(scores_f_v_std))

Audio + EDA Features:

In Arousal (mean) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.184,0.184,0.106,0.128,0.117,0.242,0.108,0.121,0.138,0.148,0.046


In Valence (mean) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.162,0.162,0.126,0.131,0.127,0.284,0.135,0.148,0.139,0.157,0.049


In Arousal (std) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.047,0.047,0.047,0.046,0.05,0.052,0.051,0.05,0.044,0.048,0.003


In Valence (std) dimension...


Unnamed: 0,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.046,0.046,0.047,0.049,0.048,0.049,0.048,0.051,0.046,0.048,0.002
