# MER algorithm for static features

### Load data

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
audio_features_dir = '/Users/gioelepozzi/Desktop/data/features_thesis/static_features.csv'
audio_features_df = pd.read_csv(audio_features_dir)

eda_features_dir = '/Users/gioelepozzi/Desktop/data/features_thesis/static_features_EDA.csv'
eda_features_df = pd.read_csv(eda_features_dir)
eda_dataset = eda_features_df.groupby(by=['music_ID'], as_index=False).mean() # mean over 10 subjects
eda_dataset = eda_dataset.drop(columns=['subject_ID'])

VA_mean_dir = '/Users/gioelepozzi/Desktop/data/annotations_thesis/static_annotations.csv'
VA_std_dir = '/Users/gioelepozzi/Desktop/data/annotations_thesis/static_annotations_std.csv'
VA_mean_df = pd.read_csv(VA_mean_dir)
VA_std_df = pd.read_csv(VA_std_dir)

audio_df = pd.merge(audio_features_df, VA_mean_df, on=['music_ID'])
audio_df = pd.merge(audio_df, VA_std_df, on=['music_ID']) # df with audio features and VA values

eda_df = pd.merge(eda_dataset, VA_mean_df, on=['music_ID'])
eda_df = pd.merge(eda_df, VA_std_df, on=['music_ID'])

In [3]:
audio_features_v_mean = audio_df.drop(columns=['music_ID', 'Arousal(mean)', 'Arousal(std)', 'Valence(std)'])
audio_features_a_mean = audio_df.drop(columns=['music_ID', 'Valence(mean)', 'Arousal(std)', 'Valence(std)'])
audio_features_v_std = audio_df.drop(columns=['music_ID', 'Arousal(mean)', 'Valence(mean)', 'Arousal(std)'])
audio_features_a_std = audio_df.drop(columns=['music_ID', 'Arousal(mean)', 'Valence(mean)', 'Valence(std)'])

eda_features_v_mean = eda_df.drop(columns=['music_ID', 'Arousal(mean)', 'Arousal(std)', 'Valence(std)'])
eda_features_a_mean = eda_df.drop(columns=['music_ID', 'Valence(mean)', 'Arousal(std)', 'Valence(std)'])
eda_features_v_std = eda_df.drop(columns=['music_ID', 'Arousal(mean)', 'Valence(mean)', 'Arousal(std)'])
eda_features_a_std = eda_df.drop(columns=['music_ID', 'Arousal(mean)', 'Valence(mean)', 'Valence(std)'])

arousal_mean = audio_df['Arousal(mean)']
valence_mean = audio_df['Valence(mean)']
arousal_std = audio_df['Arousal(std)']
valence_std = audio_df['Valence(std)']

In [4]:
fusion_dataset = pd.merge(audio_features_df, eda_dataset, on=['music_ID'])
fusion_features = fusion_dataset.drop(columns=['music_ID'])

In [5]:
def load_audio_dataset(data):
    features = data[data.columns[:-1]].values
    labels = data[data.columns[-1]].values
    #scaler = StandardScaler(copy=False)
    #scaler.fit_transform(features)
    return features, labels

### Regressors

In [16]:
from sklearn.linear_model import Lasso, ElasticNet, Ridge, LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, f1_score

In [17]:
def rmse(y, y_pred):
    return sqrt(mean_squared_error(y, y_pred))

def r2(y, y_pred):
    return r2_score(y, y_pred)

def f1(y, y_pred):
    return f1_score(y, y_pred)

regressors = {
    'LR': LinearRegression(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'Ridge': Ridge(),
    'kNN': KNeighborsRegressor(),
    'SVRrbf': SVR(kernel='rbf', gamma='scale'),
    'SVRpoly': SVR(kernel='poly', gamma='scale'),
    'SVRlinear': SVR(kernel='linear', gamma='scale'),
    'DT': DecisionTreeRegressor(max_depth=5),
    'RF': RandomForestRegressor(max_depth=5, n_estimators=10, max_features=1),
}

In [18]:
def cross_val_regression_RMSE(regressors, features, labels, preprocessfunc):
    columns = list(regressors.keys())
    scores = pd.DataFrame(columns=columns, index=['RMSE'])

    for reg_name, reg in regressors.items():
        scorer = {'rmse': make_scorer(rmse)}
        reg = make_pipeline(*preprocessfunc, reg)
        reg_score = cross_validate(reg, features, labels, scoring=scorer, cv=10, return_train_score=False) 
        scores.loc['RMSE', reg_name] = reg_score['test_rmse'].mean()

    return scores

def cross_val_regression_r2(regressors, features, labels, preprocessfunc):
    columns = list(regressors.keys())
    scores_r2 = pd.DataFrame(columns=columns, index=['R2'])

    for reg_name, reg in regressors.items():
        scorer_r2 = {'r2': make_scorer(r2)}
        reg = make_pipeline(*preprocessfunc, reg)
        reg_score_r2 = cross_validate(reg, features, labels, scoring=scorer_r2, cv=10, return_train_score=False) 
        scores_r2.loc['R2', reg_name] = reg_score_r2['test_r2'].mean()

    return scores_r2

def cross_val_regression_f1(regressors, features, labels, preprocessfunc):
    columns = list(regressors.keys())
    scores_f1 = pd.DataFrame(columns=columns, index=['f1'])

    for reg_name, reg in regressors.items():
        scorer_f1 = {'f1': make_scorer(f1)}
        reg = make_pipeline(*preprocessfunc, reg)
        reg_score_f1 = cross_validate(reg, features, labels, scoring=scorer_f1, cv=10, return_train_score=False) 
        scores_f1.loc['f1', reg_name] = reg_score_r2['test_f1'].mean()

    return scores_f1

In [33]:
def format_scores(scores):
    def highlight(s):
        is_min = s == min(s)
#         is_max = s == max(s)
#         is_max_or_min = (is_min | is_max)
        return ['background-color: yellow' if v else '' for v in is_min]
    scores = scores.style.apply(highlight, axis=1, subset=pd.IndexSlice[:, :scores.columns[-2]])
    return scores.format('{:.3f}')

### Multiple regressors on audio features

In [20]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate

from math import sqrt

import IPython.display as ipd

In [26]:
prefunc = [StandardScaler()]

print('Audio Features - RMSE score:\n')

print('In Arousal (mean) dimension...')
features_a_mean, labels_a_mean = load_audio_dataset(audio_features_a_mean)
scores_a_a_mean_RMSE = cross_val_regression_RMSE(regressors, features_a_mean, labels_a_mean, prefunc)
ipd.display(format_scores(scores_a_a_mean_RMSE))

print('In Valence (mean) dimension ...')
features_v_mean, labels_v_mean = load_audio_dataset(audio_features_v_mean)
scores_a_v_mean_RMSE = cross_val_regression_RMSE(regressors, features_v_mean, labels_v_mean, prefunc)
ipd.display(format_scores(scores_a_v_mean_RMSE))

print('In Arousal (std) dimension...')
features_a_std, labels_a_std = load_audio_dataset(audio_features_a_std)
scores_a_a_std_RMSE = cross_val_regression_RMSE(regressors, features_a_std, labels_a_std, prefunc)
ipd.display(format_scores(scores_a_a_std_RMSE))

print('In Valence (std) dimension...')
features_v_std, labels_v_std = load_audio_dataset(audio_features_v_std)
scores_a_v_std_RMSE = cross_val_regression_RMSE(regressors, features_v_std, labels_v_std, prefunc)
ipd.display(format_scores(scores_a_v_std_RMSE))

Audio Features - RMSE score:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.102,0.184,0.184,0.1,0.119,0.113,0.209,0.106,0.129,0.136


In Valence (mean) dimension ...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.122,0.162,0.162,0.12,0.126,0.119,0.211,0.127,0.143,0.127


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.047,0.047,0.047,0.046,0.047,0.05,0.05,0.049,0.051,0.045


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.051,0.046,0.046,0.051,0.049,0.047,0.048,0.048,0.052,0.045


In [27]:
prefunc = [StandardScaler()]

print('Audio Features - r2 score:\n')

print('In Arousal (mean) dimension...')
features_a_mean, labels_a_mean = load_audio_dataset(audio_features_a_mean)
scores_a_a_mean_r2 = cross_val_regression_r2(regressors, features_a_mean, labels_a_mean, prefunc)
ipd.display(format_scores(scores_a_a_mean_r2))

print('In Valence (mean) dimension ...')
features_v_mean, labels_v_mean = load_audio_dataset(audio_features_v_mean)
scores_a_v_mean_r2 = cross_val_regression_r2(regressors, features_v_mean, labels_v_mean, prefunc)
ipd.display(format_scores(scores_a_v_mean_r2))

print('In Arousal (std) dimension...')
features_a_std, labels_a_std = load_audio_dataset(audio_features_a_std)
scores_a_a_std_r2 = cross_val_regression_r2(regressors, features_a_std, labels_a_std, prefunc)
ipd.display(format_scores(scores_a_a_std_r2))

print('In Valence (std) dimension...')
features_v_std, labels_v_std = load_audio_dataset(audio_features_v_std)
scores_a_v_std_r2 = cross_val_regression_r2(regressors, features_v_std, labels_v_std, prefunc)
ipd.display(format_scores(scores_a_v_std_r2))

Audio Features - r2 score:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,0.669,-0.039,-0.039,0.68,0.558,0.606,-1.115,0.644,0.477,0.459


In Valence (mean) dimension ...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,0.373,-0.056,-0.056,0.4,0.357,0.418,-2.233,0.333,0.148,0.356


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,0.007,-0.013,-0.013,0.051,-0.009,-0.143,-0.123,-0.097,-0.184,0.136


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,-0.349,-0.026,-0.026,-0.334,-0.187,-0.071,-0.136,-0.104,-0.397,-0.011


### Multiple regressors on EDA features

In [34]:
prefunc = [StandardScaler()]

print('EDA Features - RMSE:\n')

print('In Arousal (mean) dimension...')
scores_eda_a_mean_RMSE = cross_val_regression_RMSE(regressors, eda_features_a_mean, arousal_mean, prefunc)
ipd.display(format_scores(scores_eda_a_mean_RMSE))

print('In Valence (mean) dimension...')
scores_eda_v_mean_RMSE = cross_val_regression_RMSE(regressors, eda_features_v_mean, valence_mean, prefunc)
ipd.display(format_scores(scores_eda_v_mean_RMSE))

print('In Arousal (std) dimension...')
scores_eda_a_std_RMSE = cross_val_regression_RMSE(regressors, eda_features_a_std, arousal_std, prefunc)
ipd.display(format_scores(scores_eda_a_std_RMSE))

print('In Valence (std) dimension...')
scores_eda_v_std_RMSE = cross_val_regression_RMSE(regressors, eda_features_v_std, valence_std, prefunc)
ipd.display(format_scores(scores_eda_v_std_RMSE))

EDA Features - RMSE:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.0,0.184,0.184,0.0,0.136,0.074,0.096,0.055,0.008,0.179


In Valence (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.0,0.162,0.162,0.0,0.121,0.073,0.1,0.053,0.008,0.157


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.0,0.047,0.047,0.0,0.035,0.048,0.048,0.045,0.002,0.045


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.0,0.046,0.046,0.0,0.035,0.045,0.045,0.043,0.002,0.045


In [29]:
prefunc = [StandardScaler()]

print('EDA Features - r2 score:\n')

print('In Arousal (mean) dimension...')
scores_eda_a_mean_r2 = cross_val_regression_r2(regressors, eda_features_a_mean, arousal_mean, prefunc)
ipd.display(format_scores(scores_eda_a_mean_r2))

print('In Valence (mean) dimension...')
scores_eda_v_mean_r2 = cross_val_regression_r2(regressors, eda_features_v_mean, valence_mean, prefunc)
ipd.display(format_scores(scores_eda_v_mean_r2))

print('In Arousal (std) dimension...')
scores_eda_a_std_r2 = cross_val_regression_r2(regressors, eda_features_a_std, arousal_std, prefunc)
ipd.display(format_scores(scores_eda_a_std_r2))

print('In Valence (std) dimension...')
scores_eda_v_std_r2 = cross_val_regression_r2(regressors, eda_features_v_std, valence_std, prefunc)
ipd.display(format_scores(scores_eda_v_std_r2))

EDA Features - r2 score:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,1.0,-0.039,-0.039,1.0,0.435,0.831,0.685,0.907,0.998,0.019


In Valence (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,1.0,-0.056,-0.056,1.0,0.41,0.78,0.48,0.886,0.997,-0.006


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,1.0,-0.013,-0.013,1.0,0.439,-0.072,-0.042,0.067,0.997,0.035


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,1.0,-0.026,-0.026,1.0,0.406,0.015,0.012,0.101,0.997,0.026


### Multiple regressors on audio + EDA features

In [30]:
prefunc = [StandardScaler()]

print('Audio + EDA Features - RMSE score:\n')

print('In Arousal (mean) dimension...')
scores_f_a_mean_RMSE = cross_val_regression_RMSE(regressors, fusion_features, arousal_mean, prefunc)
ipd.display(format_scores(scores_f_a_mean_RMSE))

print('In Valence (mean) dimension...')
scores_f_v_mean_RMSE = cross_val_regression_RMSE(regressors, fusion_features, valence_mean, prefunc)
ipd.display(format_scores(scores_f_v_mean_RMSE))

print('In Arousal (std) dimension...')
scores_f_a_std_RMSE = cross_val_regression_RMSE(regressors, fusion_features, arousal_std, prefunc)
ipd.display(format_scores(scores_f_a_std_RMSE))

print('In Valence (std) dimension...')
scores_f_v_std_RMSE = cross_val_regression_RMSE(regressors, fusion_features, valence_std, prefunc)
ipd.display(format_scores(scores_f_v_std_RMSE))

Audio + EDA Features - RMSE score:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.109,0.184,0.184,0.103,0.126,0.11,0.139,0.112,0.131,0.147


In Valence (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.134,0.162,0.162,0.127,0.129,0.123,0.148,0.138,0.142,0.137


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.052,0.047,0.047,0.049,0.046,0.05,0.05,0.049,0.051,0.045


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
RMSE,0.056,0.046,0.046,0.053,0.048,0.047,0.048,0.048,0.052,0.046


In [35]:
prefunc = [StandardScaler()]

print('Audio + EDA Features - r2 score:\n')

print('In Arousal (mean) dimension...')
scores_f_a_mean_r2 = cross_val_regression_r2(regressors, fusion_features, arousal_mean, prefunc)
ipd.display(format_scores(scores_f_a_mean_r2))

print('In Valence (mean) dimension...')
scores_f_v_mean_r2 = cross_val_regression_r2(regressors, fusion_features, valence_mean, prefunc)
ipd.display(format_scores(scores_f_v_mean_r2))

print('In Arousal (std) dimension...')
scores_f_a_std_r2 = cross_val_regression_r2(regressors, fusion_features, arousal_std, prefunc)
ipd.display(format_scores(scores_f_a_std_r2))

print('In Valence (std) dimension...')
scores_f_v_std_r2 = cross_val_regression_r2(regressors, fusion_features, valence_std, prefunc)
ipd.display(format_scores(scores_f_v_std_r2))

Audio + EDA Features - r2 score:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,0.622,-0.039,-0.039,0.662,0.51,0.623,0.346,0.599,0.465,0.332


In Valence (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,0.243,-0.056,-0.056,0.322,0.333,0.389,-0.014,0.204,0.162,0.244


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,-0.222,-0.013,-0.013,-0.101,0.027,-0.16,-0.159,-0.09,-0.223,0.081


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF
R2,-0.547,-0.026,-0.026,-0.41,-0.14,-0.087,-0.115,-0.116,-0.356,-0.009
