# MER algorithm for static features

### Load data

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [8]:
audio_features_dir = '/Users/gioelepozzi/Desktop/data/features_thesis/static_features.csv'
audio_features_df = pd.read_csv(audio_features_dir)

eda_features_dir = '/Users/gioelepozzi/Desktop/data/features_thesis/static_features_EDA.csv'
eda_features_df = pd.read_csv(eda_features_dir)
eda_dataset = eda_features_df.groupby(by=['music_ID'], as_index=False).mean() # mean over 10 subjects
eda_dataset = eda_dataset.drop(columns=['subject_ID'])

VA_mean_dir = '/Users/gioelepozzi/Desktop/data/annotations_thesis/static_annotations.csv'
VA_std_dir = '/Users/gioelepozzi/Desktop/data/annotations_thesis/static_annotations_std.csv'
VA_mean_df = pd.read_csv(VA_mean_dir)
VA_std_df = pd.read_csv(VA_std_dir)

audio_df = pd.merge(audio_features_df, VA_mean_df, on=['music_ID'])
audio_df = pd.merge(audio_df, VA_std_df, on=['music_ID']) # df with audio features and VA values

eda_df = pd.merge(eda_dataset, VA_mean_df, on=['music_ID'])
eda_df = pd.merge(eda_df, VA_std_df, on=['music_ID'])

In [9]:
audio_features_v_mean = audio_df.drop(columns=['music_ID', 'Arousal(mean)', 'Arousal(std)', 'Valence(std)'])
audio_features_a_mean = audio_df.drop(columns=['music_ID', 'Valence(mean)', 'Arousal(std)', 'Valence(std)'])
audio_features_v_std = audio_df.drop(columns=['music_ID', 'Arousal(mean)', 'Valence(mean)', 'Arousal(std)'])
audio_features_a_std = audio_df.drop(columns=['music_ID', 'Arousal(mean)', 'Valence(mean)', 'Valence(std)'])

eda_features_v_mean = eda_df.drop(columns=['music_ID', 'Arousal(mean)', 'Arousal(std)', 'Valence(std)'])
eda_features_a_mean = eda_df.drop(columns=['music_ID', 'Valence(mean)', 'Arousal(std)', 'Valence(std)'])
eda_features_v_std = eda_df.drop(columns=['music_ID', 'Arousal(mean)', 'Valence(mean)', 'Arousal(std)'])
eda_features_a_std = eda_df.drop(columns=['music_ID', 'Arousal(mean)', 'Valence(mean)', 'Valence(std)'])

arousal_mean = audio_df['Arousal(mean)']
valence_mean = audio_df['Valence(mean)']
arousal_std = audio_df['Arousal(std)']
valence_std = audio_df['Valence(std)']

In [11]:
fusion_dataset = pd.merge(audio_features_df, eda_dataset, on=['music_ID'])
fusion_features = fusion_dataset.drop(columns=['music_ID'])

In [4]:
def load_audio_dataset(data):
    features = data[data.columns[:-1]].values
    labels = data[data.columns[-1]].values
    #scaler = StandardScaler(copy=False)
    #scaler.fit_transform(features)
    return features, labels

### Regressors

In [5]:
from sklearn.linear_model import Lasso, ElasticNet, Ridge, LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [6]:
def rmse(y, y_pred):
    return sqrt(mean_squared_error(y, y_pred))

regressors = {
    'LR': LinearRegression(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'Ridge': Ridge(),
    'kNN': KNeighborsRegressor(),
    'SVRrbf': SVR(kernel='rbf', gamma='scale'),
    'SVRpoly': SVR(kernel='poly', gamma='scale'),
    'SVRlinear': SVR(kernel='linear', gamma='scale'),
    'DT': DecisionTreeRegressor(max_depth=5),
    'RF': RandomForestRegressor(max_depth=5, n_estimators=10, max_features=1),
}

In [7]:
def cross_val_regression(regressors, features, labels, preprocessfunc):
    columns = list(regressors.keys())
    scores = pd.DataFrame(columns=columns, index=['RMSE'])

    for reg_name, reg in regressors.items(): #for reg_name, reg in tqdm(regressors.items(), desc='regressors'):
        scorer = {'rmse': make_scorer(rmse)}
        reg = make_pipeline(*preprocessfunc, reg)
        reg_score = cross_validate(reg, features, labels, scoring=scorer, cv=10, return_train_score=False) 
        scores.loc['RMSE', reg_name] = reg_score['test_rmse'].mean()
        #scores.loc['R', reg_name] = reg_score['test_r'].mean()

    mean_rmse = scores.mean(axis=1)
    std_rmse = scores.std(axis=1)
    
    scores['Mean'] = mean_rmse
    scores['std'] = std_rmse
    return scores

def format_scores(scores):
    def highlight(s):
        is_min = s == min(s)
#         is_max = s == max(s)
#         is_max_or_min = (is_min | is_max)
        return ['background-color: yellow' if v else '' for v in is_min]
    scores = scores.style.apply(highlight, axis=1, subset=pd.IndexSlice[:, :scores.columns[-2]])
    return scores.format('{:.3f}')

### Multiple regressors on audio features

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate

from math import sqrt

import IPython.display as ipd

In [9]:
prefunc = [StandardScaler()]

print('Audio Features:\n')

print('In Arousal (mean) dimension...')
features_a_mean, labels_a_mean = load_audio_dataset(audio_features_a_mean)
scores_a_a_mean = cross_val_regression(regressors, features_a_mean, labels_a_mean, prefunc)
ipd.display(format_scores(scores_a_a_mean))

print('In Valence (mean) dimension ...')
features_v_mean, labels_v_mean = load_audio_dataset(audio_features_v_mean)
scores_a_v_mean = cross_val_regression(regressors, features_v_mean, labels_v_mean, prefunc)
ipd.display(format_scores(scores_a_v_mean))

print('In Arousal (std) dimension...')
features_a_std, labels_a_std = load_audio_dataset(audio_features_a_std)
scores_a_a_std = cross_val_regression(regressors, features_a_std, labels_a_std, prefunc)
ipd.display(format_scores(scores_a_a_std))

print('In Valence (std) dimension...')
features_v_std, labels_v_std = load_audio_dataset(audio_features_v_std)
scores_a_v_std = cross_val_regression(regressors, features_v_std, labels_v_std, prefunc)
ipd.display(format_scores(scores_a_v_std))

Audio Features:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.102,0.184,0.184,0.1,0.119,0.113,0.208,0.106,0.129,0.138,0.138,0.04


In Valence (mean) dimension ...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.122,0.162,0.162,0.12,0.126,0.119,0.212,0.127,0.143,0.126,0.142,0.029


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.047,0.047,0.047,0.046,0.047,0.05,0.05,0.049,0.051,0.044,0.048,0.002


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.051,0.046,0.046,0.051,0.049,0.047,0.048,0.048,0.053,0.046,0.048,0.003


### Multiple regressors on EDA features

In [33]:
prefunc = [StandardScaler()]

print('EDA Features:\n')

print('In Arousal (mean) dimension...')
scores_eda_a_mean = cross_val_regression(regressors, eda_features_a_mean, arousal_mean, prefunc)
ipd.display(format_scores(scores_eda_a_mean))

print('In Valence (mean) dimension...')
scores_eda_v_mean = cross_val_regression(regressors, eda_features_v_mean, valence_mean, prefunc)
ipd.display(format_scores(scores_eda_v_mean))

print('In Arousal (std) dimension...')
scores_eda_a_std = cross_val_regression(regressors, eda_features_a_std, arousal_std, prefunc)
ipd.display(format_scores(scores_eda_a_std))

print('In Valence (std) dimension...')
scores_eda_v_std = cross_val_regression(regressors, eda_features_v_std, valence_std, prefunc)
ipd.display(format_scores(scores_eda_v_std))

EDA Features:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.0,0.184,0.184,0.0,0.136,0.075,0.097,0.055,0.008,0.18,0.092,0.076


In Valence (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.0,0.162,0.162,0.0,0.122,0.074,0.1,0.053,0.008,0.157,0.084,0.067


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.0,0.047,0.047,0.0,0.036,0.048,0.048,0.045,0.002,0.046,0.032,0.022


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.0,0.046,0.046,0.0,0.035,0.045,0.045,0.043,0.002,0.045,0.031,0.021


### Multiple regressors on audio + EDA features

In [40]:
prefunc = [StandardScaler()]

print('Audio + EDA Features:\n')

print('In Arousal (mean) dimension...')
scores_f_a_mean = cross_val_regression(regressors, fusion_features, arousal_mean, prefunc)
ipd.display(format_scores(scores_f_a_mean))

print('In Valence (mean) dimension...')
scores_f_v_mean = cross_val_regression(regressors, fusion_features, valence_mean, prefunc)
ipd.display(format_scores(scores_f_v_mean))

print('In Arousal (std) dimension...')
scores_f_a_std = cross_val_regression(regressors, fusion_features, arousal_std, prefunc)
ipd.display(format_scores(scores_f_a_std))

print('In Valence (std) dimension...')
scores_f_v_std = cross_val_regression(regressors, fusion_features, valence_std, prefunc)
ipd.display(format_scores(scores_f_v_std))

Audio + EDA Features:

In Arousal (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.109,0.184,0.184,0.103,0.126,0.11,0.139,0.112,0.131,0.149,0.135,0.03


In Valence (mean) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.135,0.162,0.162,0.127,0.129,0.123,0.148,0.138,0.142,0.137,0.14,0.014


In Arousal (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.052,0.047,0.047,0.049,0.046,0.05,0.05,0.049,0.052,0.045,0.049,0.002


In Valence (std) dimension...


Unnamed: 0,LR,Lasso,ElasticNet,Ridge,kNN,SVRrbf,SVRpoly,SVRlinear,DT,RF,Mean,std
RMSE,0.056,0.046,0.046,0.053,0.048,0.047,0.048,0.048,0.052,0.046,0.049,0.004
