# MER algorithm for dynamic features

### Load data

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [3]:
# Root folder of the thesis data. Every input path below is derived from it,
# so pointing the notebook at another machine means editing this one line.
DATA_DIR = '/Users/gioelepozzi/Desktop/data'

audio_features_dir = DATA_DIR + '/features_thesis/dynamic_features.csv'
audio_features_df = pd.read_csv(audio_features_dir)

eda_features_dir = DATA_DIR + '/features_thesis/dynamic_features_EDA.csv'
eda_features_df = pd.read_csv(eda_features_dir)
# Collapse the per-subject EDA rows into one row per (music_ID, frame) by
# averaging (mean over 10 subjects). The averaged subject_ID carries no
# meaning, so it is dropped afterwards.
eda_dataset = (
    eda_features_df
    .groupby(by=['music_ID', 'frame'], as_index=False)
    .mean()
    .drop(columns=['subject_ID'])
)

VA_mean_dir = DATA_DIR + '/annotations_thesis/dynamic_annotations.csv'
VA_std_dir = DATA_DIR + '/annotations_thesis/dynamic_annotations_std.csv'
VA_mean_df = pd.read_csv(VA_mean_dir)
VA_std_df = pd.read_csv(VA_std_dir)

# Audio features joined with the valence/arousal mean and std annotations.
# Missing values are replaced by 0 after the join.
audio_df = (
    audio_features_df
    .merge(VA_mean_df, on=['music_ID', 'frame'])
    .merge(VA_std_df, on=['music_ID', 'frame'])
    .fillna(0)
)

# EDA features joined with the same annotations.
# NOTE(review): unlike audio_df, no fillna is applied here - confirm the EDA
# table never contains missing values.
eda_df = (
    eda_dataset
    .merge(VA_mean_df, on=['music_ID', 'frame'])
    .merge(VA_std_df, on=['music_ID', 'frame'])
)

In [4]:
# Columns that identify a row, and the four annotation columns. Each feature
# frame below keeps exactly one annotation column and drops the other three
# together with the identifiers.
_ID_COLUMNS = ['music_ID', 'frame']
_TARGET_COLUMNS = ['Arousal(mean)', 'Valence(mean)', 'Arousal(std)', 'Valence(std)']


def _keep_single_target(frame, target):
    """Return `frame` without the ID columns and without any annotation column other than `target`."""
    unwanted = _ID_COLUMNS + [col for col in _TARGET_COLUMNS if col != target]
    return frame.drop(columns=unwanted)


audio_features_v_mean = _keep_single_target(audio_df, 'Valence(mean)')
audio_features_a_mean = _keep_single_target(audio_df, 'Arousal(mean)')
audio_features_v_std = _keep_single_target(audio_df, 'Valence(std)')
audio_features_a_std = _keep_single_target(audio_df, 'Arousal(std)')

eda_features_v_mean = _keep_single_target(eda_df, 'Valence(mean)')
eda_features_a_mean = _keep_single_target(eda_df, 'Arousal(mean)')
eda_features_v_std = _keep_single_target(eda_df, 'Valence(std)')
eda_features_a_std = _keep_single_target(eda_df, 'Arousal(std)')

# Stand-alone label series, in the order listed in _TARGET_COLUMNS.
arousal_mean, valence_mean, arousal_std, valence_std = (
    audio_df[col] for col in _TARGET_COLUMNS
)

eda_arousal_mean, eda_valence_mean, eda_arousal_std, eda_valence_std = (
    eda_df[col] for col in _TARGET_COLUMNS
)

In [5]:
# Join the audio features with the EDA features on (music_ID, frame), then
# strip the two key columns so only feature columns remain.
_fusion_keys = ['music_ID', 'frame']
fusion_dataset = audio_features_df.merge(eda_dataset, on=_fusion_keys)
fusion_features = fusion_dataset.drop(_fusion_keys, axis=1)

In [6]:
def load_audio_dataset(data):
    """Split a DataFrame into a feature matrix and a label vector.

    The last column of `data` is taken as the label; every other column is a
    feature. No scaling is applied here.

    Returns:
        (features, labels): the `.values` of the feature columns and of the
        label column, in that order.
    """
    column_names = data.columns
    feature_block = data.loc[:, column_names[:-1]]
    label_column = data.loc[:, column_names[-1]]
    return feature_block.values, label_column.values

### Regressors

In [7]:
from sklearn.linear_model import Lasso, ElasticNet, Ridge, LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [8]:
def rmse(y, y_pred):
    """Root-mean-squared error between true values `y` and predictions `y_pred`.

    Implemented with numpy so the function does not depend on names that are
    imported in a later cell.

    Args:
        y: array-like of true values.
        y_pred: array-like of predicted values, same shape as `y`
            (an (n, 1) column is treated like a length-n vector).

    Returns:
        The RMSE as a Python float.

    Raises:
        ValueError: if the inputs are empty or their shapes do not match.
    """
    actual = np.asarray(y, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Flatten single-column 2-D inputs so that (n,) and (n, 1) compare
    # element-wise instead of broadcasting to an (n, n) difference matrix.
    if actual.ndim == 2 and actual.shape[1] == 1:
        actual = actual.ravel()
    if predicted.ndim == 2 and predicted.shape[1] == 1:
        predicted = predicted.ravel()

    if actual.shape != predicted.shape:
        raise ValueError(
            'y and y_pred must have the same shape, got %s and %s'
            % (actual.shape, predicted.shape)
        )
    if actual.size == 0:
        raise ValueError('y and y_pred must not be empty')

    return float(np.sqrt(np.mean((actual - predicted) ** 2)))

# Seed shared by the estimators that draw random numbers, so repeated runs of
# the notebook produce the same scores.
RANDOM_STATE = 0

# Name -> unfitted estimator. The keys become the column names of the score
# tables built by cross_val_regression.
regressors = {
    'LR': LinearRegression(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'Ridge': Ridge(),
    'kNN': KNeighborsRegressor(),
    'SVRrbf': SVR(kernel='rbf', gamma='scale'),
    'SVRpoly': SVR(kernel='poly', gamma='scale'),
    'SVRlinear': SVR(kernel='linear', gamma='scale'),
    'DT': DecisionTreeRegressor(max_depth=5, random_state=RANDOM_STATE),
    # NOTE(review): max_features=1 is an integer, i.e. a single candidate
    # feature per split - confirm this is intended rather than the
    # fraction 1.0 (all features).
    'RF': RandomForestRegressor(max_depth=5, n_estimators=10, max_features=1,
                                random_state=RANDOM_STATE),
}

In [9]:
def cross_val_regression(regressors, features, labels, preprocessfunc, cv=10):
    """Cross-validate every regressor and tabulate its mean test RMSE.

    Each regressor is wrapped in a pipeline that first applies the steps in
    `preprocessfunc`, then evaluated with `cross_validate` using `rmse` as the
    scoring function.

    Args:
        regressors: dict mapping a display name to an estimator.
        features: feature matrix passed to `cross_validate`.
        labels: target values passed to `cross_validate`.
        preprocessfunc: iterable of transformer steps placed before the
            regressor in the pipeline.
        cv: cross-validation setting forwarded to `cross_validate`
            (defaults to 10, the previously hard-coded value).

    Returns:
        A one-row float DataFrame indexed by 'RMSE' with one column per
        regressor, followed by 'Mean' and 'std' computed across the regressor
        columns.

    NOTE(review): no `groups` are passed, so rows that share a music_ID may
    end up in both the training and the test fold - confirm this is intended.
    """
    # The scorer does not depend on the regressor, so build it once.
    scorer = {'rmse': make_scorer(rmse)}

    # Collect plain floats first and build the table afterwards, so the
    # DataFrame has a float dtype rather than the object dtype of a frame
    # created empty and filled cell by cell.
    mean_test_rmse = {}
    for reg_name, reg in tqdm(regressors.items(), desc='regressors'):
        pipeline = make_pipeline(*preprocessfunc, reg)
        cv_result = cross_validate(pipeline, features, labels, scoring=scorer,
                                   cv=cv, return_train_score=False)
        mean_test_rmse[reg_name] = float(cv_result['test_rmse'].mean())
        #scores.loc['R', reg_name] = reg_score['test_r'].mean()

    scores = pd.DataFrame(mean_test_rmse, index=['RMSE'], dtype=float)

    # Both summaries are computed before either is appended, so neither one
    # is contaminated by the other.
    mean_rmse = scores.mean(axis=1)
    std_rmse = scores.std(axis=1)

    scores['Mean'] = mean_rmse
    scores['std'] = std_rmse
    return scores

def format_scores(scores):
    def highlight(s):
        is_min = s == min(s)
#         is_max = s == max(s)
#         is_max_or_min = (is_min | is_max)
        return ['background-color: yellow' if v else '' for v in is_min]
    scores = scores.style.apply(highlight, axis=1, subset=pd.IndexSlice[:, :scores.columns[-2]])
    return scores.format('{:.3f}')

### Multiple regressors on audio features

In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate

from math import sqrt

import IPython.display as ipd

In [11]:
prefunc = [StandardScaler()]


def _evaluate_audio_target(heading, frame):
    """Print `heading`, cross-validate all regressors on `frame`, and display the scores.

    `frame` is split by load_audio_dataset (last column = label).
    Returns the (features, labels, scores) triple for later inspection.
    """
    print(heading)
    features, labels = load_audio_dataset(frame)
    scores = cross_val_regression(regressors, features, labels, prefunc)
    ipd.display(format_scores(scores))
    return features, labels, scores


print('Audio Features:\n')

features_a_mean, labels_a_mean, scores_a_a_mean = _evaluate_audio_target(
    'In Arousal (mean) dimension...', audio_features_a_mean)

features_v_mean, labels_v_mean, scores_a_v_mean = _evaluate_audio_target(
    'In Valence (mean) dimension ...', audio_features_v_mean)

features_a_std, labels_a_std, scores_a_a_std = _evaluate_audio_target(
    'In Arousal (std) dimension...', audio_features_a_std)

features_v_std, labels_v_std, scores_a_v_std = _evaluate_audio_target(
    'In Valence (std) dimension...', audio_features_v_std)

regressors:   0%|          | 0/10 [00:00<?, ?it/s]

Audio Features:

In Arousal (mean) dimension...


regressors:  40%|████      | 4/10 [00:06<00:10,  1.74s/it]

KeyboardInterrupt: 

### Multiple regressors on EDA features

In [None]:
prefunc = [StandardScaler()]


def _evaluate_eda_target(heading, features, labels):
    """Print `heading`, cross-validate all regressors on (features, labels), display and return the scores."""
    print(heading)
    scores = cross_val_regression(regressors, features, labels, prefunc)
    ipd.display(format_scores(scores))
    return scores


print('EDA Features:\n')

scores_eda_a_mean = _evaluate_eda_target(
    'In Arousal (mean) dimension...', eda_features_a_mean, eda_arousal_mean)

scores_eda_v_mean = _evaluate_eda_target(
    'In Valence (mean) dimension...', eda_features_v_mean, eda_valence_mean)

scores_eda_a_std = _evaluate_eda_target(
    'In Arousal (std) dimension...', eda_features_a_std, eda_arousal_std)

scores_eda_v_std = _evaluate_eda_target(
    'In Valence (std) dimension...', eda_features_v_std, eda_valence_std)

### Multiple regressors on audio + EDA features

In [None]:
prefunc = [StandardScaler()]

# Attach the annotations to the fusion rows by (music_ID, frame) so that
# features and labels come from the same rows. Pairing fusion_features with
# the label series taken from audio_df is only correct if both inner merges
# happen to keep identical rows in identical order.
# Missing values are replaced by 0, as is done for audio_df.
# NOTE(review): this also zero-fills any missing EDA values - confirm that is
# acceptable.
fusion_labeled = (
    fusion_dataset
    .merge(VA_mean_df, on=['music_ID', 'frame'])
    .merge(VA_std_df, on=['music_ID', 'frame'])
    .fillna(0)
)
fusion_target_columns = ['Arousal(mean)', 'Valence(mean)', 'Arousal(std)', 'Valence(std)']
fusion_inputs = fusion_labeled.drop(columns=['music_ID', 'frame'] + fusion_target_columns)

print('Audio + EDA Features:\n')

print('In Arousal (mean) dimension...')
scores_f_a_mean = cross_val_regression(regressors, fusion_inputs, fusion_labeled['Arousal(mean)'], prefunc)
ipd.display(format_scores(scores_f_a_mean))

print('In Valence (mean) dimension...')
scores_f_v_mean = cross_val_regression(regressors, fusion_inputs, fusion_labeled['Valence(mean)'], prefunc)
ipd.display(format_scores(scores_f_v_mean))

print('In Arousal (std) dimension...')
scores_f_a_std = cross_val_regression(regressors, fusion_inputs, fusion_labeled['Arousal(std)'], prefunc)
ipd.display(format_scores(scores_f_a_std))

print('In Valence (std) dimension...')
scores_f_v_std = cross_val_regression(regressors, fusion_inputs, fusion_labeled['Valence(std)'], prefunc)
ipd.display(format_scores(scores_f_v_std))