# ...

### Imports

In [24]:
import joblib
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV

### Data

In [38]:
# Read the pickled task-3 training set into a DataFrame.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
df = pd.read_pickle(
    "../training_dataset_task3/task_3_training_5bdf9a9ed30b9a66_749fa46_pandas.pkl"
)

df

Unnamed: 0,pianist_id,segment_id,snippet_id,essentia_dissonance_mean,essentia_dissonance_stdev,essentia_dynamic_complexity,essentia_loudness,essentia_onset_rate,essentia_pitch_salience_mean,essentia_pitch_salience_stdev,...,gems_nostalgia_binary,gems_peacefulness_binary,gems_power_binary,gems_joyful_activation_binary,gems_tension_binary,gems_sadness_binary,gemmes_movement_binary,gemmes_force_binary,gemmes_interior_binary,gemmes_wandering_binary
0,1,0,0,0.192237,0.059404,2.040252,16079768.0,0.800000,0.677256,0.095846,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1,0,1,0.143425,0.064204,3.138845,35489248.0,2.600000,0.514913,0.092372,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,1,0,2,0.146967,0.056205,2.019706,42130144.0,2.600000,0.560116,0.115359,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,1,0,3,0.158810,0.059129,3.567908,40922732.0,1.400000,0.596779,0.116061,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1,0,4,0.168547,0.049648,2.329854,51921612.0,1.600000,0.556160,0.143420,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4175,11,26,2,0.202177,0.050760,2.840705,51977460.0,1.800000,0.688834,0.090235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4176,11,26,3,0.174454,0.063426,5.202462,9471357.0,3.200000,0.661253,0.107627,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4177,11,26,4,0.161152,0.074724,4.605277,7378860.5,2.600000,0.645111,0.093705,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4178,11,26,5,0.192846,0.056795,2.563405,22641664.0,3.200000,0.680604,0.094989,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
# Feature matrix: label-based column slice, both end columns are included
X = df.loc[:, "essentia_dissonance_mean":"mirtoolbox_roughness_pct_90"]

# Scale the features with a StandardScaler fitted on all rows, then wrap the
# resulting array in a DataFrame again so the column names are kept.
# The new frame gets a fresh 0..n-1 index (the index of df is not carried over).
X_std = StandardScaler().fit_transform(X)
X = pd.DataFrame(data=X_std, columns=X.columns)

X

Unnamed: 0,essentia_dissonance_mean,essentia_dissonance_stdev,essentia_dynamic_complexity,essentia_loudness,essentia_onset_rate,essentia_pitch_salience_mean,essentia_pitch_salience_stdev,essentia_spectral_centroid_mean,essentia_spectral_centroid_stdev,essentia_spectral_complexity_mean,...,mirtoolbox_novelty_std,mirtoolbox_novelty_pct_10,mirtoolbox_novelty_pct_50,mirtoolbox_novelty_pct_90,mirtoolbox_pulseclarity,mirtoolbox_roughness_mean,mirtoolbox_roughness_std,mirtoolbox_roughness_pct_10,mirtoolbox_roughness_pct_50,mirtoolbox_roughness_pct_90
0,0.074674,-0.208634,-0.502540,-0.624442,-0.967537,0.973840,-0.271925,0.821937,3.705908,1.435001,...,-1.557358,-0.132523,-0.953893,-1.487096,-0.263505,0.037973,-0.066389,0.144988,0.050310,0.011099
1,-1.003098,0.364099,0.130841,-0.318021,0.019897,-2.043147,-0.449124,0.322515,0.242491,-1.242223,...,0.492786,-0.132523,-0.489550,0.512566,1.554357,-1.472847,-1.163010,-1.209892,-1.459551,-1.544004
2,-0.924909,-0.590335,-0.514385,-0.213180,0.019897,-1.203086,0.723664,0.060573,-0.316268,-0.968771,...,1.078765,-0.132523,0.217391,0.652920,0.121509,-1.109245,-0.264529,-1.293552,-1.025679,-1.039652
3,-0.663400,-0.241471,0.378212,-0.232242,-0.638392,-0.521754,0.759491,0.142514,0.058750,0.156288,...,-1.281274,-0.112848,-0.826452,-1.366754,0.349013,-1.147165,-0.177751,-1.201103,-1.137358,-0.971610
4,-0.448402,-1.372774,-0.335574,-0.058600,-0.528677,-1.276604,2.155321,-0.075302,-0.236369,0.179727,...,-1.213569,-0.132523,-0.730973,-0.937755,-0.544921,-1.144299,-0.342477,-1.183691,-1.078803,-1.029728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2277,0.294153,-1.240041,-0.041048,-0.057718,-0.418962,1.188996,-0.558156,-0.524739,-0.573189,0.705796,...,0.736401,-0.132523,0.317939,0.667299,0.706086,0.785422,0.874634,0.562600,0.745939,0.859051
2278,-0.317970,0.271249,1.320595,-0.728771,0.349042,0.676441,0.329168,-0.644184,-0.031043,1.216241,...,-0.259996,-0.132523,-0.433302,-0.248294,1.529043,-0.054989,0.351260,-0.225325,0.058192,-0.052252
2279,-0.611698,1.619394,0.976295,-0.761806,0.019897,0.376446,-0.381145,-0.604478,-0.075596,0.614646,...,-0.983595,-0.132523,-0.578879,-0.983935,1.196165,-0.608549,0.667009,-0.678009,-0.780047,-0.175740
2280,0.088125,-0.520028,-0.200922,-0.520848,0.349042,1.036064,-0.315627,-1.000535,-0.315062,0.750070,...,1.771819,-0.132523,1.398023,2.294032,-0.616783,0.639713,0.749981,0.552866,0.557939,0.731791


### Experiments

In [40]:
# Binary target columns; one classifier is trained per entry.
# The order matters: other cells pair this list with `models` by position.
hl_feat = [
    # columns with the "gems_" prefix
    "gems_wonder_binary",
    "gems_transcendence_binary",
    "gems_tenderness_binary",
    "gems_nostalgia_binary",
    "gems_peacefulness_binary",
    "gems_power_binary",
    "gems_joyful_activation_binary",
    "gems_tension_binary",
    "gems_sadness_binary",
    # columns with the "gemmes_" prefix
    "gemmes_movement_binary",
    "gemmes_force_binary",
    "gemmes_interior_binary",
    "gemmes_wandering_binary",
]

In [18]:
# Hyper-parameter grid searched for every SVC.
# NOTE(review): according to the scikit-learn SVC documentation 'degree' is only
# used by the 'poly' kernel, so the 'rbf' and 'sigmoid' candidates are evaluated
# once per degree value with identical results. The grid is deliberately left
# as-is so that the candidate order (and therefore tie-breaking) is unchanged.
parameters = {
    'C': [0.5, 1, 1.25],
    'kernel': ['poly', 'rbf', 'sigmoid'],
    'degree': [3, 5, 6, 8, 10],
    'gamma': ['scale', 'auto']}

# load the data and reset the index so that index labels equal row positions
# (the fold index lists built below are used positionally by GridSearchCV)
df: pd.DataFrame = pd.read_pickle(
    "../training_dataset_task3/task_3_training_5bdf9a9ed30b9a66_749fa46_pandas.pkl").reset_index()

# select the feature columns (label slice, both end columns included)
X = df.loc[:, "essentia_dissonance_mean":"mirtoolbox_roughness_pct_90"]

# Standardize the features but keep X a DataFrame with its column names.
# Later cells call X.copy() / DataFrame.insert() and model.predict(X) on this
# variable, which breaks (or warns about missing feature names) if X is
# replaced by a bare ndarray here.
# NOTE(review): the scaler is fitted on all rows before cross-validation, so
# the held-out segment contributes to the scaling statistics. Left unchanged
# because the later cells predict on this same scaled X.
X = pd.DataFrame(StandardScaler().fit_transform(X), columns=X.columns)

# Leave-one-segment-out splits, stored as (train_indices, test_indices) tuples:
# for each segment id present in the data, that segment is the test fold and
# all remaining rows are the training fold. The ids are read from the data
# instead of being hard-coded, so a missing or additional segment cannot
# produce an empty or silently skipped fold.
cv = []
for segment in sorted(df["segment_id"].unique()):
    is_test = df["segment_id"] == segment
    train_indices = df.index[~is_test].to_list()
    test_indices = df.index[is_test].to_list()
    cv.append((train_indices, test_indices))

# one fitted GridSearchCV per target, in the same order as hl_feat
models = []

for feature in hl_feat:
    # target value
    y = df[feature]

    # grid search the parameters for a given classifier
    gs_cv = GridSearchCV(SVC(), parameters, cv=cv, n_jobs=10)
    gs_cv.fit(X, y)

    # best mean cross-validated score over all parameter candidates
    score = gs_cv.best_score_
    print(f"{feature} - SVM {score: >5}")
    models.append(gs_cv)

gems_wonder_binary - SVM 0.5643380203250411
gems_transcendence_binary - SVM 0.7244921437913061
gems_tenderness_binary - SVM 0.7645290348772383
gems_nostalgia_binary - SVM 0.76300740992505
gems_peacefulness_binary - SVM 0.7991744892642433
gems_power_binary - SVM 0.8130825202856455
gems_joyful_activation_binary - SVM 0.8520902554306636
gems_tension_binary - SVM 0.7958614228081058
gems_sadness_binary - SVM 0.8817263146934642
gemmes_movement_binary - SVM 0.7510775788479066
gemmes_force_binary - SVM 0.8348353708996514
gemmes_interior_binary - SVM 0.760474399863282
gemmes_wandering_binary - SVM 0.7467392315061088


In [41]:
# Average of the best cross-validated scores over all fitted grid searches
scores = [search.best_score_ for search in models]
np.mean(scores)

0.7731867840398235

In [42]:
# Print the best hyper-parameter combination found by each grid search,
# one line per entry of `models` (which was filled in hl_feat order).
for model in models:
    print(model.best_params_)

{'C': 0.5, 'degree': 5, 'gamma': 'auto', 'kernel': 'poly'}
{'C': 0.5, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
{'C': 0.5, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
{'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
{'C': 0.5, 'degree': 3, 'gamma': 'auto', 'kernel': 'rbf'}
{'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'sigmoid'}
{'C': 0.5, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
{'C': 0.5, 'degree': 3, 'gamma': 'auto', 'kernel': 'sigmoid'}
{'C': 0.5, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
{'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
{'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'sigmoid'}
{'C': 0.5, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
{'C': 1.25, 'degree': 3, 'gamma': 'auto', 'kernel': 'rbf'}


In [43]:
from joblib import dump, load
import os

# Make sure the output directory exists; dump() writes to the given path and
# fails with FileNotFoundError when the directory is missing.
os.makedirs('models', exist_ok=True)

# Persist every fitted grid search under models/<target column name>.
# `models` and `hl_feat` are paired by position.
for feature, model in zip(hl_feat, models):
    dump(model, os.path.join('models', feature))

In [45]:
# Sanity check: reload one persisted grid search from disk and show its best
# hyper-parameters.
# NOTE(review): joblib.load unpickles the file, so only load model files that
# come from a trusted source.
load('models/gemmes_interior_binary').best_params_

{'C': 0.5, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}

In [46]:
# Inspect the current contents of the feature matrix X
X

Unnamed: 0,essentia_dissonance_mean,essentia_dissonance_stdev,essentia_dynamic_complexity,essentia_loudness,essentia_onset_rate,essentia_pitch_salience_mean,essentia_pitch_salience_stdev,essentia_spectral_centroid_mean,essentia_spectral_centroid_stdev,essentia_spectral_complexity_mean,...,mirtoolbox_novelty_std,mirtoolbox_novelty_pct_10,mirtoolbox_novelty_pct_50,mirtoolbox_novelty_pct_90,mirtoolbox_pulseclarity,mirtoolbox_roughness_mean,mirtoolbox_roughness_std,mirtoolbox_roughness_pct_10,mirtoolbox_roughness_pct_50,mirtoolbox_roughness_pct_90
0,0.074674,-0.208634,-0.502540,-0.624442,-0.967537,0.973840,-0.271925,0.821937,3.705908,1.435001,...,-1.557358,-0.132523,-0.953893,-1.487096,-0.263505,0.037973,-0.066389,0.144988,0.050310,0.011099
1,-1.003098,0.364099,0.130841,-0.318021,0.019897,-2.043147,-0.449124,0.322515,0.242491,-1.242223,...,0.492786,-0.132523,-0.489550,0.512566,1.554357,-1.472847,-1.163010,-1.209892,-1.459551,-1.544004
2,-0.924909,-0.590335,-0.514385,-0.213180,0.019897,-1.203086,0.723664,0.060573,-0.316268,-0.968771,...,1.078765,-0.132523,0.217391,0.652920,0.121509,-1.109245,-0.264529,-1.293552,-1.025679,-1.039652
3,-0.663400,-0.241471,0.378212,-0.232242,-0.638392,-0.521754,0.759491,0.142514,0.058750,0.156288,...,-1.281274,-0.112848,-0.826452,-1.366754,0.349013,-1.147165,-0.177751,-1.201103,-1.137358,-0.971610
4,-0.448402,-1.372774,-0.335574,-0.058600,-0.528677,-1.276604,2.155321,-0.075302,-0.236369,0.179727,...,-1.213569,-0.132523,-0.730973,-0.937755,-0.544921,-1.144299,-0.342477,-1.183691,-1.078803,-1.029728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2277,0.294153,-1.240041,-0.041048,-0.057718,-0.418962,1.188996,-0.558156,-0.524739,-0.573189,0.705796,...,0.736401,-0.132523,0.317939,0.667299,0.706086,0.785422,0.874634,0.562600,0.745939,0.859051
2278,-0.317970,0.271249,1.320595,-0.728771,0.349042,0.676441,0.329168,-0.644184,-0.031043,1.216241,...,-0.259996,-0.132523,-0.433302,-0.248294,1.529043,-0.054989,0.351260,-0.225325,0.058192,-0.052252
2279,-0.611698,1.619394,0.976295,-0.761806,0.019897,0.376446,-0.381145,-0.604478,-0.075596,0.614646,...,-0.983595,-0.132523,-0.578879,-0.983935,1.196165,-0.608549,0.667009,-0.678009,-0.780047,-0.175740
2280,0.088125,-0.520028,-0.200922,-0.520848,0.349042,1.036064,-0.315627,-1.000535,-0.315062,0.750070,...,1.771819,-0.132523,1.398023,2.294032,-0.616783,0.639713,0.749981,0.552866,0.557939,0.731791


In [59]:
# Work on a copy so the feature matrix X itself stays untouched
training_data = X.copy()
print(training_data)

# Add one column of predictions per target. Each fitted search predicts on the
# same rows X; every new column is inserted at position 0, so the final column
# order of the targets is the reverse of hl_feat.
for model, feature in zip(models, hl_feat):
    pred = model.predict(X)
    training_data.insert(loc=0, column=feature, value=pred)

training_data

      essentia_dissonance_mean  essentia_dissonance_stdev  \
0                     0.074674                  -0.208634   
1                    -1.003098                   0.364099   
2                    -0.924909                  -0.590335   
3                    -0.663400                  -0.241471   
4                    -0.448402                  -1.372774   
...                        ...                        ...   
2277                  0.294153                  -1.240041   
2278                 -0.317970                   0.271249   
2279                 -0.611698                   1.619394   
2280                  0.088125                  -0.520028   
2281                  0.435541                   3.126546   

      essentia_dynamic_complexity  essentia_loudness  essentia_onset_rate  \
0                       -0.502540          -0.624442            -0.967537   
1                        0.130841          -0.318021             0.019897   
2                       -0.514385   



Unnamed: 0,gemmes_wandering_binary,gemmes_interior_binary,gemmes_force_binary,gemmes_movement_binary,gems_sadness_binary,gems_tension_binary,gems_joyful_activation_binary,gems_power_binary,gems_peacefulness_binary,gems_nostalgia_binary,...,mirtoolbox_novelty_std,mirtoolbox_novelty_pct_10,mirtoolbox_novelty_pct_50,mirtoolbox_novelty_pct_90,mirtoolbox_pulseclarity,mirtoolbox_roughness_mean,mirtoolbox_roughness_std,mirtoolbox_roughness_pct_10,mirtoolbox_roughness_pct_50,mirtoolbox_roughness_pct_90
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,-1.557358,-0.132523,-0.953893,-1.487096,-0.263505,0.037973,-0.066389,0.144988,0.050310,0.011099
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,0.492786,-0.132523,-0.489550,0.512566,1.554357,-1.472847,-1.163010,-1.209892,-1.459551,-1.544004
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,1.078765,-0.132523,0.217391,0.652920,0.121509,-1.109245,-0.264529,-1.293552,-1.025679,-1.039652
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,-1.281274,-0.112848,-0.826452,-1.366754,0.349013,-1.147165,-0.177751,-1.201103,-1.137358,-0.971610
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,-1.213569,-0.132523,-0.730973,-0.937755,-0.544921,-1.144299,-0.342477,-1.183691,-1.078803,-1.029728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.736401,-0.132523,0.317939,0.667299,0.706086,0.785422,0.874634,0.562600,0.745939,0.859051
2278,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.259996,-0.132523,-0.433302,-0.248294,1.529043,-0.054989,0.351260,-0.225325,0.058192,-0.052252
2279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.983595,-0.132523,-0.578879,-0.983935,1.196165,-0.608549,0.667009,-0.678009,-0.780047,-0.175740
2280,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.771819,-0.132523,1.398023,2.294032,-0.616783,0.639713,0.749981,0.552866,0.557939,0.731791


In [62]:
# Persist the feature matrix extended with the predicted target columns
with open('train_data_pred_features.pkl', mode='wb') as f:
    pickle.dump(training_data, f)