Project name: DJ-Running

Authors: Jorge García de Quirós, Sandra Baldassarri, Pedro Álvarez

Affiliation/Institution: Computer Science and Systems Engineering Department, University of Zaragoza (Spain)

Paper: RIADA: a machine-learning based infrastructure for recognising the emotions of the Spotify songs

Date: October, 2020

In [13]:
import warnings
import pandas as pd
# Suppress every warning for the rest of the kernel session.
warnings.simplefilter("ignore")

# Preprocessing utilities and the estimators used as one-vs-rest classifiers
from sklearn import preprocessing
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# Scoring functions used for feature selection
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif

from sklearn.model_selection import train_test_split

import numpy as np
import configparser

# Runs the companion notebook so that its definitions become available here.
# NOTE(review): spotipy, SpotifyClientCredentials, search_feat_df, findBestModel,
# printResults and saveModels are used below but never imported in this notebook;
# presumably they all come from Tools.ipynb -- confirm.
%run Tools.ipynb

In [14]:
# Spotify API credentials live in an external INI file, outside the notebook.
# A unicode literal is used instead of calling .decode() on a str literal:
# str has no .decode() on Python 3, while u'' works on both Python 2 and 3.
CONFIG_PATH = u'../../config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail early with an explicit message instead
# of hitting an opaque KeyError on the credential lookups below.
if not config.read(CONFIG_PATH):
    raise IOError("Could not read configuration file: " + CONFIG_PATH)

CLIENT_ID = config['DEFAULT']['SP_CLIENT_ID']
CLIENT_SECRET = config['DEFAULT']['SP_CLIENT_SECRET']

# Spotify client authenticated with the client-credentials flow;
# every HTTP request is given a 20 second timeout.
CSM = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
SP = spotipy.Spotify(client_credentials_manager=CSM, requests_timeout=20)

# Track identifier followed by the audio features requested for each song.
FEATURES_M = [
    "id",
    "valence", "energy", "liveness", "tempo",
    "speechiness", "acousticness", "instrumentalness", "danceability",
    "key", "duration_ms", "loudness", "mode",
]

In [23]:
# Hyper-parameter search space for each candidate estimator.

# LinearSVC: tol spans 1e-8 .. 1e-2 and C spans 0.01 .. 10 (one value per decade).
l_svc_param = {
    "tol": np.logspace(-8, -2, 7),
    "C": np.logspace(-2, 1, 4),
    "class_weight": ["balanced", None],
    'random_state': [0],
}

# k-nearest neighbours: odd k from 3 to 15, leaf sizes 15, 18, ..., 45.
nn_param = {
    "n_neighbors": range(3, 16, 2),
    "weights": ["uniform", "distance"],
    "algorithm": ["auto", "ball_tree", "kd_tree"],
    "leaf_size": range(15, 46, 3),
    'n_jobs': [-1],
}

# Random forest. Note that np.arange(2, 10, 5) only yields the values 2 and 7.
rf_param = {
    "n_estimators": np.arange(10, 200, 25),
    "criterion": ["gini", "entropy"],
    "min_samples_leaf": np.arange(2, 10, 5),
    "min_samples_split": [2, 5, 10],
    'max_depth': list(range(10, 101, 10)),
    'bootstrap': [True, False],
    'random_state': [0],
    'n_jobs': [-1],
}

# Each entry pairs an estimator instance with its own search space.
models = [
    [LinearSVC(random_state=0), l_svc_param],
    [KNeighborsClassifier(n_jobs=-1), nn_param],
    [RandomForestClassifier(n_jobs=-1, random_state=0), rf_param],
]

# Scoring functions tried when selecting features.
feat_select_aprox = [f_classif, mutual_info_classif, chi2]

# Labelled songs: one row per track id with a score and one binary column per emotion.
SS_GE = pd.read_csv('../Datasets/fSP_lSP.csv')

SS_GE.tail()

Unnamed: 0,id,score,happy,sad,angry,relaxed
5187,7MWYv1m0d023BdbLyG6NQE,2.494763,0,0,0,1
5188,5FoZCDCl6WhrAIm4Ygyjg7,2.494763,0,0,0,1
5189,4s6LhHAV5SEsOV0lC2tjvJ,2.494763,0,0,0,1
5190,4AyL8u1g3ruazEAlu1bllD,2.494763,0,0,0,1
5191,5frrMKfRVby9maqYjlBpDB,2.494763,0,0,0,1


In [24]:
# Attach the audio features listed in FEATURES_M to every labelled song.
# The merge overwrites SS_GE with its own enriched version, so the step is
# guarded: re-running this cell on an already-enriched frame would otherwise
# merge the features a second time and produce suffixed duplicate columns
# ("valence_x", "valence_y", ...) that break the 'valence':'mode' slices below.
if not set(FEATURES_M).issubset(SS_GE.columns):
    SS_GE_feat = search_feat_df(SS_GE, FEATURES_M)
    SS_GE = pd.merge(SS_GE_feat, SS_GE, on="id")
SS_GE.head()

Unnamed: 0,id,valence,energy,liveness,tempo,speechiness,acousticness,instrumentalness,danceability,key,duration_ms,loudness,mode,score,happy,sad,angry,relaxed
0,05wIrZSwuaVWhcv5FfqeH0,0.944,0.869,0.0678,109.902,0.037,0.0116,0.173,0.596,10,238733,-11.97,1,1.36238,1,0,0,0
1,4kbj5MwxO1bq9wjT5g9HaA,0.619,0.866,0.257,128.038,0.0619,0.00701,0.0,0.578,1,199080,-3.804,1,1.367412,1,0,0,0
2,32OlwWuMpZ6b0aN2RZOeMS,0.928,0.609,0.0344,114.988,0.0824,0.00801,8.2e-05,0.856,0,269667,-7.223,1,1.420086,1,0,0,0
3,6JV2JOEocMgcZxYSZelKcc,0.701,0.83,0.191,113.03,0.0749,0.0123,0.0,0.667,0,236002,-5.715,1,1.443567,1,0,0,0
4,6NPVjNh8Jhru9xOmyQigds,0.962,0.757,0.0886,159.911,0.153,0.286,0.0,0.652,1,233305,-6.819,1,1.45954,1,0,0,0


In [26]:
# Build the normalised dataframe: min-max scaled features plus the emotion labels.
min_max_scaler = preprocessing.MinMaxScaler()
aux_X = min_max_scaler.fit_transform(SS_GE.loc[:, 'valence':'mode'])

# fit_transform returns a bare array, so the feature names (columns 1..12 of
# SS_GE, i.e. everything between "id" and "score") are restored here.
SS_GE_norm = pd.DataFrame(aux_X, columns=SS_GE.columns[1:13])

# The binary emotion labels are appended unscaled, aligned on the row index.
aux_Y = SS_GE.loc[:, 'happy':'relaxed']
SS_GE_norm = SS_GE_norm.join(aux_Y)
SS_GE_norm.head()

Unnamed: 0,valence,energy,liveness,tempo,speechiness,acousticness,instrumentalness,danceability,key,duration_ms,loudness,mode,happy,sad,angry,relaxed
0,0.960067,0.871242,0.049079,0.386788,0.015249,0.011643,0.175101,0.583515,0.909091,0.365024,0.675977,1.0,1,0,0,0
1,0.618537,0.868224,0.250506,0.499068,0.041989,0.007035,0.0,0.563941,0.090909,0.29528,0.879795,1.0,1,0,0,0
2,0.943253,0.609701,0.013521,0.418276,0.064003,0.008039,8.2e-05,0.866246,0.0,0.419433,0.794459,1.0,1,0,0,0
3,0.704708,0.832011,0.180241,0.406154,0.055949,0.012346,0.0,0.660722,0.0,0.360221,0.832098,1.0,1,0,0,0
4,0.978983,0.758578,0.071223,0.696394,0.13982,0.287146,0.0,0.644411,0.090909,0.355477,0.804543,1.0,1,0,0,0


In [27]:
# Feature columns only: everything except the four trailing emotion labels.
SS_GE_norm.columns[:-4]

Index([u'valence', u'energy', u'liveness', u'tempo', u'speechiness',
       u'acousticness', u'instrumentalness', u'danceability', u'key',
       u'duration_ms', u'loudness', u'mode'],
      dtype='object')

In [8]:
# "sad" vs. rest: stratified 70/30 hold-out split over the normalised features.
sad_labels = SS_GE_norm.loc[:, "sad"]
X_train, X_test, y_train, y_test = train_test_split(
    SS_GE_norm.loc[:, "valence":"mode"],
    sad_labels,
    test_size=0.3,
    random_state=0,
    stratify=sad_labels)

# The search only sees the training portion (features joined with their label).
# NOTE(review): 3, 12 and 30 are passed straight to findBestModel -- presumably
# feature-count bounds and a number of search iterations; confirm in Tools.ipynb.
results_s = findBestModel(
    X_train.join(y_train), models, feat_select_aprox, 3, 12, "sad", 30)

printResults(results_s, X_train, X_test, y_train, y_test)
saveModels(results_s, "1v3", "SS_GE_sad")

--------------------------------------------------------------------
LinearSVC(C=1.0, class_weight='balanced', dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=0.001, verbose=0)
Features:['danceability', 'key', 'speechiness', 'mode', 'instrumentalness', 'duration_ms', 'tempo', 'liveness', 'loudness', 'valence', 'acousticness', 'energy']
F1_test: 0.7834
F1_train: 0.7898
Precision: 0.7752
Recall: 0.7991
Accuracy: 0.8036
--------------------------------------------------------------------
KNeighborsClassifier(algorithm='kd_tree', leaf_size=27, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=13, p=2,
           weights='distance')
Features:['instrumentalness', 'duration_ms', 'tempo', 'liveness', 'loudness', 'valence', 'acousticness', 'energy']
F1_test: 0.8163
F1_train: 0.8147
Precision: 0.8185
Recall: 0.8142
Accuracy: 0.8421
--------------------------------------

In [9]:
# "happy" vs. rest: stratified 70/30 hold-out split over the normalised features.
happy_labels = SS_GE_norm.loc[:, "happy"]
X_train, X_test, y_train, y_test = train_test_split(
    SS_GE_norm.loc[:, "valence":"mode"],
    happy_labels,
    test_size=0.3,
    random_state=0,
    stratify=happy_labels)

# The search only sees the training portion (features joined with their label).
# NOTE(review): 3, 12 and 30 are passed straight to findBestModel -- presumably
# feature-count bounds and a number of search iterations; confirm in Tools.ipynb.
results_h = findBestModel(
    X_train.join(y_train), models, feat_select_aprox, 3, 12, "happy", 30)

printResults(results_h, X_train, X_test, y_train, y_test)
saveModels(results_h, "1v3", "SS_GE_happy")

--------------------------------------------------------------------
LinearSVC(C=10.0, class_weight='balanced', dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, verbose=0)
Features:['key', 'liveness', 'speechiness', 'tempo', 'duration_ms', 'loudness', 'instrumentalness', 'energy', 'danceability', 'acousticness', 'valence']
F1_test: 0.7525
F1_train: 0.7612
Precision: 0.7475
Recall: 0.7715
Accuracy: 0.7670
--------------------------------------------------------------------
KNeighborsClassifier(algorithm='ball_tree', leaf_size=45, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=11, p=2,
           weights='distance')
Features:['speechiness', 'tempo', 'duration_ms', 'loudness', 'instrumentalness', 'energy', 'danceability', 'acousticness', 'valence']
F1_test: 0.8225
F1_train: 0.8076
Precision: 0.8256
Recall: 0.8198
Accuracy: 0.8434
------------------------

In [10]:
# "angry" vs. rest: stratified 70/30 hold-out split over the normalised features.
angry_labels = SS_GE_norm.loc[:, "angry"]
X_train, X_test, y_train, y_test = train_test_split(
    SS_GE_norm.loc[:, "valence":"mode"],
    angry_labels,
    test_size=0.3,
    random_state=0,
    stratify=angry_labels)

# The search only sees the training portion (features joined with their label).
# NOTE(review): 3, 12 and 30 are passed straight to findBestModel -- presumably
# feature-count bounds and a number of search iterations; confirm in Tools.ipynb.
results_a = findBestModel(
    X_train.join(y_train), models, feat_select_aprox, 3, 12, "angry", 30)

printResults(results_a, X_train, X_test, y_train, y_test)
saveModels(results_a, "1v3", "SS_GE_angry")

--------------------------------------------------------------------
LinearSVC(C=10.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=1e-08, verbose=0)
Features:['mode', 'duration_ms', 'instrumentalness', 'tempo', 'danceability', 'liveness', 'speechiness', 'loudness', 'energy', 'acousticness']
F1_test: 0.8219
F1_train: 0.8402
Precision: 0.8435
Recall: 0.8059
Accuracy: 0.8729
--------------------------------------------------------------------
KNeighborsClassifier(algorithm='kd_tree', leaf_size=27, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=13, p=2,
           weights='distance')
Features:['danceability', 'liveness', 'speechiness', 'loudness', 'energy', 'acousticness']
F1_test: 0.8245
F1_train: 0.8567
Precision: 0.8516
Recall: 0.8055
Accuracy: 0.8761
--------------------------------------------------------------------
RandomForestClas

In [11]:
# "relaxed" vs. rest: stratified 70/30 hold-out split over the normalised features.
relaxed_labels = SS_GE_norm.loc[:, "relaxed"]
X_train, X_test, y_train, y_test = train_test_split(
    SS_GE_norm.loc[:, "valence":"mode"],
    relaxed_labels,
    test_size=0.3,
    random_state=0,
    stratify=relaxed_labels)

# The search only sees the training portion (features joined with their label).
# NOTE(review): 3, 12 and 30 are passed straight to findBestModel -- presumably
# feature-count bounds and a number of search iterations; confirm in Tools.ipynb.
results_r = findBestModel(
    X_train.join(y_train), models, feat_select_aprox, 3, 12, "relaxed", 30)

printResults(results_r, X_train, X_test, y_train, y_test)
saveModels(results_r, "1v3", "SS_GE_relaxed")

--------------------------------------------------------------------
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=0.001, verbose=0)
Features:['tempo', 'liveness', 'mode', 'duration_ms', 'speechiness', 'danceability', 'valence', 'acousticness', 'loudness', 'energy', 'instrumentalness']
F1_test: 0.7332
F1_train: 0.7611
Precision: 0.8624
Recall: 0.6801
Accuracy: 0.9294
--------------------------------------------------------------------
KNeighborsClassifier(algorithm='kd_tree', leaf_size=39, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=5, p=2,
           weights='distance')
Features:['liveness', 'mode', 'duration_ms', 'speechiness', 'danceability', 'valence', 'acousticness', 'loudness', 'energy', 'instrumentalness']
F1_test: 0.7841
F1_train: 0.7998
Precision: 0.8505
Recall: 0.7428
Accuracy: 0.9358
----------------------