Project name: DJ-Running

Authors: Jorge García de Quirós, Sandra Baldassarri, Pedro Álvarez

Affiliation/Institution: Computer Science and Systems Engineering Department, University of Zaragoza (Spain)

Paper: RIADA: a machine-learning based infrastructure for recognising the emotions of the Spotify songs

Date: October, 2020

In [6]:
import warnings
import pandas as pd
warnings.simplefilter("ignore")

#One vs rest classifiers
from sklearn import preprocessing
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

import numpy as np
import configparser

%run Tools.ipynb

# Input CSV files, given relative to the notebook's working directory.
# Despite the *_DIR suffix these are file paths: later cells pass them to
# pd.read_csv to build the SA_GE and SS_GE dataframes respectively.
SA_GE_DIR = '../Datasets/fSP_lAB.csv'
SS_GE_DIR = '../Datasets/fSP_lSP.csv'

In [7]:
# Spotify API credentials are kept out of the notebook, in an external ini file.
config = configparser.ConfigParser()
# A unicode literal gives the same value as the former `'...'.decode()` on
# Python 2 and, unlike it, is also valid on Python 3.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it parsed, so an empty result means the credentials are missing.
if not config.read(u'../../config.ini'):
    raise IOError("Could not read the configuration file '../../config.ini'")

CLIENT_ID = config['DEFAULT']['SP_CLIENT_ID']
CLIENT_SECRET = config['DEFAULT']['SP_CLIENT_SECRET']

# Spotify client built with the client-credentials manager.
# SpotifyClientCredentials / spotipy are not imported in this notebook;
# presumably they are brought into scope by `%run Tools.ipynb` -- confirm.
CSM = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
SP = spotipy.Spotify(client_credentials_manager=CSM, requests_timeout=20)

# Columns requested through search_feat_df. Later cells slice the merged
# frames with .loc[:, 'valence':'mode'], so the relative order of the entries
# after "id" appears to matter -- keep "valence" first and "mode" last.
FEATURES_M = [
    "id",
    "valence", "energy", "liveness", "tempo",
    "speechiness", "acousticness", "instrumentalness", "danceability",
    "key", "duration_ms", "loudness", "mode",
]

In [10]:
# Load the SA_GE dataset; its '_id' column is renamed to 'id' so it can be
# merged with the feature table returned by search_feat_df.
SA_GE = pd.read_csv(SA_GE_DIR).rename(columns={'_id': 'id'})
SA_GE_feat = search_feat_df(SA_GE, FEATURES_M)
SA_GE = pd.merge(SA_GE_feat, SA_GE, on="id")

# Fit a min-max scaler on SA_GE's own feature columns.
# Note: fit() hands back the scaler, so aux_X is the fitted scaler here,
# not a transformed array.
sa_ge_features = SA_GE.loc[:, 'valence':'mode']
min_max_scaler = preprocessing.MinMaxScaler()
aux_X = min_max_scaler.fit(sa_ge_features)
SA_GE.head()

Unnamed: 0,id,valence,energy,liveness,tempo,speechiness,acousticness,instrumentalness,danceability,key,duration_ms,loudness,mode,mood_happy,mood_sad,mood_aggressive,mood_relaxed,moods_mirex
0,3ABnVFVY6MH0lEl6YyDaga,0.511,0.978,0.308,113.009,0.0586,0.00798,0.0,0.563,7,252640,-4.087,0,0.413635,0.063798,0.0,0.808817,Cluster5
1,3YJEIt61V8y43Iti8MIhnU,0.505,0.607,0.71,172.099,0.047,0.0588,0.0719,0.492,0,255907,-11.233,1,0.045076,0.282855,0.063082,0.743057,Cluster3
2,3ghXwIS6cr3P5InauKlqFy,0.268,0.288,0.098,117.482,0.0315,0.746,0.00208,0.436,7,311840,-10.726,1,0.092827,0.677673,0.00536,0.993692,Cluster3
3,4KL4tNheGNqRtOI70ukI7T,0.056,0.719,0.174,114.895,0.0352,0.113,0.967,0.504,6,157347,-5.652,0,0.466487,0.645991,0.0,0.808817,Cluster5
4,2mzh9kabao54H5RMZZ3fOI,0.0717,0.65,0.206,112.671,0.0368,0.223,0.0,0.303,4,304333,-8.652,0,0.779295,0.5,0.289383,0.5,Cluster5


In [11]:
# Load the SS_GE dataset and attach the features returned by search_feat_df.
SS_GE = pd.read_csv(SS_GE_DIR)
SS_GE_feat = search_feat_df(SS_GE, FEATURES_M)
SS_GE = pd.merge(SS_GE_feat, SS_GE, on="id")

# min_max_scaler3 is fitted on SS_GE's feature range; a later cell reuses it
# to normalise SA_GE. fit() hands back the scaler, so aux_X is the scaler here.
ss_ge_features = SS_GE.loc[:, 'valence':'mode']
min_max_scaler3 = preprocessing.MinMaxScaler()
aux_X = min_max_scaler3.fit(ss_ge_features)
SS_GE.head()

Unnamed: 0,id,valence,energy,liveness,tempo,speechiness,acousticness,instrumentalness,danceability,key,duration_ms,loudness,mode,score,happy,sad,angry,relaxed
0,05wIrZSwuaVWhcv5FfqeH0,0.944,0.869,0.0678,109.902,0.037,0.0116,0.173,0.596,10,238733,-11.97,1,1.36238,1,0,0,0
1,4kbj5MwxO1bq9wjT5g9HaA,0.619,0.866,0.257,128.038,0.0619,0.00701,0.0,0.578,1,199080,-3.804,1,1.367412,1,0,0,0
2,32OlwWuMpZ6b0aN2RZOeMS,0.928,0.609,0.0344,114.988,0.0824,0.00801,8.2e-05,0.856,0,269667,-7.223,1,1.420086,1,0,0,0
3,6JV2JOEocMgcZxYSZelKcc,0.701,0.83,0.191,113.03,0.0749,0.0123,0.0,0.667,0,236002,-5.715,1,1.443567,1,0,0,0
4,6NPVjNh8Jhru9xOmyQigds,0.962,0.757,0.0886,159.911,0.153,0.286,0.0,0.652,1,233305,-6.819,1,1.45954,1,0,0,0


# SA_GE against SS_GE model

In [6]:
# Normalised dataframe: SA_GE features rescaled with the scaler fitted on
# SS_GE (min_max_scaler3), then re-joined with the SA_GE mood scores.
feature_frame = SA_GE.loc[:, 'valence':'mode']
aux_X = min_max_scaler3.transform(feature_frame)

# Name the scaled columns after the very slice that was transformed. A
# positional slice of SA_GE.columns would start at 'id' when 'id' is the first
# column and shift every feature name by one. Reusing SA_GE's index keeps the
# join below row-aligned even if that index is not a plain 0..n-1 range.
SA_GE_norm = pd.DataFrame(aux_X, columns=feature_frame.columns, index=SA_GE.index)

aux_Y = SA_GE.loc[:, 'mood_happy':'mood_relaxed']
SA_GE_norm = SA_GE_norm.join(aux_Y)
SA_GE_norm.head()

Unnamed: 0,valence,energy,liveness,tempo,speechiness,acousticness,instrumentalness,danceability,key,duration_ms,loudness,mode,mood_happy,mood_sad,mood_aggressive,mood_relaxed
0,0.505044,0.980887,0.305171,0.406024,0.038445,0.008009,0.0,0.548226,0.636364,0.389485,0.872732,0.0,0.413635,0.063798,0.0,0.808817
1,0.528163,0.606683,0.72973,0.771484,0.023948,0.069977,0.062551,0.445896,0.636364,0.395231,0.692624,1.0,0.045076,0.282855,0.063082,0.743057
2,0.249685,0.286799,0.08172,0.433716,0.009343,0.748995,0.002105,0.409972,0.636364,0.49361,0.707026,1.0,0.092827,0.677673,0.00536,0.993692
3,0.026902,0.720353,0.162588,0.4177,0.013316,0.113451,0.978745,0.483997,0.545455,0.221877,0.83367,0.0,0.466487,0.645991,0.0,0.808817
4,0.043401,0.650944,0.196638,0.403931,0.015034,0.223893,0.0,0.265186,0.363636,0.480406,0.758792,0.0,0.779295,0.5,0.289383,0.5


In [7]:
# Binary ground truth for "sad" derived from the mood_sad score:
#   score > 0.7  -> positive (1)
#   score < 0.1  -> negative (0)
# Rows in between get no label; the evaluation cell drops them through
# join(...).dropna().
# NOTE(review): a NaN score fails both comparisons and would therefore be
# counted as a negative -- confirm the dataset has no missing mood values.
sad_scores = SA_GE_norm.loc[:, 'mood_sad']

Y = sad_scores > 0.7
Y = Y[Y]        # keep only the rows flagged True (positives)
print(len(Y))

Y2 = sad_scores >= 0.1
Y2 = Y2[~Y2]    # keep only the rows flagged False (negatives)
print(len(Y2))

# The series keeps the name 'mood_sad', which is the column name the
# evaluation cell selects after the join.
sad_labels = pd.concat([Y, Y2]).astype('int')

8265
10411


In [14]:
# Identifiers of the SS_GE "sad" models to load from ./Models/.
# The evaluation loop unpacks each loaded entry as a (model, features) pair.
names = [
    "SS_GE_sad_LinearSVC",
    "SS_GE_sad_KNeighborsClassifier",
    "SS_GE_sad_RandomForestClassifier",
]
SS_GE_sad_models = loadModels("./Models/", names)

In [15]:
# Evaluate every SS_GE "sad" model on the labelled subset of SA_GE.
for name in names:
    model, features = SS_GE_sad_models[name]
    print("--------------" + str(model.__class__.__name__) + "-----------------")
    # Build the labelled frame once: joining the labels and dropping NaN keeps
    # only the rows that received a 0/1 label.
    labelled = SA_GE_norm.loc[:, features].join(sad_labels).dropna()
    printComparation(model, features, labelled.loc[:, features], labelled.loc[:, "mood_sad"])

--------------LinearSVC-----------------
F1_test: 0.8445
Precision: 0.8808
Recall: 0.8371
Accuracy: 0.8531
--------------KNeighborsClassifier-----------------
F1_test: 0.8242
Precision: 0.8621
Recall: 0.8175
Accuracy: 0.8344
--------------RandomForestClassifier-----------------
F1_test: 0.7458
Precision: 0.8328
Recall: 0.7448
Accuracy: 0.7717


In [10]:
# Binary ground truth for "happy" derived from the mood_happy score:
#   score > 0.7  -> positive (1)
#   score < 0.1  -> negative (0)
# Rows in between get no label; the evaluation cell drops them through
# join(...).dropna().
# NOTE(review): a NaN score fails both comparisons and would therefore be
# counted as a negative -- confirm the dataset has no missing mood values.
happy_scores = SA_GE_norm.loc[:, 'mood_happy']

Y = happy_scores > 0.7
Y = Y[Y]        # keep only the rows flagged True (positives)
print(len(Y))

Y2 = happy_scores >= 0.1
Y2 = Y2[~Y2]    # keep only the rows flagged False (negatives)
print(len(Y2))

# The series keeps the name 'mood_happy', which is the column name the
# evaluation cell selects after the join.
happy_labels = pd.concat([Y, Y2]).astype('int')

8887
15064


In [16]:
# Identifiers of the SS_GE "happy" models to load from ./Models/.
# The evaluation loop unpacks each loaded entry as a (model, features) pair.
names = [
    "SS_GE_happy_LinearSVC",
    "SS_GE_happy_KNeighborsClassifier",
    "SS_GE_happy_RandomForestClassifier",
]
SS_GE_happy_models = loadModels("./Models/", names)

In [17]:
# Evaluate every SS_GE "happy" model on the labelled subset of SA_GE.
for name in names:
    model, features = SS_GE_happy_models[name]
    print("--------------" + str(model.__class__.__name__) + "-----------------")
    # Build the labelled frame once: joining the labels and dropping NaN keeps
    # only the rows that received a 0/1 label.
    labelled = SA_GE_norm.loc[:, features].join(happy_labels).dropna()
    printComparation(model, features, labelled.loc[:, features], labelled.loc[:, "mood_happy"])

--------------LinearSVC-----------------
F1_test: 0.6498
Precision: 0.6763
Recall: 0.6452
Accuracy: 0.6975
--------------KNeighborsClassifier-----------------
F1_test: 0.6206
Precision: 0.6756
Recall: 0.6210
Accuracy: 0.6900
--------------RandomForestClassifier-----------------
F1_test: 0.6233
Precision: 0.6861
Recall: 0.6240
Accuracy: 0.6950


In [20]:
# Binary ground truth for "aggressive" derived from the mood_aggressive score:
#   score > 0.7        -> positive (1)
#   score not > 0.0    -> negative (0)
# Rows in between get no label; the evaluation cell drops them through
# join(...).dropna().
# NOTE(review): a NaN score fails both comparisons and would therefore be
# counted as a negative -- confirm the dataset has no missing mood values.
aggressive_scores = SA_GE_norm.loc[:, 'mood_aggressive']

Y = aggressive_scores > 0.7
Y = Y[Y]        # keep only the rows flagged True (positives)
print(len(Y))

Y2 = aggressive_scores > 0.0
Y2 = Y2[~Y2]    # keep only the rows flagged False (negatives)
print(len(Y2))

# The series keeps the name 'mood_aggressive', which is the column name the
# evaluation cell selects after the join.
aggressive_labels = pd.concat([Y, Y2]).astype('int')

9663
15900


In [21]:
# Identifiers of the SS_GE "angry" models to load from ./Models/.
# The evaluation loop unpacks each loaded entry as a (model, features) pair.
names = [
    "SS_GE_angry_LinearSVC",
    "SS_GE_angry_KNeighborsClassifier",
    "SS_GE_angry_RandomForestClassifier",
]
SS_GE_angry_models = loadModels("./Models/", names)

In [22]:
# Evaluate every SS_GE "angry" model against the SA_GE "aggressive" labels.
for name in names:
    model, features = SS_GE_angry_models[name]
    print("--------------" + str(model.__class__.__name__) + "-----------------")
    # Build the labelled frame once: joining the labels and dropping NaN keeps
    # only the rows that received a 0/1 label.
    labelled = SA_GE_norm.loc[:, features].join(aggressive_labels).dropna()
    printComparation(model, features, labelled.loc[:, features], labelled.loc[:, "mood_aggressive"])

--------------LinearSVC-----------------
F1_test: 0.6838
Precision: 0.6976
Recall: 0.7086
Accuracy: 0.6858
--------------KNeighborsClassifier-----------------
F1_test: 0.6994
Precision: 0.7004
Recall: 0.7126
Accuracy: 0.7058
--------------RandomForestClassifier-----------------
F1_test: 0.7003
Precision: 0.7042
Recall: 0.7171
Accuracy: 0.7051


In [23]:
# Binary ground truth for "relaxed" derived from the mood_relaxed score:
#   score > 0.95       -> positive (1)
#   score not > 0.1    -> negative (0)
# Rows in between get no label; the evaluation cell drops them through
# join(...).dropna().
# NOTE(review): a NaN score fails both comparisons and would therefore be
# counted as a negative -- confirm the dataset has no missing mood values.
relaxed_scores = SA_GE_norm.loc[:, 'mood_relaxed']

Y = relaxed_scores > 0.95
Y = Y[Y]        # keep only the rows flagged True (positives)
print(len(Y))

Y2 = relaxed_scores > 0.1
Y2 = Y2[~Y2]    # keep only the rows flagged False (negatives)
print(len(Y2))

# The series keeps the name 'mood_relaxed', which is the column name the
# evaluation cell selects after the join.
relaxed_labels = pd.concat([Y, Y2]).astype('int')

8736
7543


In [24]:
# Identifiers of the SS_GE "relaxed" models to load from ./Models/.
# The evaluation loop unpacks each loaded entry as a (model, features) pair.
names = [
    "SS_GE_relaxed_LinearSVC",
    "SS_GE_relaxed_KNeighborsClassifier",
    "SS_GE_relaxed_RandomForestClassifier",
]
SS_GE_relaxed_models = loadModels("./Models/", names)

In [25]:
# Evaluate every SS_GE "relaxed" model on the labelled subset of SA_GE.
for name in names:
    model, features = SS_GE_relaxed_models[name]
    print("--------------" + str(model.__class__.__name__) + "-----------------")
    # Build the labelled frame once: joining the labels and dropping NaN keeps
    # only the rows that received a 0/1 label.
    labelled = SA_GE_norm.loc[:, features].join(relaxed_labels).dropna()
    printComparation(model, features, labelled.loc[:, features], labelled.loc[:, "mood_relaxed"])

--------------LinearSVC-----------------
F1_test: 0.7213
Precision: 0.7929
Recall: 0.7454
Accuracy: 0.7291
--------------KNeighborsClassifier-----------------
F1_test: 0.6679
Precision: 0.7901
Recall: 0.7068
Accuracy: 0.6860
--------------RandomForestClassifier-----------------
F1_test: 0.7196
Precision: 0.8110
Recall: 0.7478
Accuracy: 0.7298
