# Clustering

In [37]:
import numpy as np
import pandas as pd
import os
import glob
import json
from sklearn.cluster import DBSCAN, KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import plotly.express as px
from clusteval import clusteval
import hdbscan

In [38]:
pd.set_option('display.max_rows', 20)

## Data loading

In [39]:
my_dir = os.getcwd()

In [40]:
# read essentia features
essentia_json_file = open(my_dir + '/RESULTS/essentia_all_features.json', 'r')
es_df = pd.read_json(json.load(essentia_json_file), orient='index')
#es_df['file'] = es_df.index
es_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 404 entries, P_SteelyDan_DoItAga to K_Faure_Romance
Columns: 161 entries, lowlevel.average_loudness to tonal.tuning_nontempered_energy_ratio
dtypes: float64(110), int64(10), object(41)
memory usage: 511.3+ KB


In [41]:
# read opensmile features
# to be able to match datasets, remove .wav suffix from file name and use as index
os_emobase_csv_file = open(my_dir + '/RESULTS/functionals_emobase.csv', 'r')
os_emobase_df = pd.read_csv(os_emobase_csv_file)
os_emobase_df['file'] = os_emobase_df['file'].str.slice(stop=-4)
os_emobase_df.set_index('file', inplace=True)
os_emobase_df.info()

os_compare_csv_file = open(my_dir + '/RESULTS/functionals_compare.csv', 'r')
os_compare_df = pd.read_csv(os_compare_csv_file)
os_compare_df['file'] = os_compare_df['file'].str.slice(stop=-4)
os_compare_df.set_index('file', inplace=True)
os_compare_df.info()

os_egemaps_csv_file = open(my_dir + '/RESULTS/functionals_eGeMAPS.csv', 'r')
os_egemaps_df = pd.read_csv(os_egemaps_csv_file)
os_egemaps_df['file'] = os_egemaps_df['file'].str.slice(stop=-4)
os_egemaps_df.set_index('file', inplace=True)
os_egemaps_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 404 entries, H_2Pac_AllEyez to P_ZaraLarsson_LushLif
Columns: 988 entries, pcm_intensity_sma_max to F0env_sma_de_iqr1-3
dtypes: float64(988)
memory usage: 3.0+ MB
<class 'pandas.core.frame.DataFrame'>
Index: 404 entries, H_2Pac_AllEyez to P_ZaraLarsson_LushLif
Columns: 6373 entries, audspec_lengthL1norm_sma_range to mfcc_sma_de[14]_stddevFallingSlope
dtypes: float64(6373)
memory usage: 19.6+ MB
<class 'pandas.core.frame.DataFrame'>
Index: 404 entries, H_2Pac_AllEyez to P_ZaraLarsson_LushLif
Data columns (total 88 columns):
 #   Column                                          Non-Null Count  Dtype  
---  ------                                          --------------  -----  
 0   F0semitoneFrom27.5Hz_sma3nz_amean               404 non-null    float64
 1   F0semitoneFrom27.5Hz_sma3nz_stddevNorm          404 non-null    float64
 2   F0semitoneFrom27.5Hz_sma3nz_percentile20.0      404 non-null    float64
 3   F0semitoneFrom27.5Hz_sma3nz_percenti

In [42]:
# create features df
feature_df = pd.concat([es_df, os_emobase_df, os_compare_df, os_egemaps_df], axis=1, join='inner')
feature_df

Unnamed: 0,lowlevel.average_loudness,lowlevel.barkbands.mean,lowlevel.barkbands.stdev,lowlevel.barkbands_crest.mean,lowlevel.barkbands_crest.stdev,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_flatness_db.stdev,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_kurtosis.stdev,lowlevel.barkbands_skewness.mean,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
P_SteelyDan_DoItAga,0.919200,"[7.215923687908798e-05, 0.003221553750336, 0.0...","[0.000125425038277, 0.0033255410380660004, 0.0...",9.547308,3.488428,0.139241,0.046070,3.982578,5.020024,1.412863,...,0.003062,-0.009705,0.889199,3.029647,2.079723,0.405521,0.411678,0.066974,0.131149,-24.320168
P_CarlosSantana_Smooth,0.968585,"[0.000286486232653, 0.003258388955146, 0.00221...","[0.0006648916751140001, 0.00559690920636, 0.00...",9.392195,3.959935,0.128081,0.050442,3.050984,5.621723,1.281382,...,-0.020268,-0.014384,1.398265,2.878598,3.071138,0.231735,0.303927,0.096667,0.099652,-23.505167
P_Bilderbuch_Maschin,0.975059,"[0.002396335126832, 0.014416303485631, 0.00317...","[0.007265429478138001, 0.022202195599675, 0.00...",12.622601,4.904925,0.187935,0.094121,14.597932,37.198483,2.291035,...,-0.058589,-0.007708,1.248563,3.408434,3.064765,0.223082,0.220799,0.098030,0.117959,-23.464863
H_ScroobiusPip_LetEmCo,0.929393,"[7.407886005239561e-05, 0.0027056001126760003,...","[0.00022043807257400002, 0.0068399272859090005...",8.377178,3.198502,0.148757,0.024648,1.037190,2.933415,1.065743,...,-0.017019,-0.005999,5.350882,4.937630,4.138470,0.166604,0.205245,0.068852,0.060234,-12.224318
K_Lalo_Symphon,0.487808,"[0.00010404754721000001, 0.000633063260465, 0....","[0.000342587591148, 0.004935638513416001, 0.00...",10.557078,3.947501,0.186368,0.041468,1.195475,3.354729,0.442220,...,0.046756,-0.009554,0.174899,3.047714,0.675338,1.372222,1.108049,0.160000,0.347645,-29.190495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
H_Mattafix_BigCity,0.888376,"[0.002617650199681, 0.027940051630139, 0.00433...","[0.004378757905215001, 0.041353624314069005, 0...",12.973980,5.990039,0.149148,0.061467,13.672770,28.042810,2.560586,...,-0.063773,-0.008385,1.148074,3.371090,3.728722,0.157391,0.265271,0.105128,0.125723,-22.782228
H_Deli_5_32,0.777475,"[0.0030845752917230003, 0.030100520700216002, ...","[0.0072015416808420005, 0.069281347095966, 0.0...",15.853027,5.588224,0.343833,0.126390,25.335560,65.085907,2.147535,...,-0.065532,-0.021001,1.692957,1.383103,1.884904,0.451593,0.585942,0.077683,0.078543,-20.583393
K_Casella_Italia,0.757737,"[1.6204321582335982e-05, 0.004550766199827, 0....","[4.532530147116632e-05, 0.0073969112709160005,...",10.892110,3.937567,0.252456,0.072970,4.767667,5.132807,1.546931,...,0.035273,-0.008329,0.550970,1.586612,0.870322,1.064000,2.107083,0.093846,0.217115,-26.679905
H_KRS-One_SoundOf,0.966410,"[0.000578074192162, 0.013757576234638, 0.00601...","[0.001001785509288, 0.022666951641440003, 0.01...",10.825400,5.489381,0.154187,0.076684,15.478809,36.061810,1.999858,...,-0.037110,-0.005530,0.912849,4.817774,5.173707,0.099517,0.079417,0.082597,0.096028,-24.445463


In [43]:
with open('custom_features.csv', 'r') as f:
    custom_features = f.readlines()
    custom_features = list(map(str.strip, custom_features))

feature_df = feature_df[custom_features]
feature_df

Unnamed: 0,loudness_sma3_amean,loudness_sma3_stddevNorm,loudness_sma3_percentile20.0,loudness_sma3_percentile50.0,loudness_sma3_percentile80.0,loudness_sma3_pctlrange0-2,loudness_sma3_meanRisingSlope,loudness_sma3_stddevRisingSlope,loudness_sma3_meanFallingSlope,loudness_sma3_stddevFallingSlope,...,pcm_fftMag_spectralSkewness_sma_upleveltime75,pcm_fftMag_spectralSkewness_sma_upleveltime90,pcm_fftMag_spectralSkewness_sma_risetime,pcm_fftMag_spectralSkewness_sma_leftctime,pcm_fftMag_spectralSkewness_sma_lpgain,pcm_fftMag_spectralSkewness_sma_lpc0,pcm_fftMag_spectralSkewness_sma_lpc1,pcm_fftMag_spectralSkewness_sma_lpc2,pcm_fftMag_spectralSkewness_sma_lpc3,pcm_fftMag_spectralSkewness_sma_lpc4
P_SteelyDan_DoItAga,1.538737,0.412269,1.074508,1.567701,2.075794,1.001285,14.844894,11.653144,7.304061,3.943636,...,0.001515,0.000433,0.524903,0.514403,0.433507,-1.714201,0.615196,0.679010,-0.851437,0.282827
P_CarlosSantana_Smooth,1.871535,0.307418,1.565613,1.951817,2.292089,0.726476,13.915514,8.948760,5.100642,3.071945,...,0.006575,0.001252,0.491387,0.509712,0.339347,-1.558618,0.471575,0.536048,-0.666430,0.228317
P_Bilderbuch_Maschin,1.440805,0.484144,0.740741,1.523065,2.109256,1.368516,18.565580,14.489251,7.212107,4.641813,...,0.003275,0.000193,0.510790,0.522451,4.798762,-1.767786,0.862768,0.319551,-0.637279,0.246736
H_ScroobiusPip_LetEmCo,4.773602,0.263892,3.530034,4.767560,5.914018,2.383984,32.204150,17.877026,21.857838,10.628002,...,0.010660,0.002340,0.496489,0.525494,0.195568,-1.592993,0.501534,0.614809,-0.723284,0.212770
K_Lalo_Symphon,1.080630,0.480699,0.694951,1.033156,1.481316,0.786365,11.268788,5.823882,6.341367,3.850866,...,0.002249,0.000750,0.511500,0.514879,0.142480,-1.682672,0.686499,0.492324,-0.730945,0.248477
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
H_Mattafix_BigCity,1.472121,0.427871,0.970976,1.509870,2.006747,1.035772,14.515555,9.254900,7.586551,4.240226,...,0.003508,0.001079,0.521997,0.516469,1.237256,-1.515968,0.277882,0.792927,-0.751490,0.213251
H_Deli_5_32,0.748189,0.633599,0.509825,0.651870,0.811578,0.301753,15.637335,16.011223,9.421942,10.245292,...,0.016669,0.003501,0.541347,0.515258,44.870720,-1.735665,0.689569,0.615651,-0.883552,0.343426
K_Casella_Italia,1.042671,0.501802,0.670385,0.919994,1.428157,0.757772,3.983372,2.616134,2.318078,1.118226,...,0.003262,0.001522,0.501305,0.510768,1.806407,-1.187223,-0.154036,0.771603,-0.322216,-0.095709
H_KRS-One_SoundOf,1.362671,0.436007,0.899966,1.366131,1.809999,0.910032,17.391273,11.113556,10.704032,4.840636,...,0.006743,0.000500,0.487634,0.547976,1.517120,-1.456996,0.161408,0.810954,-0.605660,0.110270


In [44]:
# read gems data
gems = pd.read_csv('rating_data/GEMS_songs_overview.csv', skiprows=1)
gems.columns = gems.columns.str.lower()
# there's spaces in song titles - replace with _
gems.replace(' ','_', regex=True, inplace=True)
# there's a typo in the underlying csv
gems.rename(columns={"transcendece": "transcendence"}, inplace=True)

gems_feature_cols = ['wonder', 'transcendence', 'tenderness', 'nostalgia', 'peacefulness', 'sadness', 'tension', 'energy', 'joyful activation']
gems_df = gems[['mp3-code', 'genre', 'code', 'title', 'artist'] + gems_feature_cols]
gems_df = gems_df.set_index('mp3-code')

gems_df.dropna(inplace=True)
gems_df

Unnamed: 0_level_0,genre,code,title,artist,wonder,transcendence,tenderness,nostalgia,peacefulness,sadness,tension,energy,joyful activation
mp3-code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
H_Trailerpark_Schlech,Hip_Hop,H001,Schlechter_Tag,Trailerpark,15.65,19.68,5.30,3.54,5.08,0.00,11.20,2.35,22.33
H_Desiigner_Panda,Hip_Hop,H002,Panda,Desiigner,24.69,4.69,9.30,0.78,7.92,1.06,9.33,22.73,28.26
H_NAS_DooRags,Hip_Hop,H003,Doo_Rags,NAS,24.43,18.57,19.88,14.29,17.55,2.50,0.00,27.48,24.67
H_DMX_XGonGiv,Hip_Hop,H004,X_Gon'_Give_It_To_Ya,DMX,17.27,7.05,8.60,9.89,9.52,0.73,0.00,37.77,37.27
H_Xatar_MeineGr,Hip_Hop,H005,Meine_Große_Liebe,Xatar,6.46,15.86,9.27,0.00,0.67,0.00,3.79,15.98,13.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...
P_Bilderbuch_Maschin,Pop,P140,Maschin,Bilderbuch,7.28,2.44,2.53,5.07,9.92,0.00,10.48,10.14,20.14
P_FletwoodMac_Dreams,Pop,P141,Dreams,Fletwood_Mac,12.04,9.35,17.41,22.00,22.82,1.36,1.71,3.24,14.06
P_Oasis_DontLoo,Pop,P142,Don't_Look_Back_In_Anger,Oasis,10.31,7.60,14.10,31.85,15.00,10.03,6.56,3.99,18.47
P_DerJungemitderGitarre_HalloWo,Pop,P143,Hallo_Worum_Gehts_Ich_Bin_Dagegen,Der_Junge_mit_der_Gitarre,11.20,8.11,0.00,0.00,4.20,1.33,8.74,15.02,27.52


In [45]:
int_df = pd.merge(gems_df[gems_feature_cols], feature_df, how ='inner',left_index=True, right_index=True)
int_df

Unnamed: 0,wonder,transcendence,tenderness,nostalgia,peacefulness,sadness,tension,energy,joyful activation,loudness_sma3_amean,...,pcm_fftMag_spectralSkewness_sma_upleveltime75,pcm_fftMag_spectralSkewness_sma_upleveltime90,pcm_fftMag_spectralSkewness_sma_risetime,pcm_fftMag_spectralSkewness_sma_leftctime,pcm_fftMag_spectralSkewness_sma_lpgain,pcm_fftMag_spectralSkewness_sma_lpc0,pcm_fftMag_spectralSkewness_sma_lpc1,pcm_fftMag_spectralSkewness_sma_lpc2,pcm_fftMag_spectralSkewness_sma_lpc3,pcm_fftMag_spectralSkewness_sma_lpc4
H_2Pac_AllEyez,8.44,6.01,9.27,7.34,17.59,0.81,12.55,12.91,15.62,1.025275,...,0.001761,0.000293,0.502201,0.513799,2.470193,-1.729068,0.739641,0.600028,-0.871057,0.290653
H_2Pac_KeepYaH,4.90,6.02,4.90,5.52,19.81,3.13,3.60,5.89,13.66,1.258047,...,0.001706,0.000731,0.500244,0.529139,3.156822,-1.610032,0.517668,0.710965,-1.007869,0.419844
H_50Cent_CandySh,7.23,1.94,15.27,12.66,4.45,0.00,3.15,20.87,33.57,1.130380,...,0.001041,0.000625,0.498750,0.528640,6.657747,-1.702263,0.755726,0.465474,-0.822655,0.344334
H_50Cent_InDaClu,9.26,4.18,3.31,9.81,10.30,0.00,7.59,16.79,35.35,1.085617,...,0.003681,0.001315,0.516307,0.543541,7.277155,-1.640338,0.595514,0.549269,-0.727419,0.268608
H_ASAPRocky_Fashion,4.03,5.66,5.80,4.98,7.83,2.30,10.22,4.95,12.41,1.368002,...,0.008106,0.003513,0.571892,0.529603,1.143816,-2.071668,1.361044,0.136727,-0.713763,0.309816
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P_VanessaCarlton_AThousa,26.49,4.31,13.87,27.54,13.58,2.93,4.32,5.92,32.85,1.449403,...,0.003912,0.001087,0.525435,0.520548,0.622509,-1.801850,0.773830,0.590474,-0.863828,0.316903
P_Weeknd_Starboy,21.36,11.14,12.89,10.58,14.75,4.36,10.23,10.27,24.65,1.356187,...,0.001766,0.001178,0.502650,0.521649,3.610578,-1.733542,0.914547,0.303814,-0.845247,0.385616
P_WhitneyHouston_IWannaD,23.95,7.15,10.11,25.57,14.98,2.38,0.94,28.61,51.04,1.759732,...,0.002028,0.000760,0.519777,0.513568,0.147094,-1.812362,0.853797,0.424794,-0.719691,0.268954
P_Wrabel_TheVill,19.17,13.78,18.16,14.19,20.88,16.86,3.63,5.80,11.78,1.651493,...,0.006178,0.000441,0.504745,0.509271,0.170646,-1.619206,0.556818,0.595743,-0.805661,0.279110


## Tagging

In [89]:
from numpy import mean
from numpy import std

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, RepeatedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

import lightgbm as lgb

In [72]:
scaler = MinMaxScaler()
cv = RepeatedKFold(n_splits=5, n_repeats=2, random_state=42)

X, y = int_df[custom_features].to_numpy(), int_df[gems_feature_cols].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 
X_train.shape, y_train.shape

((288, 116), (288, 9))

In [75]:
# model = DecisionTreeRegressor()
# pipe = Pipeline(steps=[("scaler", scaler), ("tree", model)])

# params = {
#     "tree__criterion": ['squared_error', 'absolute_error'],
#     "tree__max_depth": [2, 4, 6, 8, 10, 12]
#     }
    
# search = GridSearchCV(pipe, params, cv=5, scoring="neg_mean_absolute_error", verbose=1)
# search.fit(X, y)
# print("Best parameter (CV score=%0.3f):" % search.best_score_)
# print(search.best_params_)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best parameter (CV score=-5.611):
{'tree__criterion': 'absolute_error', 'tree__max_depth': 2}


In [51]:
model = DecisionTreeRegressor(model__criterion='absolute_error', model__max_depth= 2, n_jobs=-1)
pipe = Pipeline(steps=[("scaler", scaler), ("tree", model)])

results = []
for train_ix, test_ix in cv.split(X):
		X_train, X_test = X[train_ix], X[test_ix]
		y_train, y_test = y[train_ix], y[test_ix]

		pipe.fit(X_train, y_train)
		mae = mean_absolute_error(pipe.predict(X_test), y_test)

		print('>%.3f' % mae)
		results.append(mae)

print('MAE: %.3f (%.3f)' % (mean(results), std(results)))

(5.34077991287895, 50.55318004144236)

In [85]:
# model = MultiOutputRegressor(lgb.LGBMRegressor(), n_jobs=-1)
# pipe = Pipeline(steps=[("scaler", scaler), ("lgbm", model)])

# params = {
#     'lgbm__estimator__boosting_type': ['gbdt', 'dart'],
#     'lgbm__estimator__learning_rate': [0.005, 0.01, 0.1],
#     'lgbm__estimator__n_estimators': [10, 100, 500, 1000],
#     'lgbm__estimator__objective': ['regression', 'regression_l1', 'huber', 'mape', 'tweedie'],
#     }
    
# search = GridSearchCV(pipe, params, cv=5, scoring="neg_mean_absolute_error", verbose=1)

# print(model.estimator.get_params().keys())

# search.fit(X, y)
# print("Best parameter (CV score=%0.3f):" % search.best_score_)
# print(search.best_params_)

dict_keys(['boosting_type', 'class_weight', 'colsample_bytree', 'importance_type', 'learning_rate', 'max_depth', 'min_child_samples', 'min_child_weight', 'min_split_gain', 'n_estimators', 'n_jobs', 'num_leaves', 'objective', 'random_state', 'reg_alpha', 'reg_lambda', 'silent', 'subsample', 'subsample_for_bin', 'subsample_freq'])
Fitting 5 folds for each of 120 candidates, totalling 600 fits
Best parameter (CV score=-5.193):
{'lgbm__estimator__boosting_type': 'gbdt', 'lgbm__estimator__learning_rate': 0.01, 'lgbm__estimator__n_estimators': 1000, 'lgbm__estimator__objective': 'regression_l1'}


In [86]:
model = MultiOutputRegressor(lgb.LGBMRegressor(boosting_type='gbdt', learning_rate=0.01, n_estimators=1000, objective='regression_l1'), n_jobs=-1)
pipe = Pipeline(steps=[("scaler", scaler), ("lgbm", model)])

results = []
for train_ix, test_ix in cv.split(X):
		X_train, X_test = X[train_ix], X[test_ix]
		y_train, y_test = y[train_ix], y[test_ix]

		pipe.fit(X_train, y_train)
		mae = mean_absolute_error(pipe.predict(X_test), y_test)

		print('>%.3f' % mae)
		results.append(mae)

print('MAE: %.3f (%.3f)' % (mean(results), std(results)))

>5.109
>4.937
>4.867
>4.939
>4.983
>4.767
>4.677
>5.021
>5.088
>5.031
MAE: 4.942 (0.131)


In [92]:
def get_model(n_inputs, n_outputs):
	model = Sequential()
	model.add(Dense(50, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
	#model.add(BatchNormalization())
	#model.add(Dropout(0.4))
	#model.add(Dense(20, activation='relu'))
	model.add(Dense(n_outputs))
	model.compile(loss='mae', optimizer='adam')
	return model
 
def evaluate_model(X, y):
	results = list()
	n_inputs, n_outputs = X.shape[1], y.shape[1]
	for train_ix, test_ix in cv.split(X):
		X_train, X_test = X[train_ix], X[test_ix]
		y_train, y_test = y[train_ix], y[test_ix]

		model = get_model(n_inputs, n_outputs)

		model.fit(X_train, y_train, verbose=0, epochs=100)
		mae = model.evaluate(X_test, y_test, verbose=0)

		print('>%.3f' % mae)
		results.append(mae)
	return results


results = evaluate_model(scaler.fit_transform(X), y)
print('MAE: %.3f (%.3f)' % (mean(results), std(results)))

>5.487
>5.312
>5.057
>5.152
>4.941
>5.041
>5.029
>5.305
>5.279
>5.145
MAE: 5.175 (0.159)
