In [13]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

%matplotlib inline
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

#spotify
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import csv
import io

In [14]:
df = pd.read_csv("top.csv")
df.head()
#df.info()

Unnamed: 0.1,Unnamed: 0,genre,name,artist,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,key,tempo
0,0,classical,Handel / Orch. Hale: Keyboard Suite in D Minor...,George Frideric Handel,93,33,-24,60,927,830,95,51,2,67
1,1,classical,"Goldberg Variations, BWV 988: Aria",Johann Sebastian Bach,454,13,-29,51,995,943,73,244,4,130
2,2,classical,"Clair de Lune, L. 32",Claude Debussy,335,5,-31,37,994,912,62,39,1,65
3,3,classical,"Sonata No. 14 ""Moonlight"" in C-Sharp Minor"", O...",Ludwig van Beethoven,184,5,-37,43,995,887,173,151,1,170
4,4,classical,Miroirs: III. Une barque sur l'océan,Maurice Ravel,170,47,-27,43,981,906,79,30,2,75


In [15]:
#extracting columns we will use to base classifications
#dropping unnecessary columns

# UNCOMMENT FOR MEGADATASET
# df.drop(columns=['year', 'release_date', 'id', 'duration_ms', 'artists', 
#                  'name', 'explicit', 'popularity', 'mode', 'key', 'loudness'], inplace=True)

# df.drop(columns=['Unnamed: 0','year','dur','pop'], inplace= True)
# df

df.drop(columns=['Unnamed: 0'], inplace=True)
df

Unnamed: 0,genre,name,artist,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,key,tempo
0,classical,Handel / Orch. Hale: Keyboard Suite in D Minor...,George Frideric Handel,93,33,-24,60,927,830,95,51,2,67
1,classical,"Goldberg Variations, BWV 988: Aria",Johann Sebastian Bach,454,13,-29,51,995,943,73,244,4,130
2,classical,"Clair de Lune, L. 32",Claude Debussy,335,5,-31,37,994,912,62,39,1,65
3,classical,"Sonata No. 14 ""Moonlight"" in C-Sharp Minor"", O...",Ludwig van Beethoven,184,5,-37,43,995,887,173,151,1,170
4,classical,Miroirs: III. Une barque sur l'océan,Maurice Ravel,170,47,-27,43,981,906,79,30,2,75
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,rock,Shine,Collective Soul,540,423,-8,28,294,88,152,353,6,150
996,rock,Helplessly Hoping - 2005 Remaster,"Crosby, Stills & Nash",567,159,-13,33,914,0,111,436,7,146
997,rock,Everlong,Foo Fighters,413,881,-5,36,0,0,80,364,11,158
998,rock,The Power Of Love,Huey Lewis & The News,758,831,-5,32,85,0,101,958,5,118


In [16]:
# fig = px.scatter(df, x='bpm', y='nrgy',color='top genre', hover_name='artist',hover_data=['title'])
# fig = px.scatter(df, x='tempo', y='energy',color='genre', hover_name='name', hover_data=['artist'])
# fig.show()

In [18]:
k = 5
knn_neighbors = 10

data = df

models = ["Lin SVM", "Poly SVM", "RBF SVM", "KNN", "LR", "NN"]

X = data.iloc[:, 3:]    # data
y = data.iloc[:, 0]     # labels

# use a min/max scaler for performance
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X)
X = pd.DataFrame(scaling.transform(X))

results = [] # accuracy

# create the support vector machines with the linear, poly, and rbf kernels
clfs = [svm.SVC(kernel="linear"), svm.SVC(kernel="poly"), svm.SVC(kernel="rbf"), \
        KNeighborsClassifier(n_neighbors=knn_neighbors), LogisticRegression(random_state=0, solver="sag", multi_class="ovr"), \
        MLPClassifier(random_state=0, max_iter=200)]

kf = KFold(n_splits=k, shuffle=True)

for train_index, test_index in kf.split(X): # split KFold data
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y[train_index], y[test_index]

    experiment_result = []      # the results using each svm for this partitioned data set

    for i in range(len(clfs)):                  # use the data set to train each svm
        clfs[i].fit(X_train, y_train)           # train the svm
        predictions = clfs[i].predict(X_test)    # predict
        accuracy = sum(predictions == y_test) / len(y_test)   # get accuracy
#         accuracy = clfs[i].score(X_test, y_test)
        
        # get the total results
        d = {"Prediction" : predictions, "Actual" : y_test, "Correct" : predictions == y_test}
        d = pd.DataFrame(data=d)
        
        # get results for each genre prediction
        genre_results = {}
        for n, g in d.groupby("Actual"):
            f = sum(g["Correct"] / len(g))
            if n not in genre_results:
                genre_results[n] = 0
            genre_results[n] += f
        
        # add in total accuracy
        genre_results["Total"] = accuracy
        genre_results = pd.DataFrame(data=genre_results, index=[0])

        # append to the experiment results
        experiment_result.append(genre_results)
        
    results.append(experiment_result)           # append the row of data to the results

# put the results into a dataframe
results = pd.DataFrame(results)
results.columns = models

# calculate the average accuracy for each column
# avg_accuracy = [sum(results.iloc[:, i]) / len(results) for i in range(len(results.columns))]
# results.loc["Avg"] = avg_accuracy

# display results
# print(results)

for m in models:
    # print model
    print(m)
    
    # get dataframe of results for the model
    results_df = pd.concat(results.loc[:, m].tolist()).reset_index(drop=True)
    
    # calculate averages
    avg_accuracy = [sum(results_df.iloc[:, i]) / len(results_df) for i in range(len(results_df.columns))]
    results_df.loc["Avg"] = avg_accuracy
    
    # print results
    print(results_df)
    print()



Lin SVM
     classical   country       edm      jazz     lo-fi     metal       pop  \
0     0.888889  0.684211  0.650000  0.500000  0.875000  0.869565  0.214286   
1     0.956522  0.454545  0.631579  0.466667  0.947368  0.736842  0.269231   
2     0.920000  0.473684  0.500000  0.625000  0.863636  0.937500  0.541667   
3     0.761905  0.681818  0.380952  0.600000  0.941176  0.681818  0.400000   
4     0.923077  0.277778  0.650000  0.518519  0.888889  0.700000  0.250000   
Avg   0.890078  0.514407  0.562506  0.542037  0.903214  0.785145  0.335037   

          r&b       rap      rock  Total  
0    0.416667  0.529412  0.368421  0.615  
1    0.380952  0.636364  0.214286  0.575  
2    0.357143  0.526316  0.280000  0.605  
3    0.476190  0.750000  0.375000  0.605  
4    0.500000  0.636364  0.423077  0.565  
Avg  0.426190  0.615691  0.332157  0.593  

Poly SVM
     classical   country       edm      jazz     lo-fi     metal       pop  \
0     0.833333  0.631579  0.600000  0.727273  0.875000  



In [19]:
CLIENT_ID = "9354a4c040c74126ac9f437a0d266c9c"
CLIENT_SECRET = "ab4449ee4c7e4e5899fbab2cd9e20e9e"

client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp= spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [20]:
artist = 'The Weekend'
track = 'After Hours'
song = sp.search(q="artist:" + artist + " track:" + track, type="track")
meta_af = sp.audio_features(song['tracks']['items'][0]['id'])
metadata= {'danceability': int(meta_af[0]['danceability']*1000),
           'energy': int(meta_af[0]['energy']*1000), 'loudness': int(meta_af[0]['loudness']),
           'speechiness': int(1000*meta_af[0]['speechiness']),'acousticness': int(1000*meta_af[0]['acousticness']),
           'instrumentalness': int(1000*meta_af[0]['instrumentalness']),'liveness': int(1000*meta_af[0]['liveness']),
           'valence': int(1000*meta_af[0]['valence']), 'key': meta_af[0]['key'],
           'tempo': int(meta_af[0]['tempo'])}

In [21]:
df = pd.DataFrame(metadata, index=[0])
display(df)

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,key,tempo
0,664,572,-6,30,81,6,121,143,5,108
