# Genre Classifier

In [21]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

%matplotlib inline
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from IPython.display import display, HTML

#spotify
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import csv
import io

In [22]:
df = pd.read_csv("top.csv")
df.head()

#extracting columns we will use to base classifications
#dropping unnecessary columns
df.drop(columns=['Unnamed: 0'], inplace=True)
df

Unnamed: 0,genre,name,artist,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,key,tempo
0,classical,Handel / Orch. Hale: Keyboard Suite in D Minor...,George Frideric Handel,0.0939,0.03360,-24.041,0.0606,0.92700,0.830000,0.0954,0.0516,2,67.359
1,classical,"Goldberg Variations, BWV 988: Aria",Johann Sebastian Bach,0.4540,0.01390,-29.966,0.0514,0.99500,0.943000,0.0736,0.2440,4,130.253
2,classical,"Clair de Lune, L. 32",Claude Debussy,0.3350,0.00532,-31.646,0.0376,0.99400,0.912000,0.0621,0.0397,1,65.832
3,classical,"Sonata No. 14 ""Moonlight"" in C-Sharp Minor"", O...",Ludwig van Beethoven,0.1840,0.00527,-37.264,0.0432,0.99500,0.887000,0.1730,0.1510,1,170.612
4,classical,Miroirs: III. Une barque sur l'océan,Maurice Ravel,0.1700,0.04790,-27.021,0.0438,0.98100,0.906000,0.0795,0.0304,2,75.664
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,rock,Band On The Run - Remastered 2010,Wings,0.4790,0.60100,-8.806,0.0318,0.08320,0.001420,0.1100,0.6690,7,124.966
996,rock,Everlong,Foo Fighters,0.4130,0.88100,-5.541,0.0367,0.00006,0.000308,0.0805,0.3640,11,158.066
997,rock,Helplessly Hoping - 2005 Remaster,"Crosby, Stills & Nash",0.5670,0.15900,-13.648,0.0331,0.91400,0.000000,0.1110,0.4360,7,146.913
998,rock,Killing In The Name,Rage Against The Machine,0.4660,0.83300,-4.215,0.3040,0.02660,0.000000,0.0327,0.6610,7,88.785


In [26]:
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

In [28]:
k = 10
knn_neighbors = 10

data = df

models = ["Lin SVM", "Poly SVM", "RBF SVM", "KNN", "LR", "NN"]

X = data.iloc[:, 3:]    # data
y = data.iloc[:, 0]     # labels

# use a min/max scaler for performance
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X)
X = pd.DataFrame(scaling.transform(X))

results = [] # accuracy

# create the support vector machines with the linear, poly, and rbf kernels
clfs = [svm.SVC(kernel="linear"), svm.SVC(kernel="poly"), svm.SVC(kernel="rbf"), \
        KNeighborsClassifier(n_neighbors=knn_neighbors), LogisticRegression(random_state=0, solver="sag", multi_class="ovr"), \
        MLPClassifier(random_state=0, max_iter=200)]

kf = KFold(n_splits=k, shuffle=True)

for train_index, test_index in kf.split(X): # split KFold data
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y[train_index], y[test_index]

    experiment_result = []      # the results using each svm for this partitioned data set

    for i in range(len(clfs)):                  # use the data set to train each svm
        clfs[i].fit(X_train, y_train)           # train the svm
        predictions = clfs[i].predict(X_test)    # predict
        accuracy = sum(predictions == y_test) / len(y_test)   # get accuracy
#         accuracy = clfs[i].score(X_test, y_test)
        
        # get the total results
        d = {"Prediction" : predictions, "Actual" : y_test, "Correct" : predictions == y_test}
        d = pd.DataFrame(data=d)
        
        # get results for each genre prediction
        genre_results = {}
        for n, g in d.groupby("Actual"):
            f = sum(g["Correct"] / len(g))
            if n not in genre_results:
                genre_results[n] = 0
            genre_results[n] += f
        
        # add in total accuracy
        genre_results["Total"] = accuracy
        genre_results = pd.DataFrame(data=genre_results, index=[0])

        # append to the experiment results
        experiment_result.append(genre_results)
        
    results.append(experiment_result)           # append the row of data to the results

# put the results into a dataframe
results = pd.DataFrame(results)
results.columns = models

results_df_list = []

for m in models:
    # print model
    print(m)
    
    # get dataframe of results for the model
    results_df = pd.concat(results.loc[:, m].tolist()).reset_index(drop=True)
    results_df_list.append(results_df)
    
    # calculate averages
    avg_accuracy = [sum(results_df.iloc[:, i]) / len(results_df) for i in range(len(results_df.columns))]
    results_df.loc["Avg"] = avg_accuracy
    
    # print results
    display(results_df)
    
    # print graph
    px.bar(results_df.loc["Avg"]*100, x=results_df.columns, y="Avg", \
            labels={"index": "Genre", "Avg": "Average Accuracy (Percent)"}, title=m, \
            color=results_df.columns).update_yaxes(range=(0, 100), dtick=10).update_layout(showlegend=False).show()
    print()

# print totals
totals = pd.DataFrame(data={models[i]: results_df_list[i].loc[:, "Total"] for i in range(len(models))})
display(totals)

# print graph
px.bar(totals.loc["Avg"]*100, x=totals.columns, y="Avg", \
        labels={"index": "Models", "Avg": "Average Accuracy (Percent)"}, title="Average Model Accuracies", \
        color=models).update_yaxes(range=(0, 100), dtick=10).update_layout(showlegend=False).show()

Lin SVM


Unnamed: 0,classical,country,edm,jazz,lo-fi,metal,pop,r&b,rap,rock,Total
0,0.785714,0.75,0.818182,0.888889,1.0,0.571429,0.375,0.125,0.75,0.4375,0.65
1,0.916667,0.5,0.5,0.571429,0.916667,0.5,0.2,0.625,0.727273,0.444444,0.59
2,1.0,0.5,0.75,0.6,1.0,0.9,0.090909,0.083333,0.375,0.222222,0.54
3,1.0,0.533333,0.4,0.636364,1.0,0.875,0.125,0.444444,0.714286,0.363636,0.6
4,0.888889,0.777778,0.555556,0.5,1.0,0.555556,0.4,0.411765,0.818182,0.333333,0.62
5,0.8,0.538462,0.545455,0.9,0.8,1.0,0.461538,0.428571,0.833333,0.272727,0.64
6,1.0,0.857143,0.444444,0.636364,1.0,0.875,0.3,0.384615,0.555556,0.7,0.67
7,0.875,0.357143,0.285714,0.6,0.909091,0.7,0.4,0.666667,0.416667,0.285714,0.54
8,1.0,0.777778,0.5,0.8,0.933333,0.75,0.166667,0.125,0.666667,0.272727,0.64
9,1.0,0.307692,0.571429,0.4,1.0,1.0,0.444444,0.583333,0.333333,0.285714,0.57



Poly SVM


Unnamed: 0,classical,country,edm,jazz,lo-fi,metal,pop,r&b,rap,rock,Total
0,0.785714,0.75,0.818182,0.777778,1.0,0.714286,0.5,0.375,0.666667,0.1875,0.63
1,0.916667,0.5,0.357143,0.428571,0.916667,0.6,0.333333,0.5,0.818182,0.222222,0.57
2,0.888889,0.8,0.666667,0.9,1.0,0.9,0.090909,0.166667,0.375,0.111111,0.58
3,1.0,0.466667,0.5,0.909091,1.0,0.8125,0.25,0.333333,0.571429,0.363636,0.61
4,0.888889,0.666667,0.555556,0.5,0.9,0.555556,0.4,0.411765,0.818182,0.555556,0.62
5,0.8,0.461538,0.636364,0.9,0.9,1.0,0.461538,0.428571,0.666667,0.181818,0.63
6,0.923077,0.857143,0.444444,0.727273,1.0,0.875,0.5,0.384615,0.666667,0.7,0.7
7,0.75,0.357143,0.285714,0.7,0.909091,0.7,0.6,0.5,0.416667,0.285714,0.56
8,0.8,0.555556,0.4,0.9,0.866667,0.666667,0.166667,0.125,0.555556,0.272727,0.57
9,0.857143,0.615385,0.571429,0.4,1.0,1.0,0.333333,0.666667,0.466667,0.285714,0.62



RBF SVM


Unnamed: 0,classical,country,edm,jazz,lo-fi,metal,pop,r&b,rap,rock,Total
0,0.785714,0.75,0.818182,0.777778,1.0,0.571429,0.375,0.125,0.583333,0.375,0.61
1,0.916667,0.5,0.357143,0.285714,0.833333,0.5,0.266667,0.625,0.818182,0.222222,0.54
2,1.0,0.7,0.666667,0.6,1.0,0.8,0.181818,0.083333,0.375,0.111111,0.54
3,1.0,0.466667,0.4,0.818182,1.0,0.8125,0.25,0.444444,0.714286,0.363636,0.61
4,0.888889,0.666667,0.555556,0.5,0.9,0.666667,0.4,0.411765,0.818182,0.333333,0.61
5,0.8,0.461538,0.636364,0.8,0.8,1.0,0.461538,0.428571,0.666667,0.363636,0.63
6,0.923077,0.857143,0.444444,0.727273,1.0,0.875,0.3,0.230769,0.666667,0.7,0.66
7,0.75,0.357143,0.428571,0.6,0.909091,0.7,0.466667,0.5,0.333333,0.142857,0.52
8,0.8,0.666667,0.3,0.8,0.866667,0.75,0.166667,0.125,0.666667,0.181818,0.57
9,1.0,0.615385,0.571429,0.4,0.909091,0.888889,0.444444,0.583333,0.466667,0.285714,0.61



KNN


Unnamed: 0,classical,country,edm,jazz,lo-fi,metal,pop,r&b,rap,rock,Total
0,0.785714,0.875,0.727273,0.444444,0.857143,0.571429,0.25,0.25,0.666667,0.125,0.54
1,0.916667,0.0,0.5,0.142857,0.833333,0.5,0.333333,0.375,0.454545,0.222222,0.49
2,1.0,0.7,0.666667,0.7,1.0,0.5,0.363636,0.166667,0.375,0.222222,0.56
3,1.0,0.466667,0.4,0.818182,0.8,0.6875,0.375,0.111111,0.714286,0.363636,0.56
4,0.888889,0.777778,0.444444,0.5,0.9,0.444444,0.2,0.294118,0.727273,0.444444,0.56
5,0.8,0.461538,0.727273,0.6,0.8,0.888889,0.230769,0.285714,0.5,0.363636,0.56
6,0.923077,0.714286,0.666667,0.727273,1.0,0.875,0.4,0.076923,0.555556,0.5,0.63
7,0.75,0.428571,0.571429,0.6,0.909091,0.6,0.2,0.333333,0.333333,0.142857,0.48
8,0.8,0.444444,0.4,0.5,0.733333,0.583333,0.333333,0.0,0.555556,0.090909,0.47
9,0.857143,0.461538,0.714286,0.4,0.818182,0.777778,0.222222,0.083333,0.466667,0.428571,0.5



LR


Unnamed: 0,classical,country,edm,jazz,lo-fi,metal,pop,r&b,rap,rock,Total
0,0.785714,0.625,0.818182,0.555556,1.0,0.714286,0.375,0.125,0.583333,0.3125,0.58
1,0.916667,0.5,0.357143,0.285714,1.0,0.7,0.2,0.625,0.818182,0.444444,0.59
2,1.0,0.4,0.666667,0.5,1.0,0.9,0.272727,0.083333,0.375,0.222222,0.53
3,1.0,0.266667,0.5,0.545455,1.0,0.875,0.125,0.444444,0.714286,0.454545,0.57
4,1.0,0.666667,0.555556,0.416667,1.0,0.777778,0.4,0.411765,0.727273,0.333333,0.62
5,0.9,0.461538,0.454545,0.6,0.9,1.0,0.461538,0.285714,0.833333,0.272727,0.6
6,0.923077,0.571429,0.444444,0.636364,1.0,0.875,0.4,0.307692,0.666667,0.6,0.64
7,0.875,0.142857,0.285714,0.6,1.0,0.7,0.466667,0.333333,0.416667,0.142857,0.5
8,0.9,0.555556,0.4,0.8,0.8,0.75,0.166667,0.125,0.666667,0.181818,0.57
9,1.0,0.307692,0.571429,0.3,0.909091,1.0,0.444444,0.416667,0.466667,0.142857,0.54



NN


Unnamed: 0,classical,country,edm,jazz,lo-fi,metal,pop,r&b,rap,rock,Total
0,0.857143,0.75,0.818182,0.888889,0.857143,0.571429,0.375,0.125,0.75,0.5,0.66
1,0.916667,0.5,0.5,0.285714,0.916667,0.5,0.2,0.5,0.727273,0.444444,0.56
2,1.0,0.7,0.833333,0.5,1.0,0.8,0.272727,0.166667,0.375,0.222222,0.58
3,1.0,0.4,0.4,0.545455,1.0,0.8125,0.25,0.444444,0.571429,0.454545,0.57
4,0.888889,0.555556,0.666667,0.75,0.9,0.555556,0.4,0.529412,0.818182,0.333333,0.65
5,0.7,0.538462,0.636364,0.8,0.8,1.0,0.538462,0.428571,0.833333,0.181818,0.63
6,1.0,0.714286,0.555556,0.727273,1.0,0.875,0.3,0.384615,0.666667,0.7,0.69
7,0.75,0.357143,0.142857,0.6,0.818182,0.6,0.533333,0.666667,0.416667,0.142857,0.51
8,0.8,0.444444,0.4,0.8,0.8,0.666667,0.166667,0.125,0.666667,0.272727,0.55
9,1.0,0.538462,0.571429,0.5,0.818182,1.0,0.444444,0.583333,0.6,0.285714,0.63





Unnamed: 0,Lin SVM,Poly SVM,RBF SVM,KNN,LR,NN
0,0.65,0.63,0.61,0.54,0.58,0.66
1,0.59,0.57,0.54,0.49,0.59,0.56
2,0.54,0.58,0.54,0.56,0.53,0.58
3,0.6,0.61,0.61,0.56,0.57,0.57
4,0.62,0.62,0.61,0.56,0.62,0.65
5,0.64,0.63,0.63,0.56,0.6,0.63
6,0.67,0.7,0.66,0.63,0.64,0.69
7,0.54,0.56,0.52,0.48,0.5,0.51
8,0.64,0.57,0.57,0.47,0.57,0.55
9,0.57,0.62,0.61,0.5,0.54,0.63


# Spotify Scraper

In [29]:
CLIENT_ID = "9354a4c040c74126ac9f437a0d266c9c"
CLIENT_SECRET = "ab4449ee4c7e4e5899fbab2cd9e20e9e"

client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp= spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [33]:
artist = 'DaBaby'
track = 'VIBEZ'

# get song information
song = sp.search(q="artist:" + artist + " track:" + track, type="track")
meta_af = sp.audio_features(song['tracks']['items'][0]['id'])
metadata= {'genre': '',
           'name': track, 'artist': artist,
           'danceability': meta_af[0]['danceability'],
           'energy': meta_af[0]['energy'], 'loudness': meta_af[0]['loudness'],
           'speechiness': meta_af[0]['speechiness'],'acousticness': meta_af[0]['acousticness'],
           'instrumentalness': meta_af[0]['instrumentalness'],'liveness': meta_af[0]['liveness'],
           'valence': meta_af[0]['valence'], 'key': meta_af[0]['key'],
           'tempo': meta_af[0]['tempo']}

In [34]:
df = pd.DataFrame(metadata, index=[0])
display(df)
X = df.iloc[:, 3:]    # data
y = df.iloc[:, 0]     # labels

X = pd.DataFrame(scaling.transform(X))
# print(X)


Unnamed: 0,genre,name,artist,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,key,tempo
0,,VIBEZ,DaBaby,0.768,0.652,-2.708,0.307,0.113,0,0.107,0.777,1,154.187


In [35]:
models = ['Lin SVM', 'Poly SVM', 'RBF SVM', 'KNN', 'LR', 'NN']
r = []
for i in range(len(clfs)):
    r.append(clfs[i].predict(X))

for i in range(len(r)):
    print(models[i] + " : ", r[i])

Lin SVM :  ['rap']
Poly SVM :  ['rap']
RBF SVM :  ['rap']
KNN :  ['rap']
LR :  ['rap']
NN :  ['rap']
