# SVM

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

In [2]:
data = pd.read_csv(os.path.join('Data', 'w_secondaryGenre.csv'))
data.head()

Unnamed: 0,id,artists,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,key,mode,count,genres,primary_genres,secondary_genres,PrimarynSecondary_genres
0,26854,Young Boss,0.145,0.986,229669.0,0.597,2e-06,0.197,-6.809,0.308,115.004,0.761,44.0,8,1,1,['vapor trap'],rap,trap,rap/trap
1,113,4B,0.00441,0.63,224052.0,0.851,0.0218,0.0939,-4.61,0.319,150.054,0.573,54.0,1,1,1,"['brostep', 'electro house', 'electronic trap']",rap,trap,rap/trap
2,12857,Keith Ape,0.030928,0.734,236647.0,0.701,4e-06,0.202,-5.3055,0.1661,129.04,0.3395,61.0,9,1,2,"['korean trap', 'underground hip hop']",rap,trap,rap/trap
3,18347,Paloma Mami,0.411,0.876,159132.0,0.568,0.00274,0.187,-5.755,0.0533,98.027,0.593,74.0,0,1,2,"['latin pop', 'reggaeton chileno', 'trap chile...",rap,trap,rap/trap
4,20074,Rich The Kid,0.104304,0.810042,194635.9167,0.640417,0.000338,0.192833,-6.649042,0.225142,132.294208,0.504083,68.208333,11,1,24,"['atl hip hop', 'hip hop', 'melodic rap', 'pop...",rap,trap,rap/trap


In [3]:
cleaned_data=data.drop(["id","artists","count","genres","popularity","primary_genres","PrimarynSecondary_genres"],axis=1)
cleaned_data.head()

Unnamed: 0,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,key,mode,secondary_genres
0,0.145,0.986,229669.0,0.597,2e-06,0.197,-6.809,0.308,115.004,0.761,8,1,trap
1,0.00441,0.63,224052.0,0.851,0.0218,0.0939,-4.61,0.319,150.054,0.573,1,1,trap
2,0.030928,0.734,236647.0,0.701,4e-06,0.202,-5.3055,0.1661,129.04,0.3395,9,1,trap
3,0.411,0.876,159132.0,0.568,0.00274,0.187,-5.755,0.0533,98.027,0.593,0,1,trap
4,0.104304,0.810042,194635.9167,0.640417,0.000338,0.192833,-6.649042,0.225142,132.294208,0.504083,11,1,trap


In [4]:
X = cleaned_data.drop("secondary_genres", axis=1)
y = cleaned_data["secondary_genres"]
print(X.shape, y.shape)

(700, 12) (700,)


In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [6]:
from sklearn.svm import SVC 
smodel = SVC(kernel='linear',C= 5, gamma= 0.0001)

In [7]:
from sklearn.preprocessing import MinMaxScaler
X_Scaler=MinMaxScaler().fit(X_train)
X_train_scaled4 = X_Scaler.transform(X_train)
X_test_scaled4 = X_Scaler.transform(X_test)

In [8]:
smodel.fit(X_train_scaled4, y_train)

SVC(C=5, gamma=0.0001, kernel='linear')

In [9]:
print(f"Training Data Score: {smodel.score(X_train_scaled4, y_train)}")
print(f"Testing Data Score: {smodel.score(X_test_scaled4, y_test)}")

Training Data Score: 0.8514285714285714
Testing Data Score: 0.8285714285714286


# Hyperparameter Tuning

In [10]:
# Create the GridSearchCV model
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10,50],
              'gamma': [0.0001, 0.0005,0.001, 0.005,0.01]}
grid = GridSearchCV(smodel, param_grid, verbose=3)

In [11]:
grid.fit(X_train_scaled4, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.752, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.819, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.695, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.800, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.810, total=   0.0s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.752, total=   0.0s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ..........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] .................... C=5, gamma=0.001, score=0.810, total=   0.0s
[CV] C=5, gamma=0.001 ................................................
[CV] .................... C=5, gamma=0.001, score=0.743, total=   0.0s
[CV] C=5, gamma=0.001 ................................................
[CV] .................... C=5, gamma=0.001, score=0.800, total=   0.0s
[CV] C=5, gamma=0.001 ................................................
[CV] .................... C=5, gamma=0.001, score=0.810, total=   0.0s
[CV] C=5, gamma=0.005 ................................................
[CV] .................... C=5, gamma=0.005, score=0.771, total=   0.0s
[CV] C=5, gamma=0.005 ................................................
[CV] .................... C=5, gamma=0.005, score=0.810, total=   0.0s
[CV] C=5, gamma=0.005 ................................................
[CV] .................... C=5, gamma=0.005, score=0.743, total=   0.0s
[CV] C=5, gamma=0.005 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.5s finished


GridSearchCV(estimator=SVC(C=5, gamma=0.0001, kernel='linear'),
             param_grid={'C': [1, 5, 10, 50],
                         'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01]},
             verbose=3)

In [12]:
print(grid.best_params_)
print(grid.best_score_)

{'C': 5, 'gamma': 0.0001}
0.7866666666666667
