# Support Vector Machine Model

## Importing libraries

In [7]:
import pickle
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
import pandas as pd

## Reading in CSV data
Reading the dataset from a CSV into a DataFrame for use with the model.
Remove unnecessary data and split into data and labels.

In [8]:
music_df = pd.read_csv(r'D:\Uni\Y3\FYP\Code\Music Classification\extracted-features.csv')

## This will contain the data
data = music_df.iloc[:, 1:].values
## This will contain the labels corresponding to the data
labels = music_df.iloc[:, 0].values

## Cross-validate a SVM to find the best hyper-parameters

Use GridSearchCV to evaluate the optimal hyperparameters for the algorithm.

In [9]:
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size = 0.30)

svm_model = LinearSVC()

param_grid = {"penalty": ["l1", "l2"],
              "loss": ["hinge", "squared_hinge"],
              "C": [0.1, 1, 10, 100, 1000],}

scoring_metrics = {"Accuracy": "accuracy", "AUC": "roc_auc_ovr", "F1": "f1_macro"}

## roc_auc_ovr sets the score to use roc_auc in one vs rest mode
## StratifiedKFold is used by default for the cross validation
svm_grid_search_cv = GridSearchCV(svm_model, param_grid,
                                  scoring = scoring_metrics, refit = "AUC", cv = 5)

svm_grid_search_cv.fit(data_train, labels_train)

best_params = svm_grid_search_cv.best_params_

print(best_params)

## Best score is using the metric defined in refit
print("ROC AUC: " + str(svm_grid_search_cv.best_score_))

best_penalty = best_params["penalty"]
best_loss = best_params["loss"]
best_c_value = best_params["C"]


Traceback (most recent call last):
  File "C:\Users\Jack\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Jack\anaconda3\lib\site-packages\sklearn\svm\_classes.py", line 233, in fit
    self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
  File "C:\Users\Jack\anaconda3\lib\site-packages\sklearn\svm\_base.py", line 965, in _fit_liblinear
    solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
  File "C:\Users\Jack\anaconda3\lib\site-packages\sklearn\svm\_base.py", line 821, in _get_liblinear_solver_type
    raise ValueError('Unsupported set of arguments: %s, '
ValueError: Unsupported set of arguments: The combination of penalty='l1' and loss='hinge' is not supported, Parameters: penalty='l1', loss='hinge', dual=True

Traceback (most recent call last):
  File "C:\Users\Jack\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", li

AttributeError: 'LinearSVC' object has no attribute 'predict_proba'

## Produce SVM model

Produce a SVM model using the optimal hyperparameters and save to a Pickle file for use with Azure.

In [None]:
knn_model = LinearSVC(penalty = best_penalty, loss = best_loss, class_weight = best_c_value)
knn_model.fit(data_train, labels_train)

with open("models/linear-svm.pkl", mode = "wb") as model_file:
    pickle.dump(svm_model, model_file)