# Support Vector Machine

## Imports

### Python and Sys

In [84]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import random

###  Models and Visualisation

In [68]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  

from sklearn.svm import SVC, LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

### Source files

In [60]:
from tools import *

## Loading and Splitting Data

In [2]:
data = np.load("15_scenes_Xy.npz", "rb")

In [42]:
def train_val_test_split(data, train_size=0.7, val_size=0.1, seed=1337):
    """
    """
    
    X, y = data['X'], data['y']    
    
    train_ids, val_test_ids = compute_split(X.shape[0], pc=train_size, seed=seed)
    val_ids,test_ids = compute_split(len(val_test_ids), pc=val_size)
    
    X_train, y_train = X[indexes[train_ids]], y[indexes[train_ids]]
    X_val, y_val = X[indexes[val_ids]], y[indexes[val_ids]]
    X_test, y_test = X[indexes[test_ids]], y[indexes[test_ids]]
    
    return X_train, X_val, X_test, y_train, y_val, y_test

In [43]:
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(data)

## Training SVM

### C = 1

In [75]:
ovr_linear_svc = OneVsRestClassifier(LinearSVC(random_state=42))
ovr_linear_svc.fit(X_train, y_train);

In [76]:
y_pred_val = ovr_linear_svc.predict(X_val)

print("Accuracy of Validation Set : {}\n"
      "==========================".format(
          accuracy_score(y_val, y_pred_val)))

Accuracy of Validation Set : 0.9328358208955224


### GridSearch

In [90]:
ovr_linear_svc = OneVsRestClassifier(LinearSVC(random_state=42))
params = {"estimator__C" : [0.01, 0.05, 0.1, 1, 5]}
model_tuning = GridSearchCV(ovr_linear_svc, param_grid=params, cv=5, n_jobs=-1)
model_tuning.fit(X_train, y_train);

#### Best params

In [91]:
pd.DataFrame(model_tuning.cv_results_)



Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_estimator__C,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,...,split2_test_score,split2_train_score,split3_test_score,split3_train_score,split4_test_score,split4_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,17.443486,0.093827,0.658809,0.998646,0.01,{'estimator__C': 0.01},1,0.661392,0.998803,0.666667,...,0.633758,0.998407,0.686901,0.999204,0.645265,0.998808,0.439281,0.01505,0.01825,0.000407
1,17.616742,0.090472,0.6518,0.998885,0.05,{'estimator__C': 0.05},2,0.65981,0.998404,0.65873,...,0.627389,0.998805,0.677316,0.998806,0.635634,0.998808,0.403865,0.021073,0.017986,0.000391
2,17.710073,0.107364,0.650844,0.997848,0.1,{'estimator__C': 0.1},3,0.653481,0.994815,0.666667,...,0.617834,0.998009,0.674121,0.998806,0.642055,0.998808,0.574195,0.023983,0.019825,0.001548
3,17.444889,0.106006,0.646384,0.998726,1.0,{'estimator__C': 1},5,0.648734,0.998803,0.665079,...,0.627389,0.998805,0.666134,0.999204,0.624398,0.99841,0.422002,0.04375,0.017809,0.000298
4,15.044263,0.0543,0.648296,0.998407,5.0,{'estimator__C': 5},4,0.651899,0.998803,0.663492,...,0.624204,0.998009,0.669329,0.998806,0.632424,0.998013,4.118658,0.04338,0.017416,0.000355


In [94]:
model_tuning.best_params_

{'estimator__C': 0.01}

In [95]:
y_pred_val = model_tuning.predict(X_val)
print("Accuracy of Validation Set : {}\n"
      "==========================".format(
          accuracy_score(y_val, y_pred_val)))

Accuracy of Validation Set : 0.9328358208955224
