# Support Vector Machine

## Imports

### Python and Sys

In [3]:
from tqdm.autonotebook import tqdm
import numpy as np
import pandas as pd
import random



###  Models and Visualisation

In [4]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  

from sklearn.svm import SVC, LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

### Source files

In [5]:
from tools import *

## Loading and Splitting Data

In [6]:
data = np.load("15_scenes_Xy.npz", "rb")

In [10]:
def train_val_test_split(data, train_size=0.7, val_size=0.1, seed=1337):
    """
    """
    
    X, y = data['X'], data['y']    
    
    train_ids, val_test_ids = compute_split(X.shape[0], pc=train_size, seed=seed)
    val_ids,test_ids = compute_split(len(val_test_ids), pc=val_size)
    
    X_train, y_train = X[train_ids], y[train_ids]
    X_val, y_val = X[val_ids], y[val_ids]
    X_test, y_test = X[test_ids], y[test_ids]
    
    return X_train, X_val, X_test, y_train, y_val, y_test

In [11]:
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(data)

## Training SVM

### C = 1

In [12]:
ovr_linear_svc = OneVsRestClassifier(LinearSVC(random_state=42))
ovr_linear_svc.fit(X_train, y_train);

In [13]:
y_pred_val = ovr_linear_svc.predict(X_val)

print("Accuracy of Validation Set : {}\n"
      "==========================".format(
          accuracy_score(y_val, y_pred_val)))

Accuracy of Validation Set : 0.9104477611940298


### GridSearch

In [19]:
ovr_linear_svc = OneVsRestClassifier(LinearSVC(random_state=42))
params = {"estimator__C" : [0.005, 0.01, 0.05, 0.1, 1]}
model_tuning = GridSearchCV(ovr_linear_svc, param_grid=params, cv=5, n_jobs=-1)
model_tuning.fit(X_train, y_train);

#### Best params

In [20]:
pd.DataFrame(model_tuning.cv_results_)



Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_estimator__C,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,...,split2_test_score,split2_train_score,split3_test_score,split3_train_score,split4_test_score,split4_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,23.68357,0.088276,0.672507,0.998567,0.005,{'estimator__C': 0.005},1,0.657686,0.998804,0.679873,...,0.685215,0.999602,0.671451,0.998806,0.668277,0.998014,1.601084,0.019323,0.009549,0.000694
1,20.035204,0.078218,0.666454,0.998567,0.01,{'estimator__C': 0.01},2,0.649762,0.998804,0.679873,...,0.672496,0.999602,0.658692,0.998408,0.671498,0.997617,0.556337,0.017603,0.010798,0.000645
2,19.821432,0.087963,0.65626,0.998567,0.05,{'estimator__C': 0.05},3,0.641838,0.998804,0.673534,...,0.659777,0.999602,0.650718,0.998408,0.655395,0.997617,0.276459,0.009725,0.010514,0.000645
3,23.808737,0.112823,0.65403,0.996338,0.1,{'estimator__C': 0.1},4,0.640254,0.998804,0.671949,...,0.672496,0.999203,0.623604,0.987261,0.661836,0.997617,2.143888,0.045154,0.019183,0.004569
4,24.094015,0.081058,0.652756,0.998089,1.0,{'estimator__C': 1},5,0.637084,0.998804,0.66878,...,0.653418,0.998805,0.644338,0.997213,0.660225,0.997617,5.985179,0.029802,0.011237,0.000636


In [21]:
model_tuning.best_params_

{'estimator__C': 0.005}

In [22]:
y_pred_val = model_tuning.predict(X_val)
print("Accuracy of Validation Set : {}\n"
      "==========================".format(
          accuracy_score(y_val, y_pred_val)))

Accuracy of Validation Set : 0.917910447761194


In [18]:
accuracy_score(y_test, model_tuning.predict(X_test))

0.8943894389438944