# Support Vector Machine

## Imports

### Python and Sys

In [1]:
from tqdm.autonotebook import tqdm
import numpy as np
import pandas as pd
import random



###  Models and Visualisation

In [2]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  

from sklearn.svm import SVC, LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

### Source files

In [3]:
from tools import *

## Loading and Splitting Data

In [4]:
data = np.load("15_scenes_Xy.npz", "rb")

In [5]:
def train_val_test_split(data, train_size=0.7, val_size=0.1, seed=1337):
    """
    """
    
    X, y = data['X'], data['y']    
    
    train_ids, val_test_ids = compute_split(X.shape[0], pc=train_size, seed=seed)
    val_ids,test_ids = compute_split(len(val_test_ids), pc=val_size)
    
    X_train, y_train = X[train_ids], y[train_ids]
    X_val, y_val = X[val_ids], y[val_ids]
    X_test, y_test = X[test_ids], y[test_ids]
    
    return X_train, X_val, X_test, y_train, y_val, y_test

In [6]:
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(data)

## Training SVM

### C = 1

In [30]:
ovr_linear_svc = OneVsRestClassifier(LinearSVC(C=0.001))
ovr_linear_svc.fit(X_train, y_train);

In [31]:
y_pred_val = ovr_linear_svc.predict(X_val)

print("Accuracy of Validation Set : {}\n"
      "==========================".format(
          accuracy_score(y_val, y_pred_val)))

Accuracy of Validation Set : 0.917910447761194


In [32]:
ovr_linear = OneVsRestClassifier(LinearSVC(C=100))
ovr_linear.fit(X_train, y_train);

In [33]:
y_pred_val = ovr_linear.predict(X_val)

print("Accuracy of Validation Set : {}\n"
      "==========================".format(
          accuracy_score(y_val, y_pred_val)))

Accuracy of Validation Set : 0.9104477611940298


### GridSearch

In [9]:
ovr_linear_svc = OneVsRestClassifier(LinearSVC(random_state=42))
params = {"estimator__C" : [0.005, 0.01, 0.05, 0.1, 1]}
model_tuning = GridSearchCV(ovr_linear_svc, param_grid=params, cv=5, n_jobs=-1)
model_tuning.fit(X_train, y_train);

#### Best params

In [10]:
pd.DataFrame(model_tuning.cv_results_)



Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_estimator__C,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,...,split2_test_score,split2_train_score,split3_test_score,split3_train_score,split4_test_score,split4_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,23.990846,0.103701,0.671233,0.998487,0.005,{'estimator__C': 0.005},1,0.657686,0.998804,0.681458,...,0.680445,0.999203,0.666667,0.997213,0.669887,0.998014,0.672909,0.013934,0.008913,0.000771
1,24.326704,0.092705,0.665499,0.998646,0.01,{'estimator__C': 0.01},2,0.656101,0.998804,0.675119,...,0.667727,0.999602,0.657097,0.998408,0.671498,0.998014,0.251872,0.029276,0.007653,0.000539
2,24.339701,0.115097,0.657853,0.998168,0.05,{'estimator__C': 0.05},3,0.640254,0.997608,0.675119,...,0.659777,0.998805,0.650718,0.99801,0.663446,0.998014,0.694468,0.012722,0.011802,0.000406
3,24.096008,0.114044,0.657534,0.998408,0.1,{'estimator__C': 0.1},5,0.638669,0.998804,0.664025,...,0.664547,1.0,0.658692,0.998408,0.661836,0.997617,0.568444,0.017676,0.009685,0.000975
4,20.10931,0.073773,0.657853,0.998089,1.0,{'estimator__C': 1},3,0.640254,0.997608,0.673534,...,0.664547,0.999602,0.644338,0.997213,0.666667,0.997617,6.390781,0.063261,0.013135,0.00085


In [12]:
model_tuning.best_params_

{'estimator__C': 0.005}

In [13]:
y_pred_val = model_tuning.predict(X_val)
print("Accuracy of Validation Set : {}\n"
      "==========================".format(
          accuracy_score(y_val, y_pred_val)))

Accuracy of Validation Set : 0.917910447761194


In [18]:
accuracy_score(y_test, model_tuning.predict(X_test))

0.8943894389438944