# SVM
## Training an SVM classifier on top of the extracted CNN Nodes 

In [1]:
# generic
import numpy as np
import os
import matplotlib.pyplot as plt
import h5py
import itertools
import matplotlib
#matplotlib.style.use('ggplot')

# sklearn
from sklearn import cross_validation, grid_search
from sklearn.metrics import confusion_matrix, classification_report,accuracy_score
from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import GridSearchCV
import pandas as pd


  from ._conv import register_converters as _register_converters


importing necessary data 

In [2]:
X_test = np.load("data/SVM-X_test.npy")
y_test = pd.read_csv("data/SVM-y_test.csv")
y_test = y_test['0']

In [3]:
X_train = np.load("data/SVM-X_train.npy")
y_train = pd.read_csv("data/SVM-y_train.csv")
y_train = list(y_train['0'])

In [4]:
print(len(X_test), len(y_test))
print(len(X_train), len(y_train))

1252 1252
3810 3810


In [5]:
gt = pd.read_csv(os.path.join('data/sc5-test','ground_truth.txt'),sep = ';', header = None)
y_test_new = []
for ind, val in enumerate(y_test):
    y_test_new.append(list(gt[gt[0]==val+'.jpg'][1])[0])

In [6]:
from numpy import array
from numpy import argmax
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import PCA

def onehot(y_test_new):
    values = np.array(y_test_new)
    print(values)
    # integer encode
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(values)
    # binary encode
    onehot_encoder = OneHotEncoder(sparse=False)
    integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
    onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
    return integer_encoded
y_test = onehot(y_test_new)
y_train = onehot(y_train)

['Gondola' 'Gondola' 'Gondola' ... 'Motopontone rettangolare'
 'Motopontone rettangolare' 'Motopontone rettangolare']
['Gondola' 'Gondola' 'Gondola' ... 'Barchino' 'Barchino' 'Barchino']


In [7]:
def scale_set(train,test):
    """uses sklearn standard sclar to normalize data"""
    sc = StandardScaler()
    fitted = sc.fit(train)
    return sc.transform(train), sc.transform(test)
#X_train, X_test = scale_set(X_train, X_test)
def my_PCA(train,test):
    '''applies PCA to the data'''
    pca = PCA(n_components=train.shape[1])
    data = pca.fit(train)
    return pca.transform(train), pca.transform(test)
X_train, X_test = my_PCA(X_train, X_test)

In [8]:
y_test = y_test.ravel()
y_train = y_train.ravel()

# Now I will train three SVM classifiers:
* SGD
* Linear SVC
* SVC (and with bagging)

## SGD

In [10]:
from sklearn.linear_model import SGDClassifier
np.random.seed(42)

In [11]:
clf = SGDClassifier(loss="hinge",
                    penalty="l2",
                    n_jobs=-1,
                    learning_rate='invscaling',
                    eta0 = 3,
                    n_iter = 112)
clf.fit(X_train, y_train)



SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=3, fit_intercept=True, l1_ratio=0.15,
       learning_rate='invscaling', loss='hinge', max_iter=None, n_iter=112,
       n_jobs=-1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False)

In [12]:
y_predict=clf.predict(X_test)
print("\nClassification report:")
print(classification_report(y_test, y_predict))
print("The accuracy is {:.1f}%".format(accuracy_score(y_test, y_predict, normalize=True)*100))


Classification report:
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        19
          1       0.00      0.00      0.00        22
          2       0.07      0.02      0.03        51
          3       0.00      0.00      0.00         3
          4       0.00      0.00      0.00         7
          5       0.16      0.12      0.14       217
          6       0.06      0.06      0.06       125
          7       0.00      0.00      0.00         6
          8       0.04      0.03      0.04        59
          9       0.00      0.00      0.00         3
         10       0.00      0.00      0.00         1
         11       0.21      0.25      0.23       274
         12       0.09      0.08      0.08        74
         13       0.00      0.00      0.00        15
         14       0.00      0.00      0.00        19
         15       0.00      0.00      0.00         3
         16       0.04      0.03      0.04        29
         17       0.2

## LinearSVC

In [None]:
clf = LinearSVC(random_state=42,C=100,dual=False)
clf.fit(X_train, y_train)

In [None]:
y_predict=clf.predict(X_test)
print("\nClassification report:")
print(classification_report(y_test, y_predict))
print("The accuracy is {:.1f}%".format(accuracy_score(y_test, y_predict, normalize=True)*100))

## SVC (kernel trick) 
  

In [9]:
clf = SVC(decision_function_shape='ovr')
clf.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [None]:
print("\nBest parameters set:")
print(clf.best_params_)

In [10]:
y_predict=clf.predict(X_test)
print("\nClassification report:")
print(classification_report(y_test, y_predict))
print("The accuracy is {:.1f}%".format(accuracy_score(y_test, y_predict, normalize=True)*100))


Classification report:
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        19
          1       0.00      0.00      0.00        22
          2       0.00      0.00      0.00        51
          3       0.00      0.00      0.00         3
          4       0.00      0.00      0.00         7
          5       0.00      0.00      0.00       217
          6       1.00      0.01      0.02       125
          7       0.00      0.00      0.00         6
          8       0.00      0.00      0.00        59
          9       0.00      0.00      0.00         3
         10       0.00      0.00      0.00         1
         11       0.19      0.44      0.26       274
         12       0.00      0.00      0.00        74
         13       0.00      0.00      0.00        15
         14       0.00      0.00      0.00        19
         15       0.00      0.00      0.00         3
         16       0.00      0.00      0.00        29
         17       0.2

  'precision', 'predicted', average, warn_for)
