# Support Vector Machines - SVC() Scikit-Learn - Classification

### Mathieu Vandecasteele - https://mathieuvdc.com/

In [None]:
# Useful Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import time
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.datasets import load_digits

In [None]:
# Loading the Data and optional pre-processing.
# This cell must define X (the features) and y (the target labels).
# The digits dataset shipped with scikit-learn is loaded by default so that the
# notebook runs from a fresh kernel; replace the line below with your own data.
# It is recommended to process some descriptive statistics before training.
X, y = load_digits(return_X_y=True)
print("Shape of X is "+str(X.shape))
print("Shape of y is "+str(y.shape))

In [None]:
# Hold out a test set: split X and y into train and test partitions.
test_size = 0.20      # fraction of the samples reserved for the test split
random_state = 42     # fixed seed so that the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=random_state,
)
print("Spliting...")
print("Test ratio : {}".format(test_size))
print("\nShape of Train dataset is {}".format(X_train.shape))
print("Shape of Test dataset is {}".format(X_test.shape))

In [None]:
# Standardization
# Toggle: True rescales the features, False leaves X_train / X_test untouched.
standardize = True

if standardize:
    print("\nStandardization ...")
    # The scaler is fitted on the training split only and the same fitted
    # transformation is then applied to the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

In [None]:
# First Simple Training
# Baseline: fit an SVC with its default hyper-parameters (gamma="scale") and
# evaluate it on the held-out test split.

print("\nPerform First Simple Training...\n")
svc = SVC(gamma="scale")
print(svc)

time1 = time.time()
svc.fit(X_train, y_train)
elapsed = time.time() - time1
print("\nDone ! Time for training : "+str(elapsed)+" seconds.\n")

# Simple Training Report
# SVC.score returns the mean accuracy on the given data (not the precision).
score = svc.score(X_test, y_test)
print("Accuracy/Score on Test Dataset : "+str(score)+"\n")

y_pred = svc.predict(X_test)
# scikit-learn metrics expect (y_true, y_pred) in that order. Passing them
# swapped exchanges precision and recall in the report and transposes the
# confusion matrix.
print("Classification Report on Test Dataset (y_test/y_pred) :\n")
print(classification_report(y_test, y_pred))
print("Confusion Matrix on Test Dataset (rows: y_test, columns: y_pred) :\n")
print(confusion_matrix(y_test, y_pred))

In [None]:
# Hyper-parameter tuning: grid search combined with cross-validation.

print("Perform GridSearchCV and tuning parameters... \n")

# Personalization of the search :
#   parameters : grid of SVC hyper-parameters to evaluate
#   cv_number  : value passed to GridSearchCV as `cv`
#   njobs      : value passed as `n_jobs` (set to -1 for using all the processors)
parameters = {
    'kernel': ('rbf', 'linear'),
    'C': [0.1, 1, 2, 5, 7, 8, 9, 10, 12, 20],
}
cv_number = 5
njobs = -1

svc = SVC(gamma="scale")
clf = GridSearchCV(
    svc,
    parameters,
    cv=cv_number,
    n_jobs=njobs,
    return_train_score=True,
)
print(clf)

# Wall-clock duration of the complete search.
time2 = time.time()
clf.fit(X_train, y_train)
elapsed2 = time.time() - time2
print("\nDone ! Time elapsed for tuning parameters : {} seconds.\n".format(elapsed2))


# GridSearchCV Report
# Summarises the fitted search: best cross-validation score, best parameters,
# and the performance of the selected model on the held-out test split.

print("Best validation score : "+str(clf.best_score_))

print("\nBest parameters : ")
# best_params_ holds the selected value for every searched parameter, so the
# report does not depend on how the grid itself was declared.
best_parameters = clf.best_params_
for param_name in sorted(best_parameters):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

# No `scoring` was given to GridSearchCV, so this is the estimator's own
# score, i.e. the mean accuracy for SVC (not the precision).
score = clf.score(X_test, y_test)
print("\nAccuracy/Score on Test Dataset : "+str(score)+"\n")

y_pred = clf.predict(X_test)
# scikit-learn metrics expect (y_true, y_pred) in that order. Passing them
# swapped exchanges precision and recall in the report and transposes the
# confusion matrix.
print("Classification Report on Test Dataset (y_test/y_pred) :\n")
print(classification_report(y_test, y_pred))
print("Confusion Matrix on Test Dataset (rows: y_test, columns: y_pred) :\n")
print(confusion_matrix(y_test, y_pred))
# Mean, over all parameter combinations, of the per-combination mean fit time.
print("\nAverage training time for one model :")
print(str(np.mean(clf.cv_results_['mean_fit_time']))+" seconds.")


# Detailed results (set to True to see all results from GridSearchCV)
display_gridsearch_details = False

if display_gridsearch_details:
    print('All results :')
    cv_results = clf.cv_results_
    # One line per parameter combination: mean test score, twice its standard
    # deviation across folds, and the parameters that produced it.
    for idx, params in enumerate(cv_results['params']):
        mean = cv_results['mean_test_score'][idx]
        std = cv_results['std_test_score'][idx]
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    print('')

In [None]:
# Last Final Training
# Fit a fresh SVC with the best parameters found by the grid search and
# evaluate it on the held-out test split.

print("Perform Last Final Training...\n")

# For parameters you can directly reuse the best params dictionary of the GridSearchCV classifier or write it manually.
# set_params is used instead of SVC(gamma='scale', **clf.best_params_) so that
# a grid which also tunes 'gamma' overrides the default value instead of
# raising a TypeError for a duplicated keyword argument.
svc = SVC(gamma='scale').set_params(**clf.best_params_)
print(svc)

time3 = time.time()
svc.fit(X_train, y_train)
elapsed3 = time.time() - time3
print("Done ! Time for training : "+str(elapsed3)+" seconds.\n")

# Last Training Report
# SVC.score returns the mean accuracy on the given data (not the precision).
score = svc.score(X_test, y_test)
print("Accuracy/Score on Test Dataset : "+str(score)+"\n")

y_pred = svc.predict(X_test)
# scikit-learn metrics expect (y_true, y_pred) in that order. Passing them
# swapped exchanges precision and recall in the report and transposes the
# confusion matrix.
print("Classification Report on Test Dataset (y_test/y_pred) :\n")
print(classification_report(y_test, y_pred))
print("Confusion Matrix on Test Dataset (rows: y_test, columns: y_pred) :\n")
print(confusion_matrix(y_test, y_pred))

Finished !

You can use the fitted grid-search object (`clf`, in particular `clf.cv_results_`) to plot curves of the train and validation scores if you want...