# This notebook is part of section 5.7

## Import the libraries

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
%matplotlib inline

## Set the working directory

In [8]:
# Directory that holds the input CSV. The original location ("D:") is kept as
# the default, but it can be overridden with the ADS_DATA_DIR environment
# variable so the notebook is not tied to one machine's drive layout.
DATA_DIR = os.environ.get("ADS_DATA_DIR", "D:")
os.chdir(DATA_DIR)
# Display the resulting working directory as the cell output.
os.getcwd()

'D:\\'

## Load the dataset

In [9]:
# Read the data from the current working directory (file name is relative).
dataset = pd.read_csv(filepath_or_buffer='ads.csv')
# Preview the first five rows to check the columns that were parsed.
dataset.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


## Split the dataset

In [11]:
# Select features and target by column name instead of bare positions, so the
# split cannot silently pick up the wrong columns if the CSV layout shifts
# (for example when an extra index column is written to the file).
# NOTE(review): assumes the original positions 2, 3 and 4 were Age,
# EstimatedSalary and Purchased (i.e. the leading unnamed column in the
# preview is the displayed index, not a real column) - confirm against ads.csv.
FEATURE_COLUMNS = ['Age', 'EstimatedSalary']
TARGET_COLUMN = 'Purchased'

X = dataset[FEATURE_COLUMNS].values
y = dataset[TARGET_COLUMN].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

## Perform Feature Scaling

In [12]:
# Learn the scaling statistics from the training split only ...
sc = StandardScaler().fit(X_train)
# ... then apply that same fitted transformation to both splits, so the test
# split is transformed but never used for fitting.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)



## Train a Support Vector Classifier

In [13]:
# RBF-kernel support vector classifier fitted on the scaled training split.
# gamma is pinned to 'auto', the value shown in the recorded estimator repr.
# Newer scikit-learn releases default to 'scale', so leaving it implicit makes
# the fitted model depend on the installed library version.
classifier = SVC(kernel='rbf', gamma='auto', random_state=42)
classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=42, shrinking=True,
  tol=0.001, verbose=False)

## Test the SVC using Confusion Matrix and Cross-Validation

In [26]:
# Hold-out check: confusion matrix of true vs. predicted labels on the test split.
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# 10-fold cross-validation of the same estimator on the training split.
accuracies = cross_val_score(classifier, X_train, y_train, cv=10)

# Report the confusion matrix, then the mean and standard deviation of the
# per-fold scores.
print(cm)
print(np.mean(accuracies))
print(np.std(accuracies))

[[68  5]
 [ 2 45]]
0.8922459405218026
0.04844494533063745


## Perform Grid Search

In [21]:
# Candidate hyper-parameters: a linear kernel over C, and an RBF kernel over
# every (C, gamma) combination.
parameters = [
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    {'kernel': ['rbf'],
     'C': [1, 10, 100, 1000],
     'gamma': [0.5, 0.1, 0.01, 0.001, 0.0001]},
]

# Exhaustive search scored by 10-fold cross-validated accuracy; n_jobs=-1
# runs the fits in parallel.
grid_search = GridSearchCV(classifier, parameters,
                           scoring='accuracy', cv=10, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best mean cross-validated score found; shown as the cell output.
best_accuracy = grid_search.best_score_
best_accuracy

0.8928571428571429

## Get the best parameters

In [22]:
best_parameters=grid_search.best_params_
best_parameters

{'C': 1, 'gamma': 0.5, 'kernel': 'rbf'}

## Fine-tune even more

In [23]:
# Second pass: same C values, but a finer gamma grid (0.1 to 0.9 in steps of
# 0.1) for the RBF kernel.
parameters = [
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    {'kernel': ['rbf'],
     'C': [1, 10, 100, 1000],
     'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]},
]

# Exhaustive search scored by 10-fold cross-validated accuracy; n_jobs=-1
# runs the fits in parallel.
grid_search = GridSearchCV(classifier, parameters,
                           scoring='accuracy', cv=10, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Report the best mean cross-validated score and the parameters behind it.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print(best_accuracy)
print(best_parameters)

0.9
{'C': 100, 'gamma': 0.6, 'kernel': 'rbf'}
