# Grid Search

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the CSV, then split it column-wise: every column except the last goes
# into X, and the last column goes into y.
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as the test set; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

## Pipeline

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Two-step pipeline: standardize the features ('sc'), then classify with an
# RBF-kernel SVM ('svc'). The step names are what the grid-search parameter
# keys refer to.
pipe = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('svc', SVC(C=1, kernel='rbf', gamma=0.7, random_state=0)),
])

## Applying k-Fold Cross Validation

In [5]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the whole pipeline, using the training split only.
accuracies = cross_val_score(pipe, X_train, y_train, cv=10)

# Report the mean and the spread of the per-fold scores as percentages.
print("Accuracy: {:.2f} %".format(100 * accuracies.mean()))
print("Standard Deviation: {:.2f} %".format(100 * accuracies.std()))

Accuracy: 91.00 %
Standard Deviation: 6.67 %


## Applying Grid Search to find the best model and the best parameters

In [6]:
from sklearn.model_selection import GridSearchCV

# Pipeline hyperparameters are addressed as '<step name>__<parameter>' (double
# underscore), hence the 'svc__' prefix on every key below.
#
# The grid is split into two dictionaries because the 'gamma' hyperparameter is
# only used by the 'rbf', 'poly' and 'sigmoid' kernels, not by 'linear'.
# Otherwise a single dictionary would have been enough.
parameters = [
    {
        'svc__C': [0.25, 0.5, 0.75, 1],
        'svc__kernel': ['linear'],
    },
    {
        'svc__C': [0.25, 0.5, 0.75, 1],
        'svc__kernel': ['rbf', 'poly', 'sigmoid'],
        'svc__gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    },
]

# Exhaustive search over both sub-grids, scored by 10-fold cross-validated
# accuracy; n_jobs=-1 runs the fits on all available cores.
grid_search = GridSearchCV(pipe, parameters, scoring='accuracy', cv=10, n_jobs=-1)
grid_search.fit(X_train, y_train)

best_accuracy, best_parameters = grid_search.best_score_, grid_search.best_params_

print("Best Accuracy: {:.2f} %".format(100 * best_accuracy))
print("Best Parameters:", best_parameters)

Best Accuracy: 91.00 %
Best Parameters: {'svc__C': 1, 'svc__gamma': 0.7, 'svc__kernel': 'rbf'}


## Making the Confusion Matrix

In [7]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Predict on the held-out test split with the fitted grid search, then
# summarize the errors as a confusion matrix.
y_pred = grid_search.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Left as the last expression so the notebook displays the test accuracy.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[64  4]
 [ 3 29]]


0.93