# Model Selection

In [1]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

# Load the social-network ads dataset from the working directory,
# then preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')
df.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0


In [9]:
# Features: only Age and EstimatedSalary are used to predict Purchased.
# Columns are selected by name rather than by position so the cell keeps
# selecting the right data even if the CSV's column layout changes.
X = df[['Age', 'EstimatedSalary']].values  # 2-D feature matrix
y = df['Purchased'].values                 # 1-D target vector

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Standardize the features. The scaler is fitted on the training split only and
# then reused on the test split, so the test rows do not influence the scaling.
from sklearn.preprocessing import StandardScaler
scale_X = StandardScaler()
X_train = scale_X.fit_transform(X_train)
X_test = scale_X.transform(X_test)

# Support vector classifier with an RBF (Gaussian) kernel.
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', gamma = 'auto', random_state = 0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

# Confusion matrix: rows are the true classes, columns the predicted classes.
from sklearn.metrics import confusion_matrix
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

# accuracy_score takes (y_true, y_pred) -- same argument order as confusion_matrix above.
from sklearn.metrics import accuracy_score
print('Accuracy : ', accuracy_score(y_test, y_pred))

[[64  4]
 [ 3 29]]
Accuracy :  0.93


## Applying K-Fold Cross Validation

In [3]:
from sklearn.model_selection import cross_val_score

# Estimate the classifier's accuracy with 10-fold cross-validation on the
# training split.
# NOTE(review): X_train was standardized on the whole training split before this
# cell, so each validation fold has already contributed to the scaling statistics;
# the fold scores may therefore be slightly optimistic.
accuracies = cross_val_score(classifier, X_train, y_train, cv = 10)

# Per-fold scores, followed by their mean and spread across the folds.
print('Accuracies : ', accuracies)
print('Accuracy mean : ', np.mean(accuracies))
print('Accuracy standard deviation : ', np.std(accuracies))

Accuracies :  [0.80645161 0.96666667 0.8        0.93333333 0.86666667 0.83333333
 0.93333333 0.93333333 0.96666667 0.96551724]
Accuracy mean :  0.9005302187615868
Accuracy standard deviation :  0.06388957356626285


## Applying Grid Search 

We apply grid search to find the best-performing model and its best hyperparameters.

In [4]:
from sklearn.model_selection import GridSearchCV

# Candidate values shared by the kernel-specific grids below.
c_values = [1, 10, 100, 1000]
gamma_values = [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

# One sub-grid per kernel: the linear kernel only tunes C, while the
# rbf and poly kernels additionally tune gamma.
parameters = [
    {'C': c_values, 'kernel': ['linear']},
    {'C': c_values, 'kernel': ['rbf'], 'gamma': gamma_values},
    {'C': c_values, 'kernel': ['poly'], 'gamma': gamma_values},
]

# Exhaustive search scored by accuracy with 10-fold cross-validation,
# using all available CPU cores (n_jobs = -1).
grid_search = GridSearchCV(
    estimator = classifier,
    param_grid = parameters,
    scoring = 'accuracy',
    cv = 10,
    n_jobs = -1,
)
grid_search.fit(X_train, y_train)  # fit() returns the same object, so no reassignment is needed

# Best mean cross-validated score and the parameter combination that achieved it.
best_accuracy = grid_search.best_score_
best_params = grid_search.best_params_
print('Best accuracy : ', best_accuracy)
print('Best parameters : ', best_params)

Best accuracy :  0.9033333333333333
Best parameters :  {'C': 1, 'gamma': 0.7, 'kernel': 'rbf'}


