In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Read the raw data from the CSV file (path is relative to the working directory).
dataset = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')

In [3]:
# One-hot encode the categorical columns; drop_first=True drops the first
# level of each encoded column (the 'Gender_Male' column used later comes from this step).
dataset = pd.get_dummies(data=dataset, drop_first=True)

In [4]:
# Separate the predictor columns from the target column.
feature_columns = ['Age', 'EstimatedSalary', 'Gender_Male']
indep = dataset[feature_columns]
dep = dataset['Purchased']

In [5]:
# Hold out one third of the rows as a test set; the fixed random_state keeps
# the split repeatable between runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    indep,
    dep,
    test_size=1/3,
    random_state=0,
)


In [6]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both the training and the test rows.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [7]:
from sklearn.svm import SVC

In [17]:
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameters for the SVC:
# 4 kernels x 2 gamma settings x 5 C values x 2 decision-function shapes = 80 candidates.
# NOTE(review): per the scikit-learn SVC docs, `decision_function_shape` is ignored
# for binary classification and `gamma` is not used by the linear kernel, so part of
# this grid likely repeats identical fits (the target looks binary from the 2x2
# confusion matrix below) -- TODO confirm and consider trimming the grid.
param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
              'gamma': ['auto', 'scale'],
              'C': [10, 100, 1000, 2000, 3000],
              'decision_function_shape': ['ovo', 'ovr']}

# probability=True makes SVC run an internal, randomised cross-validation to
# calibrate predict_proba; random_state pins that randomness so the
# probabilities (and the ROC-AUC computed from them) are reproducible.
grid = GridSearchCV(
    SVC(probability=True, random_state=0),
    param_grid,
    refit=True,            # refit the best candidate on the whole training set
    verbose=3,
    cv=5,
    n_jobs=-1,             # use all available cores
    scoring='f1_weighted',
)

# fitting the model for grid search
grid.fit(X_train, y_train)

Fitting 5 folds for each of 80 candidates, totalling 400 fits


In [18]:
# Predict the held-out rows with the refitted best estimator.
grid_predictions = grid.predict(X_test)

# Keep the per-candidate cross-validation results for tabulation later.
# NOTE: the name `re` would shadow the standard-library `re` module if that
# module were imported in this notebook.
re = grid.cv_results_

In [19]:
from sklearn.metrics import confusion_matrix

# Cross-tabulate true test labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_true=y_test, y_pred=grid_predictions)

In [20]:
# Show the raw confusion-matrix counts.
print(cm)

[[80  5]
 [ 7 42]]


In [21]:
from sklearn.metrics import classification_report

# Build the text report of per-class precision, recall and F1 for the
# test-set predictions.
clf_report = classification_report(y_true=y_test, y_pred=grid_predictions)

In [22]:
# Show the formatted report text.
print(clf_report)

              precision    recall  f1-score   support

           0       0.92      0.94      0.93        85
           1       0.89      0.86      0.88        49

    accuracy                           0.91       134
   macro avg       0.91      0.90      0.90       134
weighted avg       0.91      0.91      0.91       134



In [23]:
from sklearn.metrics import f1_score

# average='weighted' averages the per-class F1 scores weighted by class support.
# This is the same metric the grid search optimises (scoring='f1_weighted'),
# not the macro average, so the variable and the printed label say "weighted".
f1_weighted = f1_score(y_test, grid_predictions, average='weighted')

# Legacy name kept so any later cell that reads `f1_macro` still works;
# note that the value is the weighted average, not the macro average.
f1_macro = f1_weighted

print("The f1_weighted value for best parameter {}:".format(grid.best_params_), f1_weighted)

The f1_macro value for best parameter {'C': 100, 'decision_function_shape': 'ovo', 'gamma': 'auto', 'kernel': 'rbf'}: 0.9100355779243318


In [24]:
from sklearn.metrics import roc_auc_score

# Take column 1 of predict_proba as the score for the ROC-AUC computation;
# the bare last expression is displayed as the cell output.
positive_class_proba = grid.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, positive_class_proba)

0.9539015606242497

In [25]:
# Turn the cross-validation results dict into a DataFrame, one row per candidate.
table = pd.DataFrame(data=re)

In [26]:
# Render the grid-search results table as the cell output.
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_decision_function_shape,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.024145,0.004484,0.007554,0.002185,10,ovo,auto,linear,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.776290,0.790949,0.698235,0.923510,0.901744,0.818146,0.083619,41
1,0.029567,0.002550,0.003648,0.001164,10,ovo,auto,poly,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.799620,0.808392,0.787943,0.924528,0.901744,0.844445,0.056918,21
2,0.022452,0.008258,0.009124,0.002160,10,ovo,auto,rbf,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.867478,0.886792,0.869709,0.944161,0.943041,0.902236,0.034431,5
3,0.019673,0.002274,0.004188,0.002829,10,ovo,auto,sigmoid,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.762677,0.738916,0.655795,0.796284,0.766556,0.744045,0.047743,77
4,0.029741,0.002716,0.006358,0.004646,10,ovo,scale,linear,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.776290,0.790949,0.698235,0.923510,0.901744,0.818146,0.083619,41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,0.018681,0.003315,0.010132,0.003877,3000,ovr,auto,sigmoid,"{'C': 3000, 'decision_function_shape': 'ovr', ...",0.791752,0.714931,0.698113,0.796284,0.766556,0.753527,0.040048,71
76,2.629176,1.383352,0.007938,0.003248,3000,ovr,scale,linear,"{'C': 3000, 'decision_function_shape': 'ovr', ...",0.776290,0.790949,0.698235,0.923510,0.901744,0.818146,0.083619,41
77,2.817012,1.482789,0.006385,0.004820,3000,ovr,scale,poly,"{'C': 3000, 'decision_function_shape': 'ovr', ...",0.822092,0.765553,0.766556,0.924528,0.901744,0.836095,0.066541,29
78,0.226424,0.028827,0.006659,0.002473,3000,ovr,scale,rbf,"{'C': 3000, 'decision_function_shape': 'ovr', ...",0.826263,0.866968,0.851527,0.847020,0.883278,0.855011,0.019206,17
