GRIDSEARCHCV WITH HYPERPARAMETER TUNING 

In [23]:
# IMPORT THE LIBRARIES
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [24]:
# READ THE DATASET

In [25]:
dataset=pd.read_csv('Advertising_data.csv')

In [26]:
dataset.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0


In [27]:
# DIVIDE THE DATA SET INTO X AND Y

In [28]:
x=dataset.iloc[:,[2,3]]
y=dataset.iloc[:,4].values
x.head()

Unnamed: 0,Age,EstimatedSalary
0,19.0,19000.0
1,35.0,20000.0
2,26.0,43000.0
3,27.0,57000.0
4,19.0,76000.0


In [29]:
# SPLITTING THE DATASET INTO TRAIN AND TEST DATA

In [30]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)

In [31]:
# FEATURE SCALING

In [32]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x_train=sc.fit_transform(x_train)
x_test=sc.fit_transform(x_test)

In [33]:
# FITTING THE SVM MODEL TO TRAIN DATASET

In [34]:
from sklearn.svm import SVC
classifier=SVC(kernel='linear',random_state=0)
classifier.fit(x_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=0,
    shrinking=True, tol=0.001, verbose=False)

In [35]:
# PREDICTION FOR TEST DATA SET

In [36]:
y_predict=classifier.predict(x_test)

In [37]:
# PERFORMANCE METRICS FOR CLASSIFICATION MODELS

In [38]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
cm=confusion_matrix(y_test,y_predict)
accuracyscore=accuracy_score(y_test,y_predict)
classificationreport=classification_report(y_test,y_predict)


In [39]:
cm

array([[52,  6],
       [ 3, 19]], dtype=int64)

In [40]:
accuracyscore

0.8875

In [41]:
print(classificationreport)

              precision    recall  f1-score   support

           0       0.95      0.90      0.92        58
           1       0.76      0.86      0.81        22

    accuracy                           0.89        80
   macro avg       0.85      0.88      0.86        80
weighted avg       0.89      0.89      0.89        80



In [42]:
# APPLYING GRIDSEARHCV FOR BEST HYPERPARAMETERS AND SELECTION OF BEST MODEL

In [46]:
from sklearn.model_selection import GridSearchCV
parameters=[{'C':[1,10,100,1000],'kernel':['linear']},
           {'C':[1,10,100,1000],'kernel':['rbf'],'gamma':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]}]
grid_search=GridSearchCV(estimator=classifier,
                        param_grid=parameters,
                        scoring='accuracy',
                        cv=10,
                        n_jobs=-1)
grid_search=grid_search.fit(x_train,y_train)



In [48]:
# MODEL ACCURACY
accuracy=grid_search.best_score_

In [49]:
accuracy

0.90625

In [50]:
bestparameters=grid_search.best_params_

In [51]:
bestparameters

{'C': 1, 'gamma': 0.9, 'kernel': 'rbf'}

In [54]:
# APPLYING THE TUNED PARAMETERS TO MODEL
classifier=SVC(C= 1, gamma= 0.9, kernel= 'rbf',random_state=0)
classifier.fit(x_train,y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.9, kernel='rbf',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

In [55]:
y_predict=classifier.predict(x_test)

In [56]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
cm=confusion_matrix(y_test,y_predict)
accuracyscore=accuracy_score(y_test,y_predict)
classificationreport=classification_report(y_test,y_predict)

In [57]:
cm

array([[55,  3],
       [ 1, 21]], dtype=int64)

In [58]:
accuracyscore

0.95

In [60]:
print(classificationreport)

              precision    recall  f1-score   support

           0       0.98      0.95      0.96        58
           1       0.88      0.95      0.91        22

    accuracy                           0.95        80
   macro avg       0.93      0.95      0.94        80
weighted avg       0.95      0.95      0.95        80

