In [1]:
import pandas as pd
import numpy as np

from sklearn.svm import SVC, SVR
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, StratifiedKFold

from sklearn.metrics import confusion_matrix, classification_report

import matplotlib.pyplot as plt
import seaborn as sns

### Gathering Data

In [2]:
# Load the raw dataset from the CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer="Social_Network_Ads.csv")

In [3]:
# Preview the raw frame exactly as it was loaded from the CSV.
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [4]:
# Count the rows per target class to check how balanced the Purchased labels are.
df["Purchased"].value_counts()

0    257
1    143
Name: Purchased, dtype: int64

In [5]:
# Encode Gender as an integer feature (Male -> 1, Female -> 0).
# `Series.replace` with a str -> int mapping relies on pandas silently
# downcasting the object column, which is deprecated; `map` followed by an
# explicit `astype` gives the dtype deterministically.
# The identity entries (1 -> 1, 0 -> 0) keep the cell safe to re-run once the
# column is already encoded. Any unexpected label becomes NaN and makes the
# integer cast fail loudly instead of leaving stray strings in the column.
gender_codes = {"Male": 1, "Female": 0, 1: 1, 0: 0}
df["Gender"] = df["Gender"].map(gender_codes).astype("int64")

In [8]:
# Remove the per-user identifier column so it is not used as a feature.
# Rebinding (instead of `inplace=True`) together with `errors="ignore"` keeps
# the cell idempotent: re-running it after the column is already gone is a
# no-op rather than a KeyError.
df = df.drop(columns="User ID", errors="ignore")

In [9]:
# Preview the frame after encoding Gender and removing the User ID column.
df

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0
...,...,...,...,...
395,0,46,41000,1
396,1,51,23000,1
397,0,50,20000,1
398,1,36,33000,0


In [10]:
# Target vector: the Purchased label.
y = df["Purchased"]
# Feature matrix: every remaining column.
x = df.drop(columns=["Purchased"])

In [11]:
# Preview the feature matrix (all columns except Purchased).
x

Unnamed: 0,Gender,Age,EstimatedSalary
0,1,19,19000
1,1,35,20000
2,0,26,43000
3,0,27,57000
4,1,19,76000
...,...,...,...
395,0,46,41000
396,1,51,23000
397,0,50,20000
398,1,36,33000


In [12]:
# Min-max scaler that maps each feature to the [0, 1] range (the library default,
# spelled out here); it is fitted later on the training split only.
scaler = MinMaxScaler(feature_range=(0, 1))

### Train Test Split

In [14]:
# Stratified hold-out split with a fixed seed so the class ratio of y is kept
# in both parts. test_size=0.25 is the library default, written out explicitly.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    stratify=y,
    random_state=12,
)

In [16]:
# Learn the per-feature min/max from the training split only, so no statistics
# from the test split leak into the scaling.
scaler.fit(X=x_train)

In [17]:
# Replace the training features with their scaled values.
# NOTE: x_train is overwritten, so running this cell a second time would feed
# already-scaled values through the scaler again; re-run from the split cell.
x_train = scaler.transform(X=x_train)

In [20]:
# Wrap the scaled values in a DataFrame again so the feature names from x are
# kept; the rows get a fresh default integer index.
x_train = pd.DataFrame(data=x_train, columns=x.columns)

In [21]:
# Preview the scaled training features.
x_train

Unnamed: 0,Gender,Age,EstimatedSalary
0,0.0,0.761905,0.155556
1,1.0,0.166667,0.474074
2,1.0,0.785714,0.059259
3,0.0,0.214286,0.014815
4,1.0,0.476190,0.266667
...,...,...,...
295,0.0,0.714286,0.148148
296,0.0,0.261905,0.503704
297,1.0,0.714286,0.555556
298,0.0,0.952381,0.637037


### Model Training

In [22]:
# Baseline classifier: an SVC with all default hyperparameters, trained on the
# scaled training features.
svc_model = SVC()
svc_model.fit(X=x_train, y=y_train)

In [23]:
# Collect the baseline model's hyperparameters (deep=True is the default) and
# display them as the cell output.
parameters = svc_model.get_params(deep=True)
parameters

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
## The default SVC kernel is "rbf" (Radial Basis Function), as the parameter listing above shows.

### Model Evaluation

#### On Test Data

In [24]:
# Scale the held-out features with the scaler fitted on the training split.
# The model was fitted on a DataFrame with column names, while
# `scaler.transform` returns a bare array; predicting on that array makes
# scikit-learn warn that X has no valid feature names. Rebuilding a DataFrame
# with the original columns (and index) keeps training and inference inputs
# consistent.
x_test_scaled = pd.DataFrame(
    scaler.transform(x_test),
    columns=x_test.columns,
    index=x_test.index,
)
y_pred = svc_model.predict(x_test_scaled)



In [26]:
# Baseline model on the test split: confusion matrix followed by the
# per-class precision / recall / F1 report, printed as one newline-separated
# block.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(
    "Confusion Matrix:",
    conf_matrix,
    "",
    "Classification Report",
    classification_report(y_true=y_test, y_pred=y_pred),
    sep="\n",
)

Confusion Matrix:
[[56  8]
 [ 4 32]]

Classification Report
              precision    recall  f1-score   support

           0       0.93      0.88      0.90        64
           1       0.80      0.89      0.84        36

    accuracy                           0.88       100
   macro avg       0.87      0.88      0.87       100
weighted avg       0.89      0.88      0.88       100



#### On Train Data

In [27]:
# Predict on the (already scaled) training features to compare train and test
# performance of the baseline model.
y_pred_train = svc_model.predict(X=x_train)

In [28]:
# Baseline model on the training split: confusion matrix plus the
# classification report, assembled into a single printed text block.
conf_matrix = confusion_matrix(y_true=y_train, y_pred=y_pred_train)
print("\n".join([
    "Confusion Matrix:",
    str(conf_matrix),
    "",
    "Classification Report",
    classification_report(y_true=y_train, y_pred=y_pred_train),
]))

Confusion Matrix:
[[178  15]
 [ 12  95]]

Classification Report
              precision    recall  f1-score   support

           0       0.94      0.92      0.93       193
           1       0.86      0.89      0.88       107

    accuracy                           0.91       300
   macro avg       0.90      0.91      0.90       300
weighted avg       0.91      0.91      0.91       300



### Hyperparameter Tuning

In [29]:
# NOTE: rebinding svc_model replaces the fitted baseline classifier with a
# fresh, unfitted SVC that only serves as the template estimator for the search.
svc_model = SVC()

# Search space: integer C values 1..49 crossed with the four kernels below.
param_grid = dict(
    C=np.arange(1, 50),
    kernel=["linear", "poly", "rbf", "sigmoid"],
)

# Shuffled, stratified 3-fold cross-validation with a fixed seed so the folds
# are reproducible.
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=10)

gsvc_svc_model = GridSearchCV(estimator=svc_model, param_grid=param_grid, cv=kfold)

gsvc_svc_model.fit(X=x_train, y=y_train)

In [30]:
# Show the estimator that the grid search selected as best.
gsvc_svc_model.best_estimator_

In [31]:
# List every hyperparameter of the selected estimator, including the ones that
# were not part of the search grid.
gsvc_svc_model.best_estimator_.get_params()

{'C': 3,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

### Model Evaluation (After Hyperparameter Tuning)

#### On Test Data

In [32]:
# Evaluate the tuned model on the held-out split.
# The search was fitted on a DataFrame with column names, while
# `scaler.transform` returns a bare array; predicting on that array makes
# scikit-learn warn that X has no valid feature names. Rebuilding a DataFrame
# with the original columns (and index) keeps training and inference inputs
# consistent.
x_test_scaled = pd.DataFrame(
    scaler.transform(x_test),
    columns=x_test.columns,
    index=x_test.index,
)
y_pred = gsvc_svc_model.best_estimator_.predict(x_test_scaled)

conf_matrix = confusion_matrix(y_test,y_pred)
print("Confusion Matrix:")
print(conf_matrix)
print()
print("Classification Report")
print(classification_report(y_test,y_pred))

Confusion Matrix:
[[56  8]
 [ 4 32]]

Classification Report
              precision    recall  f1-score   support

           0       0.93      0.88      0.90        64
           1       0.80      0.89      0.84        36

    accuracy                           0.88       100
   macro avg       0.87      0.88      0.87       100
weighted avg       0.89      0.88      0.88       100





#### On Train Data

In [33]:
# Tuned model on the (already scaled) training split, for comparison with the
# test-split metrics.
y_pred_train = gsvc_svc_model.best_estimator_.predict(X=x_train)

conf_matrix = confusion_matrix(y_true=y_train, y_pred=y_pred_train)
print(
    "Confusion Matrix:",
    conf_matrix,
    "",
    "Classification Report",
    classification_report(y_true=y_train, y_pred=y_pred_train),
    sep="\n",
)

Confusion Matrix:
[[177  16]
 [ 12  95]]

Classification Report
              precision    recall  f1-score   support

           0       0.94      0.92      0.93       193
           1       0.86      0.89      0.87       107

    accuracy                           0.91       300
   macro avg       0.90      0.90      0.90       300
weighted avg       0.91      0.91      0.91       300

