In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline

In [2]:
df = pd.read_csv("C:/Users/admin/Intern/DataSets/online_shoppers_intention.csv")

In [3]:
df.head()

Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,1,1,1,1,Returning_Visitor,False,False
1,0,0.0,0,0.0,2,64.0,0.0,0.1,0.0,0.0,Feb,2,2,1,2,Returning_Visitor,False,False
2,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,4,1,9,3,Returning_Visitor,False,False
3,0,0.0,0,0.0,2,2.666667,0.05,0.14,0.0,0.0,Feb,3,2,2,4,Returning_Visitor,False,False
4,0,0.0,0,0.0,10,627.5,0.02,0.05,0.0,0.0,Feb,3,3,1,4,Returning_Visitor,True,False


In [4]:
df.shape

(12330, 18)

In [5]:
cols = ['Month', 'VisitorType']
encoder = LabelEncoder()
for col in cols:
	df[col] = encoder.fit_transform(df[col])
 
df['Weekend'] = df['Weekend'].astype('int')
df['Revenue'] = df['Revenue'].astype('int')

In [6]:
pipe = Pipeline([
	('scaler', StandardScaler()),
	('svc', SVC())
])

In [7]:
X = df.drop('Revenue', axis=1)
y = df['Revenue']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# pipe.set_params(svc__C=0.1)
pipe.fit(X_train, y_train)

0,1,2
,steps,"[('scaler', ...), ('svc', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [10]:
y_pred = pipe.predict(X_test)

In [11]:
print("Initial Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Initial Accuracy: 0.8803730738037308
[[1991   64]
 [ 231  180]]
              precision    recall  f1-score   support

           0       0.90      0.97      0.93      2055
           1       0.74      0.44      0.55       411

    accuracy                           0.88      2466
   macro avg       0.82      0.70      0.74      2466
weighted avg       0.87      0.88      0.87      2466



In [12]:
parameters = {
	'C': [0.1, 0.5, 1, 10],
	'gamma': ['scale', 0.01, 0.001],
	'kernel': ['linear', 'rbf', 'poly']
}

In [13]:
svc = SVC()
grid = GridSearchCV(svc, parameters, cv=5, n_jobs=-1)

In [14]:
X_train_scaled = pipe.named_steps['scaler'].transform(X_train)
X_test_scaled = pipe.named_steps['scaler'].transform(X_test)

In [15]:
grid.fit(X_train_scaled, y_train)

0,1,2
,estimator,SVC()
,param_grid,"{'C': [0.1, 0.5, ...], 'gamma': ['scale', 0.01, ...], 'kernel': ['linear', 'rbf', ...]}"
,scoring,
,n_jobs,-1
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,C,10
,kernel,'rbf'
,degree,3
,gamma,0.01
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [16]:
print("Best Parameters:", grid.best_params_)
print("Best Score:", grid.best_score_)

Best Parameters: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
Best Score: 0.8959857672904701


In [None]:
best_svc = grid.best_estimator_
y_pred_best = best_svc.predict(X_test_scaled)

In [18]:
print("Tuned Accuracy:", accuracy_score(y_test, y_pred_best))
print(confusion_matrix(y_test, y_pred_best))
print(classification_report(y_test, y_pred_best))

Tuned Accuracy: 0.8856447688564477
[[1989   66]
 [ 216  195]]
              precision    recall  f1-score   support

           0       0.90      0.97      0.93      2055
           1       0.75      0.47      0.58       411

    accuracy                           0.89      2466
   macro avg       0.82      0.72      0.76      2466
weighted avg       0.88      0.89      0.87      2466

