In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

In [2]:
data = pd.read_csv("data_cleaned.csv")
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,0.818182,0.163647,1.0,0.0,0.5,0.0,0.5,0.0,0
1,0.454545,0.435262,1.0,1.0,0.0,1.0,1.0,1.0,0
2,0.545455,0.219253,0.0,1.0,0.5,1.0,1.0,0.0,0
3,0.727273,0.939882,1.0,0.0,0.0,1.0,1.0,0.5,0
4,0.090909,0.022339,1.0,0.0,1.0,0.0,1.0,0.0,0


# SVM

In [3]:
X = data.drop("conversion", axis=1)
y = data["conversion"]

In [4]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC 
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score , confusion_matrix, precision_score, recall_score, f1_score, classification_report

In [5]:
model = SVC()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
model.fit(X_train, y_train)

y_pred_test = model.predict(X_test)

acc = accuracy_score(y_test, y_pred_test)
f1 = f1_score(y_test, y_pred_test)
precision = precision_score(y_test, y_pred_test)
recall = recall_score(y_test, y_pred_test)

print("Accuracy : {}".format(acc))
print("Precision : {}".format(precision))
print("recall : {}".format(recall))
print("f1 : {}".format(f1))

print("----- TEST ------")
report = classification_report(y_test,y_pred_test, output_dict=True)
test_report = pd.DataFrame(report).transpose()
display(test_report)

Accuracy : 0.7268597570798029
Precision : 0.7797227722772277
recall : 0.6334212727623705
f1 : 0.6989987928708371
----- TEST ------


Unnamed: 0,precision,recall,f1-score,support
0,0.690616,0.820557,0.75,15498.0
1,0.779723,0.633421,0.698999,15541.0
accuracy,0.72686,0.72686,0.72686,0.72686
macro avg,0.735169,0.726989,0.724499,31039.0
weighted avg,0.735231,0.72686,0.724464,31039.0


#  SVM + GridSearch

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

C = [0.1, 1, 0.2, 1, 10, 100]
kernels = ["rbf", "rbf", "poly", "poly", "rbf", "linear"]
gammas = [0.1, 0.1, 1, 0.5, 10, 1]

for c, kernel, gamma in zip(C, kernels, gammas):
    model = SVC(C=c, gamma = gamma, kernel = kernel)
    print(model)
    model.fit(X_train, y_train)

    y_pred_test = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred_test)
    f1 = f1_score(y_test, y_pred_test)
    precision = precision_score(y_test, y_pred_test)
    recall = recall_score(y_test, y_pred_test)

    print("Accuracy : {}".format(acc))
    print("Precision : {}".format(precision))
    print("recall : {}".format(recall))
    print("f1 : {}".format(f1))

    print("----- TEST ------")
    report = classification_report(y_test,y_pred_test, output_dict=True)
    test_report = pd.DataFrame(report).transpose()
    display(test_report)

SVC(C=0.1, gamma=0.1)
Accuracy : 0.6276507316695985
Precision : 0.6210000756486875
recall : 0.6398285268901013
f1 : 0.630273714921878
----- TEST ------


Unnamed: 0,precision,recall,f1-score,support
0,0.634602,0.615665,0.62499,26071.0
1,0.621,0.639829,0.630274,25660.0
accuracy,0.627651,0.627651,0.627651,0.627651
macro avg,0.627801,0.627747,0.627632,51731.0
weighted avg,0.627855,0.627651,0.627611,51731.0


SVC(C=1, gamma=0.1)
Accuracy : 0.6274574239817517
Precision : 0.6107643223748093
recall : 0.6863600935307872
f1 : 0.6463593658250147
----- TEST ------


Unnamed: 0,precision,recall,f1-score,support
0,0.648482,0.569483,0.606421,26071.0
1,0.610764,0.68636,0.646359,25660.0
accuracy,0.627457,0.627457,0.627457,0.627457
macro avg,0.629623,0.627922,0.62639,51731.0
weighted avg,0.629773,0.627457,0.626231,51731.0


SVC(C=0.2, gamma=1, kernel='poly')
Accuracy : 0.6381280083508921
Precision : 0.6470338983050847
recall : 0.5950896336710834
f1 : 0.6199756394640682
----- TEST ------


Unnamed: 0,precision,recall,f1-score,support
0,0.630657,0.680488,0.654625,26071.0
1,0.647034,0.59509,0.619976,25660.0
accuracy,0.638128,0.638128,0.638128,0.638128
macro avg,0.638845,0.637789,0.6373,51731.0
weighted avg,0.63878,0.638128,0.637438,51731.0


SVC(C=1, gamma=0.5, kernel='poly')
Accuracy : 0.63766406990006
Precision : 0.6468739381583418
recall : 0.5935307872174591
f1 : 0.6190553613527355
----- TEST ------


Unnamed: 0,precision,recall,f1-score,support
0,0.629971,0.681102,0.654539,26071.0
1,0.646874,0.593531,0.619055,25660.0
accuracy,0.637664,0.637664,0.637664,0.637664
macro avg,0.638423,0.637316,0.636797,51731.0
weighted avg,0.638355,0.637664,0.636938,51731.0


SVC(C=10, gamma=10)
Accuracy : 0.8205331426030813
Precision : 0.9967843708287829
recall : 0.640257209664848
f1 : 0.7796972141806274
----- TEST ------


Unnamed: 0,precision,recall,f1-score,support
0,0.73812,0.997967,0.848598,26071.0
1,0.996784,0.640257,0.779697,25660.0
accuracy,0.820533,0.820533,0.820533,0.820533
macro avg,0.867452,0.819112,0.814147,51731.0
weighted avg,0.866425,0.820533,0.814421,51731.0


SVC(C=100, gamma=1, kernel='linear')
Accuracy : 0.5696390945467902
Precision : 0.5570138632472894
recall : 0.6466874512860483
f1 : 0.5985103965663379
----- TEST ------


Unnamed: 0,precision,recall,f1-score,support
0,0.586782,0.493805,0.536294,26071.0
1,0.557014,0.646687,0.59851,25660.0
accuracy,0.569639,0.569639,0.569639,0.569639
macro avg,0.571898,0.570246,0.567402,51731.0
weighted avg,0.572016,0.569639,0.567155,51731.0


#  SVM + Wrapper + GS

In [6]:
X = data[["recency", "zip_code", "is_referral","channel", "offer"]]
y = data["conversion"]


model = SVC(C=10, gamma=10)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
model.fit(X_train, y_train)

y_pred_test = model.predict(X_test)

acc = accuracy_score(y_test, y_pred_test)
f1 = f1_score(y_test, y_pred_test)
precision = precision_score(y_test, y_pred_test)
recall = recall_score(y_test, y_pred_test)

print("Accuracy : {}".format(acc))
print("Precision : {}".format(precision))
print("recall : {}".format(recall))
print("f1 : {}".format(f1))

print("----- TEST ------")
report = classification_report(y_test,y_pred_test, output_dict=True)
test_report = pd.DataFrame(report).transpose()
display(test_report)

Accuracy : 0.8183294349616285
Precision : 0.9902053900035617
recall : 0.6437061048082118
f1 : 0.7802151543498597
----- TEST ------


Unnamed: 0,precision,recall,f1-score,support
0,0.73533,0.993609,0.845178,25817.0
1,0.990205,0.643706,0.780215,25914.0
accuracy,0.818329,0.818329,0.818329,0.818329
macro avg,0.862768,0.818657,0.812697,51731.0
weighted avg,0.863007,0.818329,0.812636,51731.0
