In [1]:
import numpy  as np
import pandas as pd

from CustomLogisticRegression import LogisticRegresssion as clr

In [2]:
dfRaw = pd.read_csv('ClickExample.csv')
dfRaw.tail()

Unnamed: 0,Names,emails,Country,Time Spent on Site,Salary,Clicked
494,Rigel,egestas.blandit.Nam@semvitaealiquam.com,Sao Tome and Principe,19.222746,44969.13495,0
495,Walter,ligula@Cumsociis.ca,Nepal,22.665662,41686.20425,0
496,Vanna,Cum.sociis.natoque@Sedmolestie.edu,Zimbabwe,35.320239,23989.80864,0
497,Pearl,penatibus.et@massanonante.com,Philippines,26.53917,31708.57054,0
498,Nell,Quisque.varius@arcuVivamussit.net,Botswana,32.386148,74331.35442,1


### Clean, Encode, Scale

In [3]:
dfCleaned = dfRaw

dfCleaned['Country_Encoded'] = pd.factorize(dfCleaned['Country'])[0]

dfCleaned = dfCleaned.drop(columns=['emails', 'Country', 'Names', 'Country_Encoded'])

dfCleaned = dfCleaned[['Clicked', 'Time Spent on Site', 'Salary']]

dfCleaned['Time Spent on Site'] = dfCleaned['Time Spent on Site'].round(2).astype(np.int64)
dfCleaned['Salary'] = dfCleaned['Salary'].round(2).astype(np.int64)

dfCleaned = dfCleaned.rename(columns={'Time Spent on Site':'Time_Spent'})

dfCleaned.tail()

Unnamed: 0,Clicked,Time_Spent,Salary
494,0,19,44969
495,0,22,41686
496,0,35,23989
497,0,26,31708
498,1,32,74331


In [4]:
dfScaled = dfCleaned

from sklearn.preprocessing import StandardScaler

sScaler = StandardScaler()

dfScaled[['Time_Spent', 'Salary']] = sScaler.fit_transform(X=dfCleaned[['Time_Spent', 'Salary']])

dfScaled

Unnamed: 0,Clicked,Time_Spent,Salary
0,0,-0.814931,0.128281
1,1,-0.046215,1.378614
2,0,-1.364013,-0.621950
3,1,2.369747,-0.830435
4,0,0.173418,-0.819260
...,...,...,...
494,0,-1.473830,-0.417892
495,0,-1.144380,-0.590954
496,0,0.283234,-1.523840
497,0,-0.705114,-1.116938


In [5]:
dfReady = dfScaled
dfReady

Unnamed: 0,Clicked,Time_Spent,Salary
0,0,-0.814931,0.128281
1,1,-0.046215,1.378614
2,0,-1.364013,-0.621950
3,1,2.369747,-0.830435
4,0,0.173418,-0.819260
...,...,...,...
494,0,-1.473830,-0.417892
495,0,-1.144380,-0.590954
496,0,0.283234,-1.523840
497,0,-0.705114,-1.116938


### Split Train Test

In [6]:
from sklearn.model_selection import train_test_split

X = dfReady[['Salary', 'Time_Spent']]
y = dfReady['Clicked']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((349, 2), (150, 2), (349,), (150,))

### Train Model with SciKitLearn

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report


model01_LR = LogisticRegression()
model01_LR.fit(X_train, y_train)
y_pred_train = model01_LR.predict(X_train)
y_pred_test  = model01_LR.predict(X_test)


print("On Train:\n")
print(classification_report(y_pred_train, y_train))

print("On Test:\n")
print(classification_report(y_pred_test, y_test))

On Train:

              precision    recall  f1-score   support

           0       0.91      0.91      0.91       173
           1       0.91      0.91      0.91       176

    accuracy                           0.91       349
   macro avg       0.91      0.91      0.91       349
weighted avg       0.91      0.91      0.91       349

On Test:

              precision    recall  f1-score   support

           0       0.91      0.92      0.92        76
           1       0.92      0.91      0.91        74

    accuracy                           0.91       150
   macro avg       0.91      0.91      0.91       150
weighted avg       0.91      0.91      0.91       150



### Train Model with my module

In [13]:
myModel = clr(epochs=1000, learningRate=0.01)

myModel.fit(X_train, y_train)

myModel

Coefs: [1.44298016 1.514234  ]
Intercept:-0.014171875167040257