# Logistic Regression from scratch #

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score

In [2]:
dataset = pd.read_csv('BuyComputer.csv')
dataset.drop(columns=['User ID',],axis=1,inplace=True)
dataset.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [3]:
X = dataset.iloc[:,:-1]
Y = dataset.loc[:,'Purchased']
print(X)
print(Y)

     Age  EstimatedSalary
0     19            19000
1     35            20000
2     26            43000
3     27            57000
4     19            76000
..   ...              ...
395   46            41000
396   51            23000
397   50            20000
398   36            33000
399   49            36000

[400 rows x 2 columns]
0      0
1      0
2      0
3      0
4      0
      ..
395    1
396    1
397    1
398    0
399    1
Name: Purchased, Length: 400, dtype: int64


In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,Y,random_state=53)
print("Training data: ", X_train.shape)
print("Testing data: ", X_test.shape)

Training data:  (300, 2)
Testing data:  (100, 2)


In [5]:
# Scaling data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [6]:
no_of_rows = X_train.shape[0]
no_of_columns = X_train.shape[1]

w = np.zeros(X_train.shape[1])
b = 0
iteration = 3000
learning_rate = 0.1

In [7]:
# Sigmoid function
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [8]:
def predict(data):
    z = np.dot(data, w) + b
    y = sigmoid(z)
    for i in range(len(y)):
        if(y[i] >= 0.5):
            y[i] = 1
        else:
            y[i] = 0
    return y

In [9]:
# Loss function
def loss(y, y_hat):
    loss = np.sum(-y * np.log(y_hat) - (1-y)*np.log(1-y_hat))
    loss = loss / no_of_rows
    return loss

In [10]:
for i in range(iteration):
    y_predicated = sigmoid( (np.dot(w,X_train.T) + b) );
    cost = loss(y_train, y_predicated);
    wd = np.dot((y_predicated - y_train).T, X_train) / no_of_rows
    bd = np.mean(y_predicated - y_train)
    w = w - learning_rate * wd
    b = b - learning_rate * bd
    #print("Round:",i,"Weight:",w,"Bias:",b)

In [11]:
print("Weight : ",w)
print("Bias : " ,b)

y_test_pred = predict( X_test )
print()
ind = 0
for i in y_test:
    print("y_test predictions : ", y_test_pred[ind], end = "  ->  ")
    print("y_test actual : ",i)
    ind += 1

Weight :  [2.56230651 1.08046423]
Bias :  -1.4671580353706597

y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  1.0  ->  y_test actual :  1
y_test predictions :  0.0  ->  y_test actual :  1
y_test predictions :  0.0  ->  y_test actual :  1
y_test predictions :  1.0  ->  y_test actual :  1
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test actual :  1
y_test predictions :  1.0  ->  y_test actual :  1
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  1.0  ->  y_test actual :  1
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test actual :  0
y_test predictions :  0.0  ->  y_test

In [12]:
print("Accuracy score : ",accuracy_score(y_test_pred,y_test))

Accuracy score :  0.78


# Logistic Regression of Sklearn #

In [13]:
from sklearn.linear_model import LogisticRegression
model= LogisticRegression(random_state = 53)

model.fit(X_train,y_train)

pred = model.predict(X_test)
print("Accuracy score : ",model.score(X_test,y_test))

Accuracy score :  0.78
