In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import zipfile
import random
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
zip_file = zipfile.ZipFile('logistic_regression.zip')
zip_file.extractall()
zip_file.close()

In [3]:
dataset = pd.read_csv("Social_Network_Ads.csv")

In [4]:
dataset.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [5]:
le = LabelEncoder()
le.fit(list(set(dataset['Gender'])))
dataset['Gender'] = le.transform(dataset['Gender'])
dataset.drop(['User ID'], axis = 1, inplace = True)

In [6]:
dataset.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [7]:
random.seed(42)
train_set_index = np.array(random.sample(range(len(dataset)),int(len(dataset)*0.75)))
test_set_index = np.setdiff1d(range(len(dataset)),train_set_index)

In [8]:
train_data = dataset.iloc[train_set_index]
test_data = dataset.iloc[test_set_index]

In [9]:
X_train = train_data.iloc[:,:-1]
y_train = train_data.iloc[:,-1]
X_test = test_data.iloc[:,:-1]
y_test = test_data.iloc[:,-1]

In [10]:
scaler = StandardScaler()
X_train = scaler.fit_transform( X_train )
X_test = scaler.transform( X_test )

In [11]:
class LogisticRegressor:
  
  def __init__(self, X, y, no_of_iterations = 1000,lr = .001):
    self.X = X
    self.y = y
    self.n, self.m = X.shape # num of training data, num of features
    self.lr = lr # learning rate
    self.itr = no_of_iterations # num of iterations
    # weight and bias
    self.b = np.random.rand(1) * 0.1
    self.w = np.ones((self.m, 1))
    self.w *= np.random.rand(1) * 0.1

  # sigmoid function
  def sigmoid(self, z):
    sig = 1/ (1+np.exp(-z))
    return sig

  # cost function
  def cost_function(self, pred_y): 
    cost = np.mean((self.y * np.log(pred_y)) + ((1 - self.y) * np.log(1 - pred_y))) 
    return(cost)
  
  # traing the model
  def fit(self): 
    self.history = np.zeros(self.itr)
    for i in range(self.itr):
      pred_y = self.sigmoid(np.dot(self.X, self.w) + self.b)
      error = np.squeeze(pred_y) - self.y
      gw = np.dot(error, self.X) / self.n
      gb = np.sum(error) / self.n
      self.w = np.squeeze(self.w) - self.lr * gw
      self.b = np.squeeze(self.b) - self.lr *gb
      self.history[i] = self.cost_function(np.squeeze(pred_y))
  
  # predicting values for new data
  def predict(self, X): 
    pred_y = self.sigmoid(np.dot(self.X, self.w) + self.b)
    pred_y = pred_y // 0.5
    return pred_y

  # validating your models prediction using accuracy
  def accuracy(self, y, pred_y):
    y = np.array(y)
    val = 0
    for i in range(len(y)):
      if (y[i] == pred_y[i]):
        val += 1
    print(f'accuracy: {val/len(y)}')



In [12]:
l = LogisticRegressor(X_train, y_train)

In [13]:
l.fit()

In [14]:
ans = l.predict(X_test)

In [15]:
l.accuracy(y_test, ans)

accuracy: 0.54
