# **Building Logistic Regression**

## Importing libraries

In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score


## Creating the logistic regression class

In [3]:

class Logistic_Regression():
  """Binary logistic regression trained with full-batch gradient descent.

  The model computes sigmoid(X . weights + bias) and labels a sample 1 when
  that value is strictly greater than 0.5, otherwise 0.

  Attributes set by fit():
    X, Y: training features / labels as float NumPy arrays.
    data_size: number of training rows.
    no_of_features: number of feature columns.
    weights: 1-D array with one coefficient per feature.
    bias: scalar intercept.
  """

  def __init__(self, learning_rate, no_of_iterations):
    """Store the hyperparameters; no training happens here.

    Args:
      learning_rate: step size applied to every gradient update.
      no_of_iterations: number of gradient steps fit() performs.
    """
    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations

  @staticmethod
  def _sigmoid(z):
    """Return 1 / (1 + exp(-z)) element-wise without overflowing.

    exp() is only ever evaluated on -|z|, which lies in (0, 1]. For z >= 0
    this is the textbook form; for z < 0 the algebraically equal
    exp(z) / (1 + exp(z)) is used instead.
    """
    z = np.asarray(z, dtype=float)
    exp_neg_abs = np.exp(-np.abs(z))
    return np.where(z >= 0, 1 / (1 + exp_neg_abs), exp_neg_abs / (1 + exp_neg_abs))

  def fit(self, X, Y):
    """Train weights and bias from scratch on (X, Y).

    Args:
      X: 2-D array-like of shape (n_samples, n_features).
      Y: array-like holding n_samples labels (0 or 1). A column vector or a
        pandas Series with any index is accepted; labels are matched to the
        rows of X by position.

    Raises:
      ValueError: if X and Y do not contain the same number of samples.
    """
    # Work on plain float arrays so pandas index alignment and (n, 1) label
    # columns cannot silently change the shape of the gradient.
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float).ravel()
    if X.shape[0] != Y.shape[0]:
      raise ValueError(
        "X and Y must have the same number of samples, got %d and %d"
        % (X.shape[0], Y.shape[0])
      )

    self.X = X
    self.Y = Y

    self.data_size = X.shape[0]
    self.no_of_features = X.shape[1]

    # Every call to fit() restarts from the zero vector.
    self.weights = np.zeros(self.no_of_features)
    self.bias = 0

    for _ in range(self.no_of_iterations):
      self.update_weights()

  def update_weights(self):
    """Apply one gradient-descent step using the whole training set."""
    Y_prediction = self._sigmoid(self.X.dot(self.weights) + self.bias)
    error = Y_prediction - self.Y

    # Gradients of the mean log-loss with respect to weights and bias.
    partial_derivative_to_weights = 1 / self.data_size * self.X.T.dot(error)
    partial_derivative_to_bias = 1 / self.data_size * np.sum(error)

    self.weights = self.weights - self.learning_rate * partial_derivative_to_weights
    self.bias = self.bias - self.learning_rate * partial_derivative_to_bias

  def predict(self, X):
    """Return an array of 0/1 labels for the rows of X.

    A row is labelled 1 only when its predicted probability is strictly
    greater than 0.5. Requires fit() to have been called first.
    """
    z = np.asarray(X, dtype=float).dot(self.weights) + self.bias
    return np.where(self._sigmoid(z) > 0.5, 1, 0)


## Load the data

In [12]:
# Read the dataset; the 'Outcome' column is used as the label and every
# other column as a feature.
diabetes_df = pd.read_csv('diabetes.csv')
X = diabetes_df.drop(columns='Outcome')
Y = diabetes_df['Outcome']

# Per-column count of missing values. The recorded output of this cell is
# exactly this table, so the expression is kept here to make a fresh
# Restart & Run All reproduce it.
diabetes_df.isnull().sum()


Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

## Split the data

In [10]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

## Standardising the data

In [18]:
# Fit the scaler on the training split only, then apply that same scaling
# to both splits so no test-set statistics leak into training.
standard_scaler = StandardScaler()
X_train_standardised = standard_scaler.fit_transform(X_train)
X_test_standardised = standard_scaler.transform(X_test)

# Overall standard deviation of the scaled training matrix (cell output).
# NOTE(review): with a scaler fitted on X_train this should be ~1.0; a
# noticeably different recorded value suggests stale output. Re-run to confirm.
X_train_standardised.std()

0.9583971477855755

## Training the model

In [20]:
# Train the from-scratch classifier on the scaled training split, then
# label the scaled test split.
logistic_regression = Logistic_Regression(learning_rate=0.01, no_of_iterations=1000)
logistic_regression.fit(X_train_standardised, Y_train)
Y_predict = logistic_regression.predict(X_test_standardised)

## Evaluating the model

In [23]:
# accuracy_score is declared as accuracy_score(y_true, y_pred): the
# ground-truth labels go first and the model's predictions second.

# Accuracy on the data the model was trained on.
X_train_prediction = logistic_regression.predict(X_train_standardised)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print(training_data_accuracy)

# Accuracy on the held-out test split.
X_test_prediction = logistic_regression.predict(X_test_standardised)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print(test_data_accuracy)


0.7719869706840391
0.7662337662337663
