In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [8]:
class LogisticRegression:
  """Binary logistic-regression classifier trained with batch gradient descent.

  The predicted probability of class 1 is ``sigmoid(weights . x + bias)``.
  Each epoch performs one update of ``weights`` and ``bias`` using the
  gradient of the mean log-loss over the full training set.

  Attributes created by ``fit``:
    X, y: training features (2-D float array) and labels (1-D float array).
    m, n: number of training rows and number of feature columns.
    weights: 1-D float array of ``n`` coefficients, initialised to zeros.
    bias: scalar intercept, initialised to zero.
  """

  # Logits are clipped to +/- this bound before exponentiation. exp(500) is
  # still finite in float64, so np.exp can no longer overflow, while results
  # for ordinary logits are bit-for-bit what the unclipped formula returns.
  _MAX_ABS_LOGIT = 500.0

  def __init__(self, learning_rate, epochs):
    """Store the hyper-parameters; no training happens here.

    Args:
      learning_rate: step size multiplied into every gradient update.
      epochs: number of full-batch gradient steps ``fit`` performs.
    """
    self.learning_rate = learning_rate
    self.epochs = epochs

  def fit(self, X, y):
    """Train on ``X`` / ``y``, always restarting from zero weights and bias.

    Args:
      X: array-like of shape (m, n) holding numeric features.
      y: array-like holding m labels (0 or 1). A flat sequence, a pandas
        Series or an (m, 1) column are all accepted; the labels are flattened.

    Raises:
      ValueError: if ``X`` is not 2-D or ``y`` does not provide exactly one
        label per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Flatten the labels: an (m, 1) column would otherwise broadcast against
    # the (m,) prediction vector into an (m, m) matrix.
    y = np.asarray(y, dtype=float).ravel()
    if X.ndim != 2:
      raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
    if y.shape[0] != X.shape[0]:
      raise ValueError(
          f"X has {X.shape[0]} rows but y has {y.shape[0]} labels")

    self.X = X
    self.y = y
    self.m, self.n = X.shape
    self.weights = np.zeros(self.n)
    self.bias = 0.0
    for _ in range(self.epochs):
      self.update_weights()

  def update_weights(self):
    """Apply one full-batch gradient-descent step to ``weights`` and ``bias``."""
    error = self.sigmoid(self.X) - self.y
    dw = (1 / self.m) * np.dot(self.X.T, error)
    db = (1 / self.m) * np.sum(error)
    self.weights -= self.learning_rate * dw
    self.bias -= self.learning_rate * db

  def sigmoid(self, X):
    """Return the model's probability of class 1 for each row of ``X``.

    Note that the argument is a feature matrix, not pre-computed logits: the
    linear term ``weights . x + bias`` is evaluated here before the logistic
    function is applied.
    """
    logits = self.weights.dot(np.asarray(X, dtype=float).T) + self.bias
    # Guard np.exp against overflow for extreme logits.
    logits = np.clip(logits, -self._MAX_ABS_LOGIT, self._MAX_ABS_LOGIT)
    return 1 / (1 + np.exp(-logits))

  def predict(self, X):
    """Return hard labels for ``X``: 1 where the probability is >= 0.5, else 0."""
    return np.where(self.sigmoid(X) >= 0.5, 1, 0)


In [4]:
# Load the diabetes table into a DataFrame.
# NOTE(review): absolute path — presumably a Colab upload location; adjust
# when running elsewhere.
diabetes_dataset = pd.read_csv('/content/diabetes.csv')

# 'Outcome' is the target column; every remaining column is a feature.
y = diabetes_dataset['Outcome']
X = diabetes_dataset.drop(columns='Outcome')

In [5]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [10]:
# Train the classifier on the training partition.
model = LogisticRegression(learning_rate=0.01, epochs=1000)
model.fit(X_train, y_train)

# Predict hard labels for both partitions.
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Score the predictions against the true labels.
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Report train first, then test.
print('Train Accuracy:', train_accuracy)
print('Test Accuracy:', test_accuracy)

Train Accuracy: 0.7768729641693811
Test Accuracy: 0.7662337662337663
