In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.linear_model import LogisticRegression

# Single seed shared by data generation and the split so a fresh run reproduces
# the same arrays.
SEED = 42

# Synthetic two-class problem: 500 rows, 5 features, 3 of them informative,
# with no redundant or repeated columns.
dataset_options = dict(
    n_samples=500,
    n_features=5,
    n_informative=3,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    random_state=SEED,
)
X, y = make_classification(**dataset_options)

# Hold out 20% of the rows for evaluation
split_arrays = train_test_split(X, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = split_arrays


In [4]:
class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to every gradient update.
    max_iter : int, default=1000
        Upper bound on the number of gradient-descent iterations.
    tol : float, default=1e-6
        Training stops early once the absolute change in loss between two
        consecutive iterations drops below this value.

    Attributes
    ----------
    W : numpy.ndarray of shape (n_features,)
        Learned weights; set by ``fit``.
    b : float
        Learned bias; set by ``fit``.
    loss_history_ : list of float
        Loss recorded at each iteration of the most recent ``fit`` call.
    """

    # Added inside the logarithms so that log(0) is never evaluated.
    _LOG_EPS = 1e-10

    def __init__(self, lr=0.01, max_iter=1000, tol=1e-6):
        self.lr = lr
        self.max_iter = max_iter
        self.tol = tol

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        The value is computed from exp(-|z|), which is at most 1, so inputs of
        large magnitude cannot overflow the exponential.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- same function.
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def compute_loss(self, y, y_hat):
        """Return the mean binary cross-entropy of probabilities ``y_hat``
        against 0/1 labels ``y``."""
        y = np.asarray(y, dtype=float)
        y_hat = np.asarray(y_hat, dtype=float)
        n = len(y)
        eps = self._LOG_EPS
        return -(1/n) * np.sum(y*np.log(y_hat + eps) + (1 - y)*np.log(1 - y_hat + eps))

    def fit(self, X, y):
        """Fit weights and bias by gradient descent on the cross-entropy loss.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of n_samples labels in {0, 1}; a column vector of
            shape (n_samples, 1) is flattened.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or disagrees with ``y`` in length.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so that (n, 1) labels do not broadcast against the (n,)
        # predictions into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        # Initialize weights & bias
        self.W = np.zeros(n_features)
        self.b = 0.0
        self.loss_history_ = []

        prev_loss = float('inf')

        for _ in range(self.max_iter):

            # Forward pass: linear model followed by sigmoid activation
            y_hat = self.sigmoid(np.dot(X, self.W) + self.b)
            residual = y_hat - y

            # Gradients of the mean cross-entropy
            dW = (1/n_samples) * np.dot(X.T, residual)
            db = (1/n_samples) * np.sum(residual)

            # Update weights
            self.W -= self.lr * dW
            self.b -= self.lr * db

            # Loss of the predictions made *before* this update; comparing
            # consecutive values still measures how fast training progresses.
            loss = self.compute_loss(y, y_hat)
            self.loss_history_.append(float(loss))

            # Stop if change is very small
            if abs(prev_loss - loss) < self.tol:
                break

            prev_loss = loss

        return self

    def predict(self, X):
        """Return 0/1 class labels for each row of ``X`` (threshold 0.5).

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        if not hasattr(self, "W"):
            raise AttributeError(
                "LogisticRegressionScratch is not fitted yet; call fit() first"
            )
        linear_output = np.dot(np.asarray(X, dtype=float), self.W) + self.b
        y_hat = self.sigmoid(linear_output)
        return np.where(y_hat >= 0.5, 1, 0)


In [5]:
# Hyperparameters for the from-scratch model, kept together so they are easy to tune
scratch_hyperparams = {"lr": 0.05, "max_iter": 5000, "tol": 1e-6}

model_scratch = LogisticRegressionScratch(**scratch_hyperparams)
model_scratch.fit(X_train, y_train)

# Class predictions on the held-out split
y_pred_scratch = model_scratch.predict(X_test)


In [6]:
# Held-out metrics for the from-scratch model, one line per metric
print("Custom Logistic Regression Results:")
scratch_report = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
)
for heading, metric_fn in scratch_report:
    print(heading, metric_fn(y_test, y_pred_scratch))


Custom Logistic Regression Results:
Accuracy: 0.9
Precision: 0.9347826086956522
Recall: 0.86


In [7]:
# Library baseline fitted on the same training split, using sklearn's defaults.
# NOTE(review): sklearn's default LogisticRegression applies L2 regularisation,
# which the from-scratch model does not -- keep that in mind when comparing.
model_sklearn = LogisticRegression().fit(X_train, y_train)
y_pred_sklearn = model_sklearn.predict(X_test)

# Held-out metrics for the baseline, one line per metric
print("\nsklearn Logistic Regression Results:")
sklearn_report = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
)
for heading, metric_fn in sklearn_report:
    print(heading, metric_fn(y_test, y_pred_sklearn))



sklearn Logistic Regression Results:
Accuracy: 0.9
Precision: 0.9347826086956522
Recall: 0.86


In [8]:
# Side-by-side comparison: one row per model, one column per held-out metric
predictions_by_model = {
    "Custom Logistic Regression": y_pred_scratch,
    "sklearn LogisticRegression": y_pred_sklearn,
}

results = pd.DataFrame(
    [
        {
            "Model": model_name,
            "Accuracy": accuracy_score(y_test, model_predictions),
            "Precision": precision_score(y_test, model_predictions),
            "Recall": recall_score(y_test, model_predictions),
        }
        for model_name, model_predictions in predictions_by_model.items()
    ]
)

results


Unnamed: 0,Model,Accuracy,Precision,Recall
0,Custom Logistic Regression,0.9,0.934783,0.86
1,sklearn LogisticRegression,0.9,0.934783,0.86
