In [51]:
# Implementing and Interpreting Logistic Regression from Scratch
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, log_loss
from sklearn.linear_model import LogisticRegression

In [52]:
X, y = make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=5,
    n_redundant=0,
    random_state=42
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [53]:
class LogisticRegressionScratch:
    def __init__(self, lr=0.01, n_iters=10000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
        self.losses = []

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def loss(self, y, y_hat):
        return -np.mean(y*np.log(y_hat + 1e-9) + (1-y)*np.log(1-y_hat + 1e-9))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            linear_output = np.dot(X, self.weights) + self.bias
            y_hat = self.sigmoid(linear_output)

            dw = (1/n_samples) * np.dot(X.T, (y_hat - y))
            db = (1/n_samples) * np.sum(y_hat - y)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            self.losses.append(self.loss(y, y_hat))

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        y_hat = self.sigmoid(linear_output)
        return (y_hat >= 0.5).astype(int)

    def predict_proba(self, X):
      linear_output = np.dot(X, self.weights) + self.bias
      return self.sigmoid(linear_output)


In [54]:
custom_model = LogisticRegressionScratch()
custom_model.fit(X_train, y_train)

y_prob_custom = custom_model.predict_proba(X_test)
custom_loss = log_loss(y_test, y_prob_custom)
y_pred_custom = custom_model.predict(X_test)

In [55]:
custom_accuracy = accuracy_score(y_test, y_pred_custom)
custom_precision = precision_score(y_test, y_pred_custom)
custom_recall = recall_score(y_test, y_pred_custom)
custom_loss = log_loss(y_test, y_pred_custom)

custom_accuracy, custom_precision, custom_recall, custom_loss

(0.825, 0.8118811881188119, 0.8367346938775511, 6.307639343095502)

In [56]:
sk_model = LogisticRegression()
sk_model.fit(X_train, y_train)

y_pred_sk = sk_model.predict(X_test)

In [57]:
coefficients = custom_model.weights

sorted_indices = np.argsort(coefficients)
top_negative = sorted_indices[:2]
top_positive = sorted_indices[-2:]

coefficients, top_positive, top_negative

(array([-0.76297217,  0.82739567,  0.72748989, -0.33026034, -0.74970028]),
 array([2, 1]),
 array([0, 4]))