# Logistic Regression From Scratch

## Mathematical Foundation

Logistic regression extends linear regression to binary classification by applying the sigmoid activation function:

$$\mathbf{p} = \sigma(\mathbf{X}\mathbf{w} + b) = \frac{1}{1 + e^{-(\mathbf{X}\mathbf{w} + b)}}$$

where $\sigma$ is the sigmoid function, applied element-wise, mapping logits to probabilities in the open interval $(0, 1)$.

### Loss Function: Binary Cross-Entropy

$$L = -\frac{1}{n}\sum_{i=1}^{n}[y_i \log(p_i) + (1-y_i)\log(1-p_i)]$$

With L2 regularization:

$$L = -\frac{1}{n}\sum_{i=1}^{n}[y_i \log(p_i) + (1-y_i)\log(1-p_i)] + \frac{\lambda}{2}||\mathbf{w}||^2$$

### Gradients

$$\frac{\partial L}{\partial \mathbf{w}} = \frac{1}{n}\mathbf{X}^T(\mathbf{p} - \mathbf{y}) + \lambda\mathbf{w}$$

$$\frac{\partial L}{\partial b} = \frac{1}{n}\sum_{i=1}^{n}(p_i - y_i)$$

### Decision Rule

$$\hat{y} = \begin{cases} 1 & \text{if } p \geq 0.5 \\ 0 & \text{otherwise} \end{cases}$$


In [None]:
import sys
import os
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

import numpy as np
import matplotlib.pyplot as plt
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.datasets import fetch_california_housing
from logistic_regression import LogisticRegression

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(42)
# Select the matplotlib style sheet used by the plots drawn later in this file.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require
# matplotlib >= 3.6 — confirm against the pinned version.
plt.style.use('seaborn-v0_8-darkgrid')


## Converting Regression to Classification

For demonstration purposes, we'll convert the California Housing regression problem into a binary classification task: each sample is labelled 1 if its median house value lies above the dataset-wide median of that target, and 0 otherwise.


In [None]:
# Fetch the California Housing data (pandas-backed) and pull out NumPy arrays
housing = fetch_california_housing(as_frame=True)
X = housing.data.to_numpy()
y = housing.target.to_numpy()

# Binarize the target: label 1 when a value lies strictly above the median
# of y, label 0 otherwise (so ties with the median fall into class 0)
threshold = np.median(y)
y_binary = np.greater(y, threshold).astype(int)

# Report the cut-off and how many samples landed on each side of it
print(f"Threshold: {threshold:.2f}")
print("Class distribution:")
print(f"  Class 0 (≤ {threshold:.2f}): {np.count_nonzero(y_binary == 0)} samples")
print(f"  Class 1 (>{threshold:.2f}): {np.count_nonzero(y_binary == 1)} samples")


In [None]:
# Hold out 20% of the rows for testing, stratified on the binary label
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    stratify=y_binary,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both parts so no test-set statistics leak into training
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

for split_name, split_data in (
    ("Training set", X_train_scaled),
    ("Test set", X_test_scaled),
):
    print(f"{split_name}: {split_data.shape}")


## Training Logistic Regression Model


In [None]:
# Baseline fit: same optimizer settings as the regularized run, but with the
# regularization strength set to 0.0
model_no_reg = LogisticRegression(learning_rate=0.1, max_iterations=2000,
                                  regularization=0.0, verbose=True)
model_no_reg.fit(X_train_scaled, y_train)

# NOTE(review): the iteration count is read off len(loss_history), which
# assumes one recorded loss per iteration — confirm against LogisticRegression
print(
    f"\nFinal training loss: {model_no_reg.loss_history[-1]:.6f}",
    f"Number of iterations: {len(model_no_reg.loss_history)}",
    sep="\n",
)


In [None]:
# Regularized fit: identical settings to the baseline except for a
# regularization strength of 0.1
model_with_reg = LogisticRegression(learning_rate=0.1, max_iterations=2000,
                                    regularization=0.1, verbose=True)
model_with_reg.fit(X_train_scaled, y_train)

# NOTE(review): the iteration count is read off len(loss_history), which
# assumes one recorded loss per iteration — confirm against LogisticRegression
print(
    f"\nFinal training loss: {model_with_reg.loss_history[-1]:.6f}",
    f"Number of iterations: {len(model_with_reg.loss_history)}",
    sep="\n",
)


In [None]:
# Overlay the recorded loss curves of both fitted models on a single figure
plt.figure(figsize=(12, 6))
for fitted_model, curve_label in (
    (model_no_reg, 'No Regularization'),
    (model_with_reg, 'L2 Regularization (λ=0.1)'),
):
    plt.plot(fitted_model.loss_history, label=curve_label, linewidth=2)

# Axis text, faint grid and legend
plt.title('Training Loss Convergence')
plt.xlabel('Iteration')
plt.ylabel('Loss (Binary Cross-Entropy)')
plt.grid(True, alpha=0.3)
plt.legend()
plt.tight_layout()
plt.show()


## Model Evaluation


In [None]:
# Class predictions on the held-out rows, one array per model (these are
# reused by the confusion-matrix and classification-report cells)
y_pred_no_reg, y_pred_with_reg = (
    fitted.predict(X_test_scaled) for fitted in (model_no_reg, model_with_reg)
)

# Each model's score() on the same held-out rows; reported below as accuracy
acc_no_reg, acc_with_reg = (
    fitted.score(X_test_scaled, y_test) for fitted in (model_no_reg, model_with_reg)
)

print(
    "Model Performance:",
    "=" * 50,
    "No Regularization:",
    f"  Accuracy: {acc_no_reg:.4f}",
    "\nWith L2 Regularization (λ=0.1):",
    f"  Accuracy: {acc_with_reg:.4f}",
    sep="\n",
)


In [None]:
# Side-by-side confusion-matrix heatmaps, one panel per model
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

cm_no_reg = confusion_matrix(y_test, y_pred_no_reg)
cm_with_reg = confusion_matrix(y_test, y_pred_with_reg)

import seaborn as sns

# (axes, matrix, title) for each panel; both panels share the same styling
panels = (
    (axes[0], cm_no_reg, 'Confusion Matrix (No Regularization)'),
    (axes[1], cm_with_reg, 'Confusion Matrix (L2 Regularization)'),
)
for panel_ax, matrix, panel_title in panels:
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()


In [None]:
# Print scikit-learn's per-class text report for each model's test predictions
for report_heading, predictions in (
    ("Classification Report - No Regularization:", y_pred_no_reg),
    ("\nClassification Report - With L2 Regularization:", y_pred_with_reg),
):
    print(report_heading)
    print(classification_report(y_test, predictions))


## Visualizing Decision Boundary (2D Projection)

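For visualization, we'll restrict the data to two features (median income and house age, the first two columns of the dataset), train a separate model on just those two, and plot its predicted-probability surface together with the training points.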


In [None]:
# Fit and plot a two-feature model so its decision surface can be drawn in 2D.
# The feature pair is chosen by column position (not by a correlation or
# importance ranking): columns 0 and 1 of the California Housing matrix are
# MedInc and HouseAge. The `housing` object loaded at the top of the notebook
# is reused for the feature names, so the dataset is not fetched a second time.
feature_idx = [0, 1]  # MedInc and HouseAge
# Derive the axis labels from the same indices so they cannot drift apart
axis_labels = [f"{housing.feature_names[i]} (scaled)" for i in feature_idx]
X_2d = X_train_scaled[:, feature_idx]

# Train a 2D model for visualization
model_2d = LogisticRegression(learning_rate=0.1, max_iterations=1000, verbose=False)
model_2d.fit(X_2d, y_train)

# Regular grid covering the data range plus a margin of 1 on every side
# (in standardized units), sampled every 0.1
x_min, x_max = X_2d[:, 0].min() - 1, X_2d[:, 0].max() + 1
y_min, y_max = X_2d[:, 1].min() - 1, X_2d[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# Evaluate the model at every grid point and fold the result back onto the grid.
# NOTE(review): the reshape assumes predict_proba returns exactly one value
# per input row — confirm against LogisticRegression.
Z = model_2d.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Filled contours of the model output with the training points drawn on top
plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, alpha=0.4, cmap='RdYlBu')
plt.scatter(X_2d[y_train == 0, 0], X_2d[y_train == 0, 1],
            c='blue', marker='o', label='Class 0', alpha=0.6, s=20)
plt.scatter(X_2d[y_train == 1, 0], X_2d[y_train == 1, 1],
            c='red', marker='s', label='Class 1', alpha=0.6, s=20)
plt.xlabel(axis_labels[0])
plt.ylabel(axis_labels[1])
plt.title('Logistic Regression Decision Boundary (2D Projection)')
plt.legend()
plt.tight_layout()
plt.show()
