In [None]:
"""
Logistic Regression - No Framework Implementation
Built from scratch using only NumPy.
"""

import numpy as np
import json
import sys
sys.path.append('../..')

# Self-created utilities
from utils.metrics import accuracy, precision, recall, f1_score, auc_score
from utils.performance import track_performance
from utils.visualization import (
    plot_cost_curve,
    plot_confusion_matrix,
    plot_roc_curve,
    plot_feature_importance
)

# Load preprocessed data (already scaled; SMOTE applied so the training split
# is 50/50 balanced, while the test split keeps its original class ratio).
# Every artefact lives in the same directory, so the location is defined once.
DATA_DIR = '../../data/processed/logistic_regression'

X_train = np.load(f'{DATA_DIR}/X_train.npy')
X_test = np.load(f'{DATA_DIR}/X_test.npy')
y_train = np.load(f'{DATA_DIR}/y_train.npy')
y_test = np.load(f'{DATA_DIR}/y_test.npy')

# Fail fast if features and labels are out of sync, instead of surfacing later
# as an opaque shape error inside the training loop or the metric helpers.
assert X_train.shape[0] == y_train.shape[0], "X_train / y_train row count mismatch"
assert X_test.shape[0] == y_test.shape[0], "X_test / y_test row count mismatch"
assert X_train.shape[1] == X_test.shape[1], "train / test feature count mismatch"

# Load metadata for feature names.
# The encoding is passed explicitly so the read does not depend on the
# platform's locale default (JSON text is UTF-8 by specification).
with open(f'{DATA_DIR}/preprocessing_info.json', encoding='utf-8') as f:
    meta = json.load(f)
feature_names = meta['feature_names']

print(f"Training: {X_train.shape[0]:,} samples, {X_train.shape[1]} features")
print(f"Test: {X_test.shape[0]:,} samples")
print(f"Class balance - Train: {np.mean(y_train):.1%} fraud")
print(f"Class balance - Test: {np.mean(y_test):.1%} fraud")

Training: 454,902 samples, 30 features
Test: 56,962 samples
Class balance - Train: 50.0% fraud
Class balance - Test: 0.2% fraud


In [2]:
# Core functions for logistic regression

def sigmoid(z):
    """
    Logistic (sigmoid) function: 1 / (1 + e^(-z)).

    Squashes a real-valued score (scalar or array) into the open interval
    (0, 1), so the result can be read as a probability.

    The input is first limited to [-500, 500] so that the exponential
    stays within floating-point range for extreme scores.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def binary_cross_entropy(y_true, y_pred):
    """
    Binary Cross-Entropy Loss (Log Loss).
    Measures how well predicted probabilities match actual labels.

    Formula: -[y*log(p) + (1-y)*log(1-p)], averaged over all samples
    - When y=1: loss = -log(p)     → penalizes low probability for fraud
    - When y=0: loss = -log(1-p)   → penalizes high probability for legit

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0 or 1). Lists/tuples are accepted.
    y_pred : array-like
        Predicted probabilities, broadcast-compatible with ``y_true``.

    Returns
    -------
    numpy.float64
        Mean loss over all elements.
    """
    # Promote both inputs to float64 arrays. This lets plain Python sequences
    # work (``1 - y_true`` would raise on a list) and, more importantly, makes
    # the clip below effective: in float32, ``1 - 1e-15`` rounds to exactly
    # 1.0, so a saturated float32 prediction would still hit log(0).
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)

    # Clip predictions to avoid log(0)
    eps = 1e-15
    y_pred = np.clip(y_pred, eps, 1 - eps)

    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

In [3]:
# Training: full-batch gradient descent

# Hyperparameters
learning_rate = 0.1
n_iterations = 1000

# Initialize the parameters: one zero weight per feature, zero bias
n_features = X_train.shape[1]
weights = np.zeros(n_features)
bias = 0.0

# Cost at every iteration, kept so the learning curve can be plotted
cost_history = []

# The whole optimisation loop runs inside the performance tracker
with track_performance() as perf:
    for i in range(n_iterations):
        # Forward pass: linear score, then sigmoid to get probabilities
        z = X_train.dot(weights) + bias
        y_pred = sigmoid(z)

        # Cost of the current parameters (before this iteration's update).
        # Recorded for monitoring only; the gradients below do not use it.
        cost = binary_cross_entropy(y_train, y_pred)
        cost_history.append(cost)

        # Gradients of the mean BCE with respect to the parameters.
        # With a sigmoid output, the chain rule reduces to (prediction - label).
        error = y_pred - y_train
        dw = X_train.T.dot(error) / len(y_train)    # one entry per weight
        db = error.mean()                           # scalar for the bias

        # Step against the gradient
        weights -= learning_rate * dw
        bias -= learning_rate * db

        # Report progress once every 100 iterations
        is_report_step = (i + 1) % 100 == 0
        if is_report_step:
            print(f"Iteration {i+1:4d} | Cost: {cost:.6f}")

print(f"\nTraining complete!")
print(f"Time: {perf['time']:.2f} sec | Memory: {perf['memory']:.2f} MB")

Iteration  100 | Cost: 0.144255
Iteration  200 | Cost: 0.100753
Iteration  300 | Cost: 0.083974
Iteration  400 | Cost: 0.075091
Iteration  500 | Cost: 0.069554
Iteration  600 | Cost: 0.065747
Iteration  700 | Cost: 0.062953
Iteration  800 | Cost: 0.060806
Iteration  900 | Cost: 0.059100
Iteration 1000 | Cost: 0.057708

Training complete!
Time: 18.34 sec | Memory: 27.87 MB
