Importing Libraries

In [None]:
import numpy as np
import time
import platform
import os
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score

Loss Functions

In [None]:
class LogisticLoss:
    """Implements the Logistic Loss function: phi(p,y) = ln(1 + exp(-py)).

    ``p`` is the raw model score and ``y`` the target label (the notebook
    encodes labels as -1/+1).
    """

    def gradient(self, p, y):
        """Return the derivative of the loss w.r.t. the score ``p``.

        phi'(p,y) = -y / (1 + exp(py))

        Parameters
        ----------
        p : float or array-like
            Model score(s).
        y : float or array-like
            Label(s); broadcast against ``p``.

        Returns
        -------
        float or numpy.ndarray
            The derivative, with the broadcast shape of ``p * y``.
        """
        # 1 / (1 + exp(z)) == exp(-log(1 + exp(z))) == exp(-logaddexp(0, z)).
        # Evaluating it through logaddexp never forms exp(z) directly, so a
        # large positive margin z = p * y cannot overflow.
        return -y * np.exp(-np.logaddexp(0.0, p * y))

class HingeLoss:
    """Implements the SVM Hinge Loss: phi(p,y) = max(0, 1 - py).

    ``p`` is the raw model score and ``y`` the target label (the notebook
    encodes labels as -1/+1).
    """

    def gradient(self, p, y):
        """Return the (sub)derivative of the loss w.r.t. the score ``p``.

        phi'(p,y) = -y if py < 1, 0 otherwise

        Parameters
        ----------
        p : float or array-like
            Model score(s).
        y : float or array-like
            Label(s); broadcast against ``p``.

        Returns
        -------
        scalar or numpy.ndarray
            A scalar when both inputs are scalars, otherwise an array with
            the broadcast shape of ``p * y``.
        """
        margin = p * y
        if np.ndim(margin) == 0:
            # Scalar fast path, used by the per-sample SGD loops.
            return -y if margin < 1 else 0
        # Array inputs: a plain `if` would raise on an ambiguous truth value,
        # so select element-wise instead.
        return np.where(margin < 1, -np.asarray(y), 0)

Implement the Standard SGD Algorithm

In [None]:
class StandardSGD:
    """Implements Algorithm 2.1 (Standard SGD).

    Trains a linear model with an intercept term by visiting the training
    samples one at a time, in a freshly shuffled order every epoch, and
    stepping against the gradient of the L2-regularized per-sample loss.

    Parameters
    ----------
    loss_fn : object
        Must expose ``gradient(p, y)`` returning the derivative of the loss
        with respect to the score ``p``.
    eta : float
        Constant step size.
    lambda_reg : float
        L2 regularization strength (0 disables regularization).
    n_epochs : int
        Number of full passes over the training data.
    random_state : int or None
        Seed for the per-epoch shuffling. ``None`` (the default) keeps using
        NumPy's global random state, exactly as before this parameter existed.

    Attributes
    ----------
    w_ : numpy.ndarray
        Learned weights after ``fit``; index 0 is the intercept.
    comp_time_ms_ : float
        Wall-clock duration of the last ``fit`` call in milliseconds.
    """

    def __init__(self, loss_fn, eta=0.002, lambda_reg=0.0, n_epochs=10,
                 random_state=None):
        self.loss_fn = loss_fn
        self.eta = eta
        self.lambda_reg = lambda_reg
        self.n_epochs = n_epochs
        self.random_state = random_state

    def _add_intercept(self, X):
        """Prepend a column of ones to ``X`` so ``w_[0]`` acts as the bias."""
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def fit(self, X, y):
        """Run SGD on ``(X, y)`` and return the elapsed time in milliseconds.

        The timing covers the intercept augmentation as well as the updates.
        """
        start_time = time.perf_counter()

        X_b = self._add_intercept(X)
        # Positional indexing below must not be reinterpreted as label-based
        # indexing (e.g. for a pandas Series with a shuffled index).
        y = np.asarray(y)
        n_samples, n_features = X_b.shape
        self.w_ = np.zeros(n_features)

        # A private generator when seeded; otherwise the module-level
        # functions, so an outer np.random.seed(...) still takes effect.
        rng = (np.random if self.random_state is None
               else np.random.default_rng(self.random_state))

        for _ in range(self.n_epochs):
            indices = rng.permutation(n_samples)  # Random ordering
            for i in indices:
                X_i, y_i = X_b[i], y[i]
                p = self.w_ @ X_i
                phi_prime = self.loss_fn.gradient(p, y_i)
                # Gradient of (lambda/2)*|w|^2 + phi(w.x, y) with respect to w.
                grad_vector = (self.lambda_reg * self.w_) + (phi_prime * X_i)
                self.w_ = self.w_ - self.eta * grad_vector

        end_time = time.perf_counter()
        self.comp_time_ms_ = (end_time - start_time) * 1000
        return self.comp_time_ms_

    def predict(self, X):
        """Return predicted labels in {-1.0, +1.0} for the rows of ``X``."""
        X_b = self._add_intercept(X)
        p = X_b @ self.w_
        # np.sign maps a score of exactly 0 to 0, which is not a valid class
        # label; resolve such ties to +1. NaN scores still propagate as NaN.
        return np.where(p == 0, 1.0, np.sign(p))

    def get_weights_norm(self):
        """Return the L2 norm of the learned weight vector (incl. intercept)."""
        return np.linalg.norm(self.w_)

Implement the Averaged SGD Algorithm

In [None]:
class AveragedSGD:
    """Implements Algorithm 5.1 (Averaged SGD).

    Runs the same per-sample updates as plain SGD on an internal iterate
    ``w_t`` but predicts with ``v_``, a running average of the iterates
    weighted by the step size used at each step.

    Parameters
    ----------
    loss_fn : object
        Must expose ``gradient(p, y)`` returning the derivative of the loss
        with respect to the score ``p``.
    eta : float
        Constant step size.
    lambda_reg : float
        L2 regularization strength (0 disables regularization).
    n_epochs : int
        Number of full passes over the training data.
    random_state : int or None
        Seed for the per-epoch shuffling. ``None`` (the default) keeps using
        NumPy's global random state, exactly as before this parameter existed.

    Attributes
    ----------
    v_ : numpy.ndarray
        Averaged weights after ``fit``; index 0 is the intercept.
    comp_time_ms_ : float
        Wall-clock duration of the last ``fit`` call in milliseconds.
    """

    def __init__(self, loss_fn, eta=0.002, lambda_reg=0.0, n_epochs=10,
                 random_state=None):
        self.loss_fn = loss_fn
        self.eta = eta
        self.lambda_reg = lambda_reg
        self.n_epochs = n_epochs
        self.random_state = random_state

    def _add_intercept(self, X):
        """Prepend a column of ones to ``X`` so ``v_[0]`` acts as the bias."""
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def fit(self, X, y):
        """Run averaged SGD on ``(X, y)``; return elapsed time in milliseconds.

        The timing covers the intercept augmentation as well as the updates.
        """
        start_time = time.perf_counter()

        X_b = self._add_intercept(X)
        # Positional indexing below must not be reinterpreted as label-based
        # indexing (e.g. for a pandas Series with a shuffled index).
        y = np.asarray(y)
        n_samples, n_features = X_b.shape

        w_t = np.zeros(n_features)
        self.v_ = np.zeros(n_features)  # This is the final averaged vector
        r_t = 0.0  # Sum of the step sizes used so far

        # A private generator when seeded; otherwise the module-level
        # functions, so an outer np.random.seed(...) still takes effect.
        rng = (np.random if self.random_state is None
               else np.random.default_rng(self.random_state))

        for _ in range(self.n_epochs):
            indices = rng.permutation(n_samples)  # Random ordering
            for i in indices:
                X_i, y_i = X_b[i], y[i]
                p = w_t @ X_i
                phi_prime = self.loss_fn.gradient(p, y_i)

                # Update the average with the current (pre-step) iterate:
                # v <- (r_prev * v + eta * w_t) / (r_prev + eta)
                r_t_prev = r_t
                r_t = r_t_prev + self.eta
                if r_t > 0:  # skips the division when eta == 0
                    self.v_ = (r_t_prev / r_t) * self.v_ + (self.eta / r_t) * w_t

                # Update w_t for the *next* iteration
                grad_vector = (self.lambda_reg * w_t) + (phi_prime * X_i)
                w_t = w_t - self.eta * grad_vector

        end_time = time.perf_counter()
        self.comp_time_ms_ = (end_time - start_time) * 1000
        return self.comp_time_ms_

    def predict(self, X):
        """Return predicted labels in {-1.0, +1.0} using the averaged weights."""
        X_b = self._add_intercept(X)
        p = X_b @ self.v_
        # np.sign maps a score of exactly 0 to 0, which is not a valid class
        # label; resolve such ties to +1. NaN scores still propagate as NaN.
        return np.where(p == 0, 1.0, np.sign(p))

    def get_weights_norm(self):
        """Return the L2 norm of the averaged weight vector (incl. intercept)."""
        return np.linalg.norm(self.v_)

Run Experiments helper function

In [None]:
def run_experiment(model, title, X_train, y_train, X_test, y_test):
    """Train ``model``, evaluate it on the test split, and print a report.

    ``model`` must provide ``fit(X, y)`` (returning the training time in
    milliseconds), ``predict(X)`` and ``get_weights_norm()``. The report
    lists test accuracy, test precision, training time and the weight norm
    under a ``title`` banner. Nothing is returned.
    """
    print(f"--- {title} ---")

    # Training: fit() reports its own wall-clock duration.
    elapsed_ms = model.fit(X_train, y_train)

    # Inference on the held-out split.
    predictions = model.predict(X_test)

    # Scoring. zero_division=0 yields 0 instead of a warning when the model
    # predicts no positives at all.
    acc = accuracy_score(y_test, predictions)
    prec = precision_score(y_test, predictions, zero_division=0)
    norm = model.get_weights_norm()

    # Report, one metric per line, followed by blank spacing.
    print(f"  > Test Accuracy: {acc * 100:.2f}%")
    print(f"  > Test Precision: {prec * 100:.2f}%")
    print(f"  > Time Taken: {elapsed_ms:.2f} ms")
    print(f"  > Final |w| (L2-Norm): {norm:.4f}")
    print("\n")



Generate and Prepare Data

In [None]:
# Synthetic binary classification problem with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    random_state=42,
)
y = 2 * y - 1  # Relabel {0, 1} as {-1, 1}

# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize using statistics estimated on the training split only.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Run Comparisons

In [None]:
# EXPERIMENT 1: Baseline

print("="*50)
print("EXPERIMENT 1: Baseline (Paper's Recommendation)")
print("Small eta, No Regularization, Early Stopping (10 epochs)")
print("="*50)
params_baseline = {"eta": 0.002, "lambda_reg": 0.0, "n_epochs": 10}

# The SGD classes shuffle samples with NumPy's global RNG. Reseeding before
# each run makes the results reproducible under Restart & Run All and gives
# both algorithms the same sequence of sample orderings.
np.random.seed(42)
run_experiment(StandardSGD(HingeLoss(), **params_baseline),
               "Standard SGD (SVM Loss)",
               X_train, y_train, X_test, y_test)
np.random.seed(42)
run_experiment(AveragedSGD(HingeLoss(), **params_baseline),
               "Averaged SGD (SVM Loss)",
               X_train, y_train, X_test, y_test)

EXPERIMENT 1: Baseline (Paper's Recommendation)
Small eta, No Regularization, Early Stopping (10 epochs)
--- Standard SGD (SVM Loss) ---
  > Test Accuracy: 83.00%
  > Test Precision: 79.08%
  > Time Taken: 90.55 ms
  > Final |w| (L2-Norm): 1.7009


--- Averaged SGD (SVM Loss) ---
  > Test Accuracy: 82.33%
  > Test Precision: 78.81%
  > Time Taken: 120.61 ms
  > Final |w| (L2-Norm): 1.2968




In [None]:
# EXPERIMENT 2: Effect of Large Learning Rate (η)

print("="*50)
print("EXPERIMENT 2: Effect of Large Learning Rate (eta)")
print("Large eta (0.5), No Regularization, 10 epochs")
print("="*50)
params_large_eta = {"eta": 0.5, "lambda_reg": 0.0, "n_epochs": 10}

# The SGD classes shuffle samples with NumPy's global RNG. Reseeding before
# each run makes the results reproducible under Restart & Run All and gives
# both algorithms the same sequence of sample orderings.
np.random.seed(42)
run_experiment(StandardSGD(HingeLoss(), **params_large_eta),
               "Standard SGD (Large eta)",
               X_train, y_train, X_test, y_test)
np.random.seed(42)
run_experiment(AveragedSGD(HingeLoss(), **params_large_eta),
               "Averaged SGD (Large eta)",
               X_train, y_train, X_test, y_test)

EXPERIMENT 2: Effect of Large Learning Rate (eta)
Large eta (0.5), No Regularization, 10 epochs
--- Standard SGD (Large eta) ---
  > Test Accuracy: 79.67%
  > Test Precision: 75.82%
  > Time Taken: 89.53 ms
  > Final |w| (L2-Norm): 11.7736


--- Averaged SGD (Large eta) ---
  > Test Accuracy: 83.33%
  > Test Precision: 78.85%
  > Time Taken: 129.59 ms
  > Final |w| (L2-Norm): 10.5280




In [None]:
# EXPERIMENT 3: Regularization (λ) vs. Early Stopping

print("="*50)
print("EXPERIMENT 3: Regularization (lambda) vs. Early Stopping")
print("="*50)
print("(Run A: Early Stopping - Same as baseline)")
params_early_stop = {"eta": 0.002, "lambda_reg": 0.0, "n_epochs": 10}

# StandardSGD shuffles samples with NumPy's global RNG. Without a fixed seed
# Run A drifts from run to run, so it is not actually a repeatable baseline;
# reseed before each run to make both results reproducible.
np.random.seed(42)
run_experiment(StandardSGD(HingeLoss(), **params_early_stop),
               "Standard SGD (Early Stop)",
               X_train, y_train, X_test, y_test)

print("(Run B: Explicit Regularization - More epochs)")
params_explicit_reg = {"eta": 0.002, "lambda_reg": 0.01, "n_epochs": 100}
np.random.seed(42)
run_experiment(StandardSGD(HingeLoss(), **params_explicit_reg),
               "Standard SGD (Explicit Lambda)",
               X_train, y_train, X_test, y_test)

EXPERIMENT 3: Regularization (lambda) vs. Early Stopping
(Run A: Early Stopping - Same as baseline)
--- Standard SGD (Early Stop) ---
  > Test Accuracy: 82.00%
  > Test Precision: 77.92%
  > Time Taken: 81.78 ms
  > Final |w| (L2-Norm): 1.6807


(Run B: Explicit Regularization - More epochs)
--- Standard SGD (Explicit Lambda) ---
  > Test Accuracy: 84.00%
  > Test Precision: 79.87%
  > Time Taken: 831.47 ms
  > Final |w| (L2-Norm): 1.8281


