In [12]:
import numpy as np
import pandas as pd
import sys

In [14]:
# 1. Softmax Regression Model Implementation
class SoftmaxRegression:
    def __init__(self, eta=0.1, n_epochs=500, patience=10, random_state=42):
        self.eta = eta
        self.n_epochs = n_epochs
        self.patience = patience
        self.random_state = random_state
        self.theta_ = None # Model weights

    def _add_bias(self, X):
        return np.c_[np.ones((X.shape[0], 1)), X]

    def _softmax(self, logits):
        exps = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def _compute_loss(self, y_one_hot, y_proba):
        epsilon = 1e-7
        return -np.mean(np.sum(y_one_hot * np.log(y_proba + epsilon), axis=1))

    def fit(self, X, y, X_val, y_val, batch_size):
        X_train_bias = self._add_bias(X)
        X_val_bias = self._add_bias(X_val)
        n_samples, n_features_with_bias = X_train_bias.shape
        n_outputs = len(np.unique(y))
        y_train_one_hot = np.eye(n_outputs)[y]
        y_val_one_hot = np.eye(n_outputs)[y_val]
        
        np.random.seed(self.random_state)
        self.theta_ = np.random.randn(n_features_with_bias, n_outputs)
        best_loss = np.inf
        patience_counter = 0
        best_theta = None

        for epoch in range(self.n_epochs):
            shuffled_indices = np.random.permutation(n_samples)
            X_shuffled = X_train_bias[shuffled_indices]
            y_shuffled = y_train_one_hot[shuffled_indices]

            # Iterate over batches
            for i in range(0, n_samples, batch_size):
                X_batch = X_shuffled[i:i+batch_size]
                y_batch = y_shuffled[i:i+batch_size]
                
                logits = X_batch @ self.theta_
                y_proba = self._softmax(logits)
                gradients = (1/batch_size) * X_batch.T @ (y_proba - y_batch)
                self.theta_ -= self.eta * gradients
            val_logits = X_val_bias @ self.theta_
            val_proba = self._softmax(val_logits)
            val_loss = self._compute_loss(y_val_one_hot, val_proba)
        
            if val_loss < best_loss:
                best_loss = val_loss
                best_theta = self.theta_.copy()
                patience_counter = 0
            else:
                patience_counter += 1
    
            if patience_counter >= self.patience:
                print(f"    -> Early stopping at epoch {epoch+1}")
                break
        self.theta_ = best_theta

    def predict(self, X):
        X_bias = self._add_bias(X)
        logits = X_bias @ self.theta_
        y_proba = self._softmax(logits)
        return np.argmax(y_proba, axis=1)

In [15]:
# 2. Data Loading and Preparation
# Load Iris dataset from a URL
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
df = pd.read_csv(url, header=None, names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'])

# Prepare data
X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values
class_mapping = {name: i for i, name in enumerate(df['class'].unique())}
y = df['class'].map(class_mapping).values
np.random.seed(42)
shuffled_indices = np.random.permutation(len(X))
X = X[shuffled_indices]
y = y[shuffled_indices]
split_idx = int(0.8 * len(X))
X_train, X_val = X[:split_idx], X[split_idx:]
y_train, y_val = y[:split_idx], y[split_idx:]

In [16]:
# 3. Training and Evaluation
def calculate_accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

print("Starting training...\n")
print("Training with Batch Gradient Descent...")
model_batch = SoftmaxRegression(eta=0.1, n_epochs=1000, patience=10)
model_batch.fit(X_train, y_train, X_val, y_val, batch_size=len(X_train))
y_pred_batch = model_batch.predict(X_val)
accuracy_batch = calculate_accuracy(y_val, y_pred_batch)
print(f"    -> Validation Accuracy: {accuracy_batch:.4f}\n")
print("Training with Stochastic Gradient Descent...")
model_sgd = SoftmaxRegression(eta=0.01, n_epochs=500, patience=20)
# Use a single sample per batch
model_sgd.fit(X_train, y_train, X_val, y_val, batch_size=1)
y_pred_sgd = model_sgd.predict(X_val)
accuracy_sgd = calculate_accuracy(y_val, y_pred_sgd)
print(f"    -> Validation Accuracy: {accuracy_sgd:.4f}\n")
print("Training with Minibatch Gradient Descent...")
model_mini = SoftmaxRegression(eta=0.1, n_epochs=500, patience=10)
# Use a batch size of 32
model_mini.fit(X_train, y_train, X_val, y_val, batch_size=32)
y_pred_mini = model_mini.predict(X_val)
accuracy_mini = calculate_accuracy(y_val, y_pred_mini)
print(f"    -> Validation Accuracy: {accuracy_mini:.4f}\n")


Starting training...

Training with Batch Gradient Descent...
    -> Early stopping at epoch 26
    -> Validation Accuracy: 0.6000

Training with Stochastic Gradient Descent...
    -> Early stopping at epoch 343
    -> Validation Accuracy: 1.0000

Training with Minibatch Gradient Descent...
    -> Early stopping at epoch 170
    -> Validation Accuracy: 1.0000

