In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import random

# ----------------------------------
# 0. Reproducibility and device
# ----------------------------------

def set_seed(seed: int = 42) -> None:
    """Set random seeds for reproducible experiments."""
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def get_device() -> torch.device:
    """Selects MPS (Apple), CUDA, or CPU."""
    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

set_seed(42)
DEVICE = get_device()
print("Using device for Set A:", DEVICE)

EPOCHS_A = 50
LR_A = 1e-3
BATCH_SIZE_A = 256

# ----------------------------------
# 1. Complexity measure helpers
# ----------------------------------

def calculate_l2_norm(model: nn.Module) -> float:
    """Computes the Frobenius norm of all weight matrices."""
    l2_norm = 0.0
    for name, param in model.named_parameters():
        if "weight" in name:
            l2_norm += torch.sum(param.detach() ** 2)
    return torch.sqrt(l2_norm).item()

def calculate_spectral_norm(model: nn.Module) -> float:
    """Computes the sum of maximum singular values across weight matrices."""
    spectral_norm_sum = 0.0
    for name, param in model.named_parameters():
        if "weight" in name and param.dim() > 1:
            W = param
            try:
                if W.numel() > 0:
                    _, S, _ = torch.linalg.svd(W, full_matrices=False)
                    spectral_norm_sum += S[0].item()
            except Exception:
                continue
    return spectral_norm_sum

def calculate_sharpness(
    model: nn.Module,
    criterion: nn.Module,
    data_loader: DataLoader,
    rho: float = 0.01,
    device: torch.device = DEVICE,
) -> float:
    """
    Approximates sharpness using a single SAM-style perturbation step.
    S(w*) = (L(w* + Îµ) - L(w*)) / (1 + L(w*)).
    """
    model.eval()

    try:
        data_batch, target_batch = next(iter(data_loader))
    except StopIteration:
        return 0.0

    data_batch, target_batch = data_batch.to(device), target_batch.to(device)

    with torch.no_grad():
        out = model(data_batch)
        base_loss = criterion(out, target_batch).item()

    model.zero_grad()
    out = model(data_batch)
    loss = criterion(out, target_batch)
    loss.backward()

    grad_norm_sq = 0.0
    for p in model.parameters():
        if p.grad is not None:
            grad_norm_sq += torch.sum(p.grad ** 2)
    grad_norm = torch.sqrt(grad_norm_sq)
    if grad_norm.item() == 0.0:
        return 0.0

    epsilon_map = {}
    for name, p in model.named_parameters():
        if p.grad is not None:
            eps = (p.grad / grad_norm) * rho
            p.data.add_(eps)
            epsilon_map[name] = eps

    with torch.no_grad():
        out_pert = model(data_batch)
        pert_loss = criterion(out_pert, target_batch).item()

    for name, p in model.named_parameters():
        if name in epsilon_map:
            p.data.sub_(epsilon_map[name])

    sharp = (pert_loss - base_loss) / (1.0 + base_loss)
    return max(0.0, sharp)

def evaluate_model(
    model: nn.Module,
    data_loader: DataLoader,
    criterion: nn.Module,
    device: torch.device = DEVICE,
) -> tuple[float, float]:
    """Computes average loss and classification error on a dataset."""
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            loss = criterion(outputs, target)
            total_loss += loss.item() * data.size(0)

            _, preds = torch.max(outputs.data, 1)
            correct += (preds == target).sum().item()
            total += target.size(0)

    avg_loss = total_loss / total
    error = 1.0 - (correct / total)
    return avg_loss, error

# ----------------------------------
# 2. Data loading and preprocessing
# ----------------------------------

def load_uci_adult(test_size: float = 0.2, seed: int = 42):
    """
    Loads the UCI Adult dataset from OpenML, applies one-hot encoding to
    categorical variables and standardisation to numeric features.
    """
    print("\nLoading UCI Adult dataset...")
    adult = fetch_openml("adult", version=2, as_frame=True)
    df = adult.frame.copy()

    target_col = "class"
    y = (df[target_col] == ">50K").astype(int).values
    X = df.drop(columns=[target_col])

    cat_cols = X.select_dtypes(include=["category", "object"]).columns
    num_cols = X.select_dtypes(include=["int64", "float64"]).columns

    X_cat = pd.get_dummies(X[cat_cols], drop_first=True)
    scaler = StandardScaler()
    X_num = pd.DataFrame(scaler.fit_transform(X[num_cols]), columns=num_cols)

    X_proc = pd.concat([X_num, X_cat], axis=1)
    X_array = X_proc.values.astype(np.float32)
    y_array = y.astype(np.int64)

    X_train, X_test, y_train, y_test = train_test_split(
        X_array,
        y_array,
        test_size=test_size,
        random_state=seed,
        stratify=y_array,
    )

    X_train_t = torch.tensor(X_train, dtype=torch.float32)
    y_train_t = torch.tensor(y_train, dtype=torch.long)
    X_test_t = torch.tensor(X_test, dtype=torch.float32)
    y_test_t = torch.tensor(y_test, dtype=torch.long)

    train_ds = TensorDataset(X_train_t, y_train_t)
    test_ds = TensorDataset(X_test_t, y_test_t)

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE_A, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE_A, shuffle=False)

    input_dim = X_train.shape[1]
    print(f"UCI Adult: train={X_train.shape[0]}, test={X_test.shape[0]}, input_dim={input_dim}")
    return train_loader, test_loader, input_dim

# ----------------------------------
# 3. Model definitions (Set A)
# ----------------------------------

class LogisticRegressionModel(nn.Module):
    """Single linear layer for binary classification."""
    def __init__(self, input_dim: int, num_classes: int = 2):
        super().__init__()
        self.linear = nn.Linear(input_dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)

class ShallowFFN(nn.Module):
    """One-hidden-layer feed-forward network."""
    def __init__(self, input_dim: int, hidden_dim: int = 64, num_classes: int = 2):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# ----------------------------------
# 4. Training and evaluation wrapper
# ----------------------------------

def train_and_evaluate_tabular(
    model: nn.Module,
    train_loader: DataLoader,
    test_loader: DataLoader,
    epochs: int = EPOCHS_A,
    lr: float = LR_A,
    device: torch.device = DEVICE,
) -> dict:
    """Trains a tabular model and returns generalisation and complexity metrics."""
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"\nTraining model with {total_params} trainable parameters.")

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            out = model(data)
            loss = criterion(out, target)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * data.size(0)

        avg_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch+1}/{epochs}] - Train Loss: {avg_loss:.4f}")

    train_loss, train_error = evaluate_model(model, train_loader, criterion, device)
    test_loss, test_error = evaluate_model(model, test_loader, criterion, device)
    gen_gap = test_error - train_error

    l2 = calculate_l2_norm(model)
    spec = calculate_spectral_norm(model)
    sharp = calculate_sharpness(model, criterion, train_loader, device=device)

    return {
        "params": total_params,
        "train_error": train_error,
        "test_error": test_error,
        "gen_gap": gen_gap,
        "l2_norm": l2,
        "spectral_norm": spec,
        "sharpness": sharp,
    }

# ----------------------------------
# 5. Main entry point (Set A)
# ----------------------------------

def run_set_a_experiment():
    """Runs Set A experiments on UCI Adult with logistic regression and shallow FFN."""
    print("\n--- Set A: UCI Adult (Classical Baseline) ---")
    print(f"Epochs: {EPOCHS_A}, Device: {DEVICE}")

    train_loader, test_loader, input_dim = load_uci_adult(test_size=0.2, seed=42)
    results = []

    print("\nModel A1: Logistic Regression")
    logreg = LogisticRegressionModel(input_dim=input_dim, num_classes=2)
    m_logreg = train_and_evaluate_tabular(logreg, train_loader, test_loader)
    m_logreg["id"] = "A_LogReg"
    results.append(m_logreg)

    print("\nModel A2: Shallow FFN (64 hidden units)")
    shallow = ShallowFFN(input_dim=input_dim, hidden_dim=64, num_classes=2)
    m_ffn = train_and_evaluate_tabular(shallow, train_loader, test_loader)
    m_ffn["id"] = "A_ShallowFFN_64"
    results.append(m_ffn)

    df_a = pd.DataFrame(results)
    out_name = "dissertation_results_set_a.csv"
    df_a.to_csv(out_name, index=False)
    print(f"\nSet A results saved to '{out_name}'")
    print(df_a[["id", "params", "train_error", "test_error", "gen_gap",
                "l2_norm", "spectral_norm", "sharpness"]])

if __name__ == "__main__":
    run_set_a_experiment()


Using device for Set A: mps

--- Set A: UCI Adult (Classical Baseline) ---
Epochs: 50, Device: mps

Loading UCI Adult dataset...
UCI Adult: train=39073, test=9769, input_dim=97

Model A1: Logistic Regression

Training model with 196 trainable parameters.
Epoch [1/50] - Train Loss: 0.5206
Epoch [2/50] - Train Loss: 0.3988
Epoch [3/50] - Train Loss: 0.3662
Epoch [4/50] - Train Loss: 0.3515
Epoch [5/50] - Train Loss: 0.3433
Epoch [6/50] - Train Loss: 0.3380
Epoch [7/50] - Train Loss: 0.3342
Epoch [8/50] - Train Loss: 0.3314
Epoch [9/50] - Train Loss: 0.3292
Epoch [10/50] - Train Loss: 0.3275
Epoch [11/50] - Train Loss: 0.3259
Epoch [12/50] - Train Loss: 0.3248
Epoch [13/50] - Train Loss: 0.3238
Epoch [14/50] - Train Loss: 0.3231
Epoch [15/50] - Train Loss: 0.3224
Epoch [16/50] - Train Loss: 0.3219
Epoch [17/50] - Train Loss: 0.3214
Epoch [18/50] - Train Loss: 0.3211
Epoch [19/50] - Train Loss: 0.3207
Epoch [20/50] - Train Loss: 0.3204
Epoch [21/50] - Train Loss: 0.3202
Epoch [22/50] - Tra

  _, S, _ = torch.linalg.svd(W, full_matrices=False)



Model A2: Shallow FFN (64 hidden units)

Training model with 6402 trainable parameters.
Epoch [1/50] - Train Loss: 0.4061
Epoch [2/50] - Train Loss: 0.3180
Epoch [3/50] - Train Loss: 0.3118
Epoch [4/50] - Train Loss: 0.3090
Epoch [5/50] - Train Loss: 0.3072
Epoch [6/50] - Train Loss: 0.3056
Epoch [7/50] - Train Loss: 0.3044
Epoch [8/50] - Train Loss: 0.3037
Epoch [9/50] - Train Loss: 0.3026
Epoch [10/50] - Train Loss: 0.3017
Epoch [11/50] - Train Loss: 0.3009
Epoch [12/50] - Train Loss: 0.3002
Epoch [13/50] - Train Loss: 0.2992
Epoch [14/50] - Train Loss: 0.2988
Epoch [15/50] - Train Loss: 0.2984
Epoch [16/50] - Train Loss: 0.2973
Epoch [17/50] - Train Loss: 0.2966
Epoch [18/50] - Train Loss: 0.2958
Epoch [19/50] - Train Loss: 0.2953
Epoch [20/50] - Train Loss: 0.2948
Epoch [21/50] - Train Loss: 0.2942
Epoch [22/50] - Train Loss: 0.2937
Epoch [23/50] - Train Loss: 0.2931
Epoch [24/50] - Train Loss: 0.2925
Epoch [25/50] - Train Loss: 0.2921
Epoch [26/50] - Train Loss: 0.2917
Epoch [27/