In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
import random
from tqdm import tqdm

# ----------------------------------
# 0. Reproducibility and device
# ----------------------------------

def set_seed(seed: int = 42) -> None:
    """Seed the Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

    Args:
        seed: Value handed to every random number generator.
    """
    # Host-side generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators; the CUDA ones are only seeded when CUDA is present.
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Turn off cuDNN auto-tuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

def get_device() -> torch.device:
    """Return the preferred torch device: MPS first, then CUDA, otherwise CPU."""
    # Older PyTorch builds have no `torch.backends.mps` attribute at all.
    mps_backend = getattr(torch.backends, "mps", None)

    if mps_backend is not None and mps_backend.is_available():
        chosen = "mps"
    elif torch.cuda.is_available():
        chosen = "cuda"
    else:
        chosen = "cpu"
    return torch.device(chosen)

# Seed all RNGs at import time, before any model or DataLoader is created.
set_seed(42)
# Resolved once at import time; the helper functions below bind this value as
# the default of their `device` parameter.
DEVICE = get_device()
print("Using device for Set C:", DEVICE)

# Hyper-parameters used as defaults by the Set C training functions.
EPOCHS_C = 100  # default number of training epochs (also the cosine schedule's T_max)
LR_C = 0.01  # learning rate passed to SGD
BATCH_SIZE_C = 128  # default mini-batch size for both data loaders

# ----------------------------------
# 1. Complexity measure helpers
# ----------------------------------

def calculate_l2_norm(model: nn.Module) -> float:
    """Return the global L2 (Frobenius) norm over all weight parameters.

    Every parameter whose name contains ``"weight"`` contributes the sum of
    its squared entries; biases are ignored. The result is the square root
    of the combined sum.

    Args:
        model: Module whose ``named_parameters()`` are inspected.

    Returns:
        The norm as a Python float, or ``0.0`` when the model has no
        parameter with ``"weight"`` in its name.
    """
    squared_total = None
    for name, param in model.named_parameters():
        if "weight" not in name:
            continue
        contribution = torch.sum(param.detach() ** 2)
        squared_total = (
            contribution if squared_total is None else squared_total + contribution
        )

    # torch.sqrt only accepts tensors, so a weight-less model must be handled
    # here instead of falling through with a plain Python 0.0.
    if squared_total is None:
        return 0.0
    return torch.sqrt(squared_total).item()

def calculate_spectral_norm(model: nn.Module) -> float:
    """Sum the largest singular value of every multi-dimensional weight tensor.

    Each parameter whose name contains ``"weight"`` and that has at least two
    dimensions is reshaped to ``(shape[0], -1)`` -- so convolution kernels of
    any rank become one matrix per layer -- and its top singular value is
    added to the total. One-dimensional weights and empty tensors are skipped.

    Args:
        model: Module whose ``named_parameters()`` are inspected.

    Returns:
        The accumulated sum as a Python float (``0.0`` if nothing qualified).
        A layer whose decomposition fails is skipped with a ``RuntimeWarning``
        rather than being dropped silently.
    """
    import warnings  # local import keeps this block self-contained

    spectral_norm_sum = 0.0
    for name, param in model.named_parameters():
        if "weight" not in name or param.dim() < 2 or param.numel() == 0:
            continue

        # detach(): no autograd graph is needed for a measurement.
        # reshape(): unlike view(), also works for non-contiguous weights.
        # cpu(): decomposing on the host works for every device and avoids the
        # fallback warning that SVD on MPS tensors appears to trigger.
        matrix = param.detach().reshape(param.shape[0], -1).cpu()

        try:
            # svdvals returns singular values in descending order and skips
            # computing the singular vectors.
            top_singular_value = torch.linalg.svdvals(matrix)[0].item()
        except RuntimeError as err:
            warnings.warn(
                f"spectral norm skipped for '{name}': {err}", RuntimeWarning
            )
            continue

        spectral_norm_sum += top_singular_value
    return spectral_norm_sum

def calculate_sharpness(
    model: nn.Module,
    criterion: nn.Module,
    data_loader: DataLoader,
    rho: float = 0.01,
    device: torch.device = DEVICE,
) -> float:
    """
    Approximates sharpness using a single SAM-style perturbation step.

    On the first batch drawn from ``data_loader`` the weights are moved by
    ε = rho * g / ||g|| (g being the loss gradient on that batch) and
    S(w*) = (L(w* + ε) - L(w*)) / (1 + L(w*)) is evaluated.

    Args:
        model: Network to probe. It is switched to eval mode and left there;
            the gradients computed on the probe batch remain in ``.grad``.
        criterion: Loss applied as ``criterion(model(x), y)``.
        data_loader: Source of the single probe batch.
        rho: Length of the perturbation along the normalised gradient.
        device: Device the probe batch is moved to.

    Returns:
        The sharpness estimate clamped below at 0.0. Also 0.0 when the loader
        is empty, no parameter received a gradient, or the gradient norm is
        exactly zero.
    """
    model.eval()

    try:
        data_batch, target_batch = next(iter(data_loader))
    except StopIteration:
        return 0.0

    data_batch, target_batch = data_batch.to(device), target_batch.to(device)

    # One forward/backward pass provides both the base loss and the gradient;
    # the model is in eval mode, so a separate no-grad pass would be redundant.
    model.zero_grad()
    loss = criterion(model(data_batch), target_batch)
    loss.backward()
    base_loss = loss.item()

    squared_norms = [
        torch.sum(p.grad ** 2) for p in model.parameters() if p.grad is not None
    ]
    if not squared_norms:
        # Nothing is trainable, so there is no direction to perturb along.
        return 0.0
    grad_norm = torch.sqrt(sum(squared_norms))
    if grad_norm.item() == 0.0:
        return 0.0

    backups = []  # (parameter, copy of its original values)
    with torch.no_grad():
        try:
            for p in model.parameters():
                if p.grad is None:
                    continue
                backups.append((p, p.detach().clone()))
                p.add_((p.grad / grad_norm) * rho)
            pert_loss = criterion(model(data_batch), target_batch).item()
        finally:
            # Copy the saved values back: this is bit-exact (w + ε - ε is not)
            # and also runs when the perturbed forward pass raises.
            for p, original in backups:
                p.copy_(original)

    sharp = (pert_loss - base_loss) / (1.0 + base_loss)
    return max(0.0, sharp)

def evaluate_model(
    model: nn.Module,
    data_loader: DataLoader,
    criterion: nn.Module,
    device: torch.device = DEVICE,
) -> tuple[float, float]:
    """Return ``(average loss, error rate)`` of ``model`` over ``data_loader``.

    The model is put in eval mode and run without gradient tracking. Each
    batch loss is weighted by the batch size before averaging (this presumes
    ``criterion`` returns a per-batch mean), and the error rate is one minus
    the fraction of argmax predictions that equal the targets. An empty
    loader leads to a ``ZeroDivisionError``.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)

            batch_loss = criterion(logits, labels)
            loss_sum += batch_loss.item() * inputs.size(0)

            # Index of the largest logit per row is the predicted class.
            predicted = logits.max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            n_seen += labels.size(0)

    mean_loss = loss_sum / n_seen
    error_rate = 1.0 - (n_correct / n_seen)
    return mean_loss, error_rate

# ----------------------------------
# 2. Model definition (Variable CNN)
# ----------------------------------

class VariableCNN(nn.Module):
    """
    Convolutional network for Fashion-MNIST with variable layer widths.

    Architecture: Conv -> ReLU -> Pool -> Conv -> ReLU -> Pool -> FC -> FC.
    The depth is fixed; only the two convolution widths and the hidden
    fully-connected width change between configurations.

    Args:
        conv1_filters: Output channels of the first 3x3 convolution.
        conv2_filters: Output channels of the second 3x3 convolution.
        fc1_size: Width of the hidden fully-connected layer.
        num_classes: Number of output logits.
        in_channels: Channels of the input image (1 for Fashion-MNIST).
        input_size: Height and width of the square input image (28 for
            Fashion-MNIST). Each of the two 2x2 poolings halves it, rounding
            down.

    Raises:
        ValueError: If ``input_size`` is smaller than 4, which would leave no
            pixels after the two poolings.
    """
    def __init__(
        self,
        conv1_filters: int,
        conv2_filters: int,
        fc1_size: int,
        num_classes: int = 10,
        in_channels: int = 1,
        input_size: int = 28,
    ):
        super().__init__()
        if input_size < 4:
            raise ValueError(f"input_size must be at least 4, got {input_size}")

        self.conv1 = nn.Conv2d(in_channels, conv1_filters, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(conv1_filters, conv2_filters, kernel_size=3, padding=1)

        # The padded 3x3 convolutions keep the spatial size; the two poolings
        # reduce it to input_size // 4 (28 -> 14 -> 7 by default).
        pooled_side = input_size // 4
        self.flattened_size = pooled_side * pooled_side * conv2_filters
        self.fc1 = nn.Linear(self.flattened_size, fc1_size)
        self.fc2 = nn.Linear(fc1_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a batch of images ``(N, C, H, W)`` to logits ``(N, num_classes)``.

        A single unbatched image ``(C, H, W)`` is treated as a batch of one.
        """
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, flattened_size), this cannot
        # silently fold surplus pixels into the batch dimension when the input
        # has an unexpected size; fc1 then reports the shape mismatch instead.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# ----------------------------------
# 3. Fashion-MNIST loader
# ----------------------------------

def load_fashion_mnist(
    batch_size: int = BATCH_SIZE_C,
):
    """
    Builds DataLoaders over the full Fashion-MNIST training and test splits.

    Images are converted to tensors and normalised with mean 0.5 and std 0.5.
    The data is stored under ``./data`` and downloaded when missing.
    Returns ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    preprocessing = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
    )

    # Keyed by the `train` flag: True -> training split, False -> test split.
    splits = {
        is_train: datasets.FashionMNIST(
            "./data", train=is_train, download=True, transform=preprocessing
        )
        for is_train in (True, False)
    }

    train_loader = DataLoader(splits[True], batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(splits[False], batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# ----------------------------------
# 4. Training for one configuration
# ----------------------------------

def train_and_evaluate_cnn(
    config: dict,
    train_loader: DataLoader,
    test_loader: DataLoader,
    epochs: int = EPOCHS_C,
    lr: float = LR_C,
    device: torch.device = DEVICE,
) -> dict:
    """
    Trains one VariableCNN configuration and collects its metrics.

    The network is built from ``config`` (keys ``id``, ``conv1_filters``,
    ``conv2_filters``, ``fc1_size``) and optimised with SGD (momentum 0.9)
    under a cosine-annealed learning rate stepped once per epoch. After
    training, the train/test error, their gap, the L2 norm, the spectral norm
    and the sharpness are measured and returned in a dict together with the
    configuration id and the trainable parameter count.
    """
    net = VariableCNN(
        conv1_filters=config["conv1_filters"],
        conv2_filters=config["conv2_filters"],
        fc1_size=config["fc1_size"],
    )
    net = net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    n_trainable = sum(w.numel() for w in net.parameters() if w.requires_grad)
    run_id = config["id"]
    print(f"\n[{run_id}] parameters: {n_trainable}")

    net.train()
    for epoch_no in range(1, epochs + 1):
        epoch_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(net(inputs), labels)
            batch_loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch figure is a per-sample mean.
            epoch_loss += batch_loss.item() * inputs.size(0)

        scheduler.step()
        mean_loss = epoch_loss / len(train_loader.dataset)
        print(f"[{run_id}] Epoch [{epoch_no}/{epochs}] - Train Loss: {mean_loss:.4f}")

    # Only the error rates are reported; the evaluation losses are discarded.
    _, train_error = evaluate_model(net, train_loader, criterion, device)
    _, test_error = evaluate_model(net, test_loader, criterion, device)

    report = {
        "id": config["id"],
        "params": n_trainable,
        "train_error": train_error,
        "test_error": test_error,
        "gen_gap": test_error - train_error,
    }
    report["l2_norm"] = calculate_l2_norm(net)
    report["spectral_norm"] = calculate_spectral_norm(net)
    report["sharpness"] = calculate_sharpness(
        net, criterion, train_loader, device=device
    )
    return report

# ----------------------------------
# 5. Architecture configurations
# ----------------------------------

# (conv1_filters, conv2_filters, fc1_size) of every network in Set C.
_ARCHITECTURE_SHAPES = [
    (4, 8, 16),
    (8, 16, 32),
    (8, 16, 64),
    (16, 32, 128),
    (16, 32, 256),
    (32, 64, 64),
    (32, 64, 128),
    (32, 64, 256),
    (64, 128, 128),
    (64, 128, 256),
    (64, 128, 512),
    (128, 256, 512),
    (128, 256, 1024),
    (256, 512, 1024),
    (512, 1024, 1024),
]

# One dict per network; the id spells out the three widths as "C_<conv1>_<conv2>_<fc1>".
ARCHITECTURE_CONFIGS = [
    {
        "id": f"C_{conv1}_{conv2}_{fc1}",
        "conv1_filters": conv1,
        "conv2_filters": conv2,
        "fc1_size": fc1,
    }
    for conv1, conv2, fc1 in _ARCHITECTURE_SHAPES
]

# ----------------------------------
# 6. Main driver for Set C
# ----------------------------------

def _normalise_by_max(values: pd.Series) -> pd.Series:
    """Divide a column by its maximum; a zero or missing maximum maps values to 0."""
    peak = values.max()
    if pd.isna(peak) or peak == 0:
        # Avoids 0/0 -> NaN; multiplying keeps the index and any NaN entries.
        return values * 0.0
    return values / peak


def run_set_c_experiment(
    epochs: int = EPOCHS_C,
    batch_size: int = BATCH_SIZE_C,
) -> pd.DataFrame:
    """
    Runs the full Model Set C experiment on Fashion-MNIST and computes
    GSCM along with baseline complexity measures.

    Every entry of ``ARCHITECTURE_CONFIGS`` is trained in turn; a
    configuration that raises is reported on stdout and skipped. Sharpness and
    L2 norm are each divided by their maximum over the finished runs, and
    ``gscm_score`` is the equal-weight average of the two normalised columns.
    The table is written to ``dissertation_results_set_c.csv`` and a summary
    is printed.

    Args:
        epochs: Training epochs per architecture.
        batch_size: Mini-batch size for both data loaders.

    Returns:
        One row per successfully trained architecture. If every configuration
        failed, the empty frame is returned and no CSV is written.
    """
    print("\n--- Set C: Fashion-MNIST CNN Ensemble (GSCM Core) ---")
    print(f"Architectures: {len(ARCHITECTURE_CONFIGS)}, Epochs: {epochs}, Device: {DEVICE}")

    train_loader, test_loader = load_fashion_mnist(batch_size=batch_size)

    all_results: list[dict] = []

    for config in tqdm(ARCHITECTURE_CONFIGS, desc="Training Set C models"):
        try:
            metrics = train_and_evaluate_cnn(
                config=config,
                train_loader=train_loader,
                test_loader=test_loader,
                epochs=epochs,
                lr=LR_C,
                device=DEVICE,
            )
        except Exception as e:
            # Deliberate best effort: one failing architecture (e.g. out of
            # memory) must not discard the hours already spent on the others.
            print(f"[ERROR] {config['id']}: {e}")
        else:
            all_results.append(metrics)

    df_c = pd.DataFrame(all_results)
    if df_c.empty:
        # Without rows there are no metric columns to normalise, and writing
        # an empty CSV could overwrite results from an earlier run.
        print("\n[ERROR] No Set C configuration completed; nothing to summarise or save.")
        return df_c

    # Empirical max normalisation for GSCM components
    df_c["s_norm"] = _normalise_by_max(df_c["sharpness"])
    df_c["n_norm"] = _normalise_by_max(df_c["l2_norm"])
    df_c["gscm_score"] = 0.5 * df_c["s_norm"] + 0.5 * df_c["n_norm"]

    out_name = "dissertation_results_set_c.csv"
    df_c.to_csv(out_name, index=False)

    print("\nSet C summary (sorted by parameter count):")
    print(
        df_c[
            ["id", "params", "train_error", "test_error", "gen_gap", "gscm_score"]
        ].sort_values("params")
    )
    print(f"\nSet C results saved to '{out_name}'")

    return df_c

# Entry point: run the whole Set C sweep with the default epochs and batch size
# when this module is executed directly rather than imported.
if __name__ == "__main__":
    run_set_c_experiment()


Using device for Set C: mps

--- Set C: Fashion-MNIST CNN Ensemble (GSCM Core) ---
Architectures: 15, Epochs: 100, Device: mps


Training Set C models:   0%|                             | 0/15 [00:00<?, ?it/s]


[C_4_8_16] parameters: 6794
[C_4_8_16] Epoch [1/100] - Train Loss: 0.8825
[C_4_8_16] Epoch [2/100] - Train Loss: 0.4707
[C_4_8_16] Epoch [3/100] - Train Loss: 0.3989
[C_4_8_16] Epoch [4/100] - Train Loss: 0.3618
[C_4_8_16] Epoch [5/100] - Train Loss: 0.3403
[C_4_8_16] Epoch [6/100] - Train Loss: 0.3235
[C_4_8_16] Epoch [7/100] - Train Loss: 0.3109
[C_4_8_16] Epoch [8/100] - Train Loss: 0.3005
[C_4_8_16] Epoch [9/100] - Train Loss: 0.2961
[C_4_8_16] Epoch [10/100] - Train Loss: 0.2878
[C_4_8_16] Epoch [11/100] - Train Loss: 0.2814
[C_4_8_16] Epoch [12/100] - Train Loss: 0.2760
[C_4_8_16] Epoch [13/100] - Train Loss: 0.2716
[C_4_8_16] Epoch [14/100] - Train Loss: 0.2686
[C_4_8_16] Epoch [15/100] - Train Loss: 0.2655
[C_4_8_16] Epoch [16/100] - Train Loss: 0.2600
[C_4_8_16] Epoch [17/100] - Train Loss: 0.2564
[C_4_8_16] Epoch [18/100] - Train Loss: 0.2541
[C_4_8_16] Epoch [19/100] - Train Loss: 0.2525
[C_4_8_16] Epoch [20/100] - Train Loss: 0.2507
[C_4_8_16] Epoch [21/100] - Train Loss: 

  _, S, _ = torch.linalg.svd(W, full_matrices=False)
Training Set C models:   7%|█▏                | 1/15 [08:40<2:01:30, 520.76s/it]


[C_8_16_32] parameters: 26698
[C_8_16_32] Epoch [1/100] - Train Loss: 0.7414
[C_8_16_32] Epoch [2/100] - Train Loss: 0.4099
[C_8_16_32] Epoch [3/100] - Train Loss: 0.3552
[C_8_16_32] Epoch [4/100] - Train Loss: 0.3228
[C_8_16_32] Epoch [5/100] - Train Loss: 0.3036
[C_8_16_32] Epoch [6/100] - Train Loss: 0.2925
[C_8_16_32] Epoch [7/100] - Train Loss: 0.2774
[C_8_16_32] Epoch [8/100] - Train Loss: 0.2648
[C_8_16_32] Epoch [9/100] - Train Loss: 0.2578
[C_8_16_32] Epoch [10/100] - Train Loss: 0.2466
[C_8_16_32] Epoch [11/100] - Train Loss: 0.2416
[C_8_16_32] Epoch [12/100] - Train Loss: 0.2327
[C_8_16_32] Epoch [13/100] - Train Loss: 0.2289
[C_8_16_32] Epoch [14/100] - Train Loss: 0.2228
[C_8_16_32] Epoch [15/100] - Train Loss: 0.2172
[C_8_16_32] Epoch [16/100] - Train Loss: 0.2097
[C_8_16_32] Epoch [17/100] - Train Loss: 0.2050
[C_8_16_32] Epoch [18/100] - Train Loss: 0.1998
[C_8_16_32] Epoch [19/100] - Train Loss: 0.1950
[C_8_16_32] Epoch [20/100] - Train Loss: 0.1941
[C_8_16_32] Epoch 

Training Set C models:  13%|██▍               | 2/15 [18:25<2:01:02, 558.62s/it]


[C_8_16_64] parameters: 52138
[C_8_16_64] Epoch [1/100] - Train Loss: 0.7444
[C_8_16_64] Epoch [2/100] - Train Loss: 0.3879
[C_8_16_64] Epoch [3/100] - Train Loss: 0.3383
[C_8_16_64] Epoch [4/100] - Train Loss: 0.3136
[C_8_16_64] Epoch [5/100] - Train Loss: 0.2950
[C_8_16_64] Epoch [6/100] - Train Loss: 0.2817
[C_8_16_64] Epoch [7/100] - Train Loss: 0.2659
[C_8_16_64] Epoch [8/100] - Train Loss: 0.2540
[C_8_16_64] Epoch [9/100] - Train Loss: 0.2435
[C_8_16_64] Epoch [10/100] - Train Loss: 0.2355
[C_8_16_64] Epoch [11/100] - Train Loss: 0.2272
[C_8_16_64] Epoch [12/100] - Train Loss: 0.2181
[C_8_16_64] Epoch [13/100] - Train Loss: 0.2134
[C_8_16_64] Epoch [14/100] - Train Loss: 0.2055
[C_8_16_64] Epoch [15/100] - Train Loss: 0.1989
[C_8_16_64] Epoch [16/100] - Train Loss: 0.1910
[C_8_16_64] Epoch [17/100] - Train Loss: 0.1856
[C_8_16_64] Epoch [18/100] - Train Loss: 0.1830
[C_8_16_64] Epoch [19/100] - Train Loss: 0.1752
[C_8_16_64] Epoch [20/100] - Train Loss: 0.1710
[C_8_16_64] Epoch 

Training Set C models:  20%|███▌              | 3/15 [28:15<1:54:33, 572.83s/it]


[C_16_32_128] parameters: 206922
[C_16_32_128] Epoch [1/100] - Train Loss: 0.6869
[C_16_32_128] Epoch [2/100] - Train Loss: 0.3772
[C_16_32_128] Epoch [3/100] - Train Loss: 0.3251
[C_16_32_128] Epoch [4/100] - Train Loss: 0.2936
[C_16_32_128] Epoch [5/100] - Train Loss: 0.2692
[C_16_32_128] Epoch [6/100] - Train Loss: 0.2550
[C_16_32_128] Epoch [7/100] - Train Loss: 0.2410
[C_16_32_128] Epoch [8/100] - Train Loss: 0.2262
[C_16_32_128] Epoch [9/100] - Train Loss: 0.2161
[C_16_32_128] Epoch [10/100] - Train Loss: 0.2031
[C_16_32_128] Epoch [11/100] - Train Loss: 0.1962
[C_16_32_128] Epoch [12/100] - Train Loss: 0.1857
[C_16_32_128] Epoch [13/100] - Train Loss: 0.1756
[C_16_32_128] Epoch [14/100] - Train Loss: 0.1661
[C_16_32_128] Epoch [15/100] - Train Loss: 0.1582
[C_16_32_128] Epoch [16/100] - Train Loss: 0.1537
[C_16_32_128] Epoch [17/100] - Train Loss: 0.1429
[C_16_32_128] Epoch [18/100] - Train Loss: 0.1359
[C_16_32_128] Epoch [19/100] - Train Loss: 0.1298
[C_16_32_128] Epoch [20/1

Training Set C models:  27%|████▊             | 4/15 [41:19<2:00:19, 656.28s/it]


[C_16_32_256] parameters: 409034
[C_16_32_256] Epoch [1/100] - Train Loss: 0.6475
[C_16_32_256] Epoch [2/100] - Train Loss: 0.3644
[C_16_32_256] Epoch [3/100] - Train Loss: 0.3167
[C_16_32_256] Epoch [4/100] - Train Loss: 0.2872
[C_16_32_256] Epoch [5/100] - Train Loss: 0.2643
[C_16_32_256] Epoch [6/100] - Train Loss: 0.2464
[C_16_32_256] Epoch [7/100] - Train Loss: 0.2326
[C_16_32_256] Epoch [8/100] - Train Loss: 0.2196
[C_16_32_256] Epoch [9/100] - Train Loss: 0.2076
[C_16_32_256] Epoch [10/100] - Train Loss: 0.1969
[C_16_32_256] Epoch [11/100] - Train Loss: 0.1860
[C_16_32_256] Epoch [12/100] - Train Loss: 0.1757
[C_16_32_256] Epoch [13/100] - Train Loss: 0.1675
[C_16_32_256] Epoch [14/100] - Train Loss: 0.1580
[C_16_32_256] Epoch [15/100] - Train Loss: 0.1513
[C_16_32_256] Epoch [16/100] - Train Loss: 0.1416
[C_16_32_256] Epoch [17/100] - Train Loss: 0.1342
[C_16_32_256] Epoch [18/100] - Train Loss: 0.1242
[C_16_32_256] Epoch [19/100] - Train Loss: 0.1175
[C_16_32_256] Epoch [20/1

Training Set C models:  33%|██████            | 5/15 [54:31<1:57:29, 704.98s/it]


[C_32_64_64] parameters: 220234
[C_32_64_64] Epoch [1/100] - Train Loss: 0.6663
[C_32_64_64] Epoch [2/100] - Train Loss: 0.3727
[C_32_64_64] Epoch [3/100] - Train Loss: 0.3189
[C_32_64_64] Epoch [4/100] - Train Loss: 0.2871
[C_32_64_64] Epoch [5/100] - Train Loss: 0.2619
[C_32_64_64] Epoch [6/100] - Train Loss: 0.2428
[C_32_64_64] Epoch [7/100] - Train Loss: 0.2294
[C_32_64_64] Epoch [8/100] - Train Loss: 0.2166
[C_32_64_64] Epoch [9/100] - Train Loss: 0.2037
[C_32_64_64] Epoch [10/100] - Train Loss: 0.1894
[C_32_64_64] Epoch [11/100] - Train Loss: 0.1816
[C_32_64_64] Epoch [12/100] - Train Loss: 0.1742
[C_32_64_64] Epoch [13/100] - Train Loss: 0.1607
[C_32_64_64] Epoch [14/100] - Train Loss: 0.1541
[C_32_64_64] Epoch [15/100] - Train Loss: 0.1460
[C_32_64_64] Epoch [16/100] - Train Loss: 0.1365
[C_32_64_64] Epoch [17/100] - Train Loss: 0.1288
[C_32_64_64] Epoch [18/100] - Train Loss: 0.1224
[C_32_64_64] Epoch [19/100] - Train Loss: 0.1141
[C_32_64_64] Epoch [20/100] - Train Loss: 0.1

Training Set C models:  40%|██████▍         | 6/15 [1:09:00<1:54:07, 760.83s/it]


[C_32_64_128] parameters: 421642
[C_32_64_128] Epoch [1/100] - Train Loss: 0.6558
[C_32_64_128] Epoch [2/100] - Train Loss: 0.3662
[C_32_64_128] Epoch [3/100] - Train Loss: 0.3131
[C_32_64_128] Epoch [4/100] - Train Loss: 0.2786
[C_32_64_128] Epoch [5/100] - Train Loss: 0.2578
[C_32_64_128] Epoch [6/100] - Train Loss: 0.2377
[C_32_64_128] Epoch [7/100] - Train Loss: 0.2258
[C_32_64_128] Epoch [8/100] - Train Loss: 0.2109
[C_32_64_128] Epoch [9/100] - Train Loss: 0.1995
[C_32_64_128] Epoch [10/100] - Train Loss: 0.1868
[C_32_64_128] Epoch [11/100] - Train Loss: 0.1765
[C_32_64_128] Epoch [12/100] - Train Loss: 0.1648
[C_32_64_128] Epoch [13/100] - Train Loss: 0.1568
[C_32_64_128] Epoch [14/100] - Train Loss: 0.1468
[C_32_64_128] Epoch [15/100] - Train Loss: 0.1369
[C_32_64_128] Epoch [16/100] - Train Loss: 0.1327
[C_32_64_128] Epoch [17/100] - Train Loss: 0.1205
[C_32_64_128] Epoch [18/100] - Train Loss: 0.1159
[C_32_64_128] Epoch [19/100] - Train Loss: 0.1044
[C_32_64_128] Epoch [20/1

Training Set C models:  47%|███████▍        | 7/15 [1:23:38<1:46:33, 799.17s/it]


[C_32_64_256] parameters: 824458
[C_32_64_256] Epoch [1/100] - Train Loss: 0.6688
[C_32_64_256] Epoch [2/100] - Train Loss: 0.3626
[C_32_64_256] Epoch [3/100] - Train Loss: 0.3109
[C_32_64_256] Epoch [4/100] - Train Loss: 0.2795
[C_32_64_256] Epoch [5/100] - Train Loss: 0.2575
[C_32_64_256] Epoch [6/100] - Train Loss: 0.2421
[C_32_64_256] Epoch [7/100] - Train Loss: 0.2207
[C_32_64_256] Epoch [8/100] - Train Loss: 0.2077
[C_32_64_256] Epoch [9/100] - Train Loss: 0.1975
[C_32_64_256] Epoch [10/100] - Train Loss: 0.1849
[C_32_64_256] Epoch [11/100] - Train Loss: 0.1765
[C_32_64_256] Epoch [12/100] - Train Loss: 0.1667
[C_32_64_256] Epoch [13/100] - Train Loss: 0.1532
[C_32_64_256] Epoch [14/100] - Train Loss: 0.1440
[C_32_64_256] Epoch [15/100] - Train Loss: 0.1344
[C_32_64_256] Epoch [16/100] - Train Loss: 0.1267
[C_32_64_256] Epoch [17/100] - Train Loss: 0.1194
[C_32_64_256] Epoch [18/100] - Train Loss: 0.1086
[C_32_64_256] Epoch [19/100] - Train Loss: 0.1021
[C_32_64_256] Epoch [20/1

Training Set C models:  53%|████████▌       | 8/15 [1:38:14<1:36:06, 823.72s/it]


[C_64_128_128] parameters: 878730
[C_64_128_128] Epoch [1/100] - Train Loss: 0.5985
[C_64_128_128] Epoch [2/100] - Train Loss: 0.3484
[C_64_128_128] Epoch [3/100] - Train Loss: 0.3005
[C_64_128_128] Epoch [4/100] - Train Loss: 0.2697
[C_64_128_128] Epoch [5/100] - Train Loss: 0.2476
[C_64_128_128] Epoch [6/100] - Train Loss: 0.2288
[C_64_128_128] Epoch [7/100] - Train Loss: 0.2109
[C_64_128_128] Epoch [8/100] - Train Loss: 0.1975
[C_64_128_128] Epoch [9/100] - Train Loss: 0.1861
[C_64_128_128] Epoch [10/100] - Train Loss: 0.1742
[C_64_128_128] Epoch [11/100] - Train Loss: 0.1645
[C_64_128_128] Epoch [12/100] - Train Loss: 0.1506
[C_64_128_128] Epoch [13/100] - Train Loss: 0.1405
[C_64_128_128] Epoch [14/100] - Train Loss: 0.1297
[C_64_128_128] Epoch [15/100] - Train Loss: 0.1205
[C_64_128_128] Epoch [16/100] - Train Loss: 0.1114
[C_64_128_128] Epoch [17/100] - Train Loss: 0.1014
[C_64_128_128] Epoch [18/100] - Train Loss: 0.0932
[C_64_128_128] Epoch [19/100] - Train Loss: 0.0860
[C_64

Training Set C models:  60%|█████████      | 9/15 [2:04:41<1:46:13, 1062.30s/it]


[C_64_128_256] parameters: 1682954
[C_64_128_256] Epoch [1/100] - Train Loss: 0.5887
[C_64_128_256] Epoch [2/100] - Train Loss: 0.3523
[C_64_128_256] Epoch [3/100] - Train Loss: 0.2995
[C_64_128_256] Epoch [4/100] - Train Loss: 0.2692
[C_64_128_256] Epoch [5/100] - Train Loss: 0.2453
[C_64_128_256] Epoch [6/100] - Train Loss: 0.2271
[C_64_128_256] Epoch [7/100] - Train Loss: 0.2108
[C_64_128_256] Epoch [8/100] - Train Loss: 0.1974
[C_64_128_256] Epoch [9/100] - Train Loss: 0.1817
[C_64_128_256] Epoch [10/100] - Train Loss: 0.1704
[C_64_128_256] Epoch [11/100] - Train Loss: 0.1590
[C_64_128_256] Epoch [12/100] - Train Loss: 0.1493
[C_64_128_256] Epoch [13/100] - Train Loss: 0.1377
[C_64_128_256] Epoch [14/100] - Train Loss: 0.1280
[C_64_128_256] Epoch [15/100] - Train Loss: 0.1217
[C_64_128_256] Epoch [16/100] - Train Loss: 0.1110
[C_64_128_256] Epoch [17/100] - Train Loss: 0.0993
[C_64_128_256] Epoch [18/100] - Train Loss: 0.0902
[C_64_128_256] Epoch [19/100] - Train Loss: 0.0815
[C_6

Training Set C models:  67%|█████████▎    | 10/15 [2:32:49<1:44:36, 1255.38s/it]


[C_64_128_512] parameters: 3291402
[C_64_128_512] Epoch [1/100] - Train Loss: 0.5944
[C_64_128_512] Epoch [2/100] - Train Loss: 0.3435
[C_64_128_512] Epoch [3/100] - Train Loss: 0.2959
[C_64_128_512] Epoch [4/100] - Train Loss: 0.2669
[C_64_128_512] Epoch [5/100] - Train Loss: 0.2398
[C_64_128_512] Epoch [6/100] - Train Loss: 0.2211
[C_64_128_512] Epoch [7/100] - Train Loss: 0.2068
[C_64_128_512] Epoch [8/100] - Train Loss: 0.1931
[C_64_128_512] Epoch [9/100] - Train Loss: 0.1804
[C_64_128_512] Epoch [10/100] - Train Loss: 0.1657
[C_64_128_512] Epoch [11/100] - Train Loss: 0.1541
[C_64_128_512] Epoch [12/100] - Train Loss: 0.1451
[C_64_128_512] Epoch [13/100] - Train Loss: 0.1332
[C_64_128_512] Epoch [14/100] - Train Loss: 0.1216
[C_64_128_512] Epoch [15/100] - Train Loss: 0.1123
[C_64_128_512] Epoch [16/100] - Train Loss: 0.1034
[C_64_128_512] Epoch [17/100] - Train Loss: 0.0911
[C_64_128_512] Epoch [18/100] - Train Loss: 0.0831
[C_64_128_512] Epoch [19/100] - Train Loss: 0.0738
[C_6

Training Set C models:  73%|██████████▎   | 11/15 [3:02:11<1:34:01, 1410.39s/it]


[C_128_256_512] parameters: 6724618
[C_128_256_512] Epoch [1/100] - Train Loss: 0.5494
[C_128_256_512] Epoch [2/100] - Train Loss: 0.3290
[C_128_256_512] Epoch [3/100] - Train Loss: 0.2831
[C_128_256_512] Epoch [4/100] - Train Loss: 0.2478
[C_128_256_512] Epoch [5/100] - Train Loss: 0.2294
[C_128_256_512] Epoch [6/100] - Train Loss: 0.2068
[C_128_256_512] Epoch [7/100] - Train Loss: 0.1911
[C_128_256_512] Epoch [8/100] - Train Loss: 0.1756
[C_128_256_512] Epoch [9/100] - Train Loss: 0.1622
[C_128_256_512] Epoch [10/100] - Train Loss: 0.1501
[C_128_256_512] Epoch [11/100] - Train Loss: 0.1356
[C_128_256_512] Epoch [12/100] - Train Loss: 0.1256
[C_128_256_512] Epoch [13/100] - Train Loss: 0.1124
[C_128_256_512] Epoch [14/100] - Train Loss: 0.1001
[C_128_256_512] Epoch [15/100] - Train Loss: 0.0908
[C_128_256_512] Epoch [16/100] - Train Loss: 0.0792
[C_128_256_512] Epoch [17/100] - Train Loss: 0.0689
[C_128_256_512] Epoch [18/100] - Train Loss: 0.0614
[C_128_256_512] Epoch [19/100] - Tra

Training Set C models:  80%|███████████▏  | 12/15 [4:07:27<1:48:38, 2172.74s/it]


[C_128_256_1024] parameters: 13152778
[C_128_256_1024] Epoch [1/100] - Train Loss: 0.5506
[C_128_256_1024] Epoch [2/100] - Train Loss: 0.3295
[C_128_256_1024] Epoch [3/100] - Train Loss: 0.2807
[C_128_256_1024] Epoch [4/100] - Train Loss: 0.2514
[C_128_256_1024] Epoch [5/100] - Train Loss: 0.2286
[C_128_256_1024] Epoch [6/100] - Train Loss: 0.2075
[C_128_256_1024] Epoch [7/100] - Train Loss: 0.1909
[C_128_256_1024] Epoch [8/100] - Train Loss: 0.1749
[C_128_256_1024] Epoch [9/100] - Train Loss: 0.1630
[C_128_256_1024] Epoch [10/100] - Train Loss: 0.1472
[C_128_256_1024] Epoch [11/100] - Train Loss: 0.1353
[C_128_256_1024] Epoch [12/100] - Train Loss: 0.1216
[C_128_256_1024] Epoch [13/100] - Train Loss: 0.1098
[C_128_256_1024] Epoch [14/100] - Train Loss: 0.1000
[C_128_256_1024] Epoch [15/100] - Train Loss: 0.0868
[C_128_256_1024] Epoch [16/100] - Train Loss: 0.0794
[C_128_256_1024] Epoch [17/100] - Train Loss: 0.0694
[C_128_256_1024] Epoch [18/100] - Train Loss: 0.0612
[C_128_256_1024]

Training Set C models:  87%|████████████▏ | 13/15 [5:20:21<1:34:39, 2839.65s/it]


[C_256_512_1024] parameters: 26884106
[C_256_512_1024] Epoch [1/100] - Train Loss: 0.5184
[C_256_512_1024] Epoch [2/100] - Train Loss: 0.3130
[C_256_512_1024] Epoch [3/100] - Train Loss: 0.2687
[C_256_512_1024] Epoch [4/100] - Train Loss: 0.2402
[C_256_512_1024] Epoch [5/100] - Train Loss: 0.2164
[C_256_512_1024] Epoch [6/100] - Train Loss: 0.1943
[C_256_512_1024] Epoch [7/100] - Train Loss: 0.1774
[C_256_512_1024] Epoch [8/100] - Train Loss: 0.1577
[C_256_512_1024] Epoch [9/100] - Train Loss: 0.1446
[C_256_512_1024] Epoch [10/100] - Train Loss: 0.1325
[C_256_512_1024] Epoch [11/100] - Train Loss: 0.1165
[C_256_512_1024] Epoch [12/100] - Train Loss: 0.1036
[C_256_512_1024] Epoch [13/100] - Train Loss: 0.0946
[C_256_512_1024] Epoch [14/100] - Train Loss: 0.0786
[C_256_512_1024] Epoch [15/100] - Train Loss: 0.0699
[C_256_512_1024] Epoch [16/100] - Train Loss: 0.0601
[C_256_512_1024] Epoch [17/100] - Train Loss: 0.0515
[C_256_512_1024] Epoch [18/100] - Train Loss: 0.0417
[C_256_512_1024]

Training Set C models:  93%|█████████████ | 14/15 [8:16:00<1:26:05, 5165.12s/it]


[C_512_1024_1024] parameters: 56116234
[C_512_1024_1024] Epoch [1/100] - Train Loss: 0.4880
[C_512_1024_1024] Epoch [2/100] - Train Loss: 0.3032
[C_512_1024_1024] Epoch [3/100] - Train Loss: 0.2566
[C_512_1024_1024] Epoch [4/100] - Train Loss: 0.2289
[C_512_1024_1024] Epoch [5/100] - Train Loss: 0.2000
[C_512_1024_1024] Epoch [6/100] - Train Loss: 0.1807
[C_512_1024_1024] Epoch [7/100] - Train Loss: 0.1608
[C_512_1024_1024] Epoch [8/100] - Train Loss: 0.1428
[C_512_1024_1024] Epoch [9/100] - Train Loss: 0.1287
[C_512_1024_1024] Epoch [10/100] - Train Loss: 0.1134
[C_512_1024_1024] Epoch [11/100] - Train Loss: 0.0989
[C_512_1024_1024] Epoch [12/100] - Train Loss: 0.0878
[C_512_1024_1024] Epoch [13/100] - Train Loss: 0.0750
[C_512_1024_1024] Epoch [14/100] - Train Loss: 0.0622
[C_512_1024_1024] Epoch [15/100] - Train Loss: 0.0519
[C_512_1024_1024] Epoch [16/100] - Train Loss: 0.0441
[C_512_1024_1024] Epoch [17/100] - Train Loss: 0.0364
[C_512_1024_1024] Epoch [18/100] - Train Loss: 0.03

Training Set C models: 100%|███████████████| 15/15 [19:32:25<00:00, 4689.72s/it]



Set C summary (sorted by parameter count):
                 id    params  train_error  test_error   gen_gap  gscm_score
0          C_4_8_16      6794     0.062783      0.0989  0.036117    0.578779
1         C_8_16_32     26698     0.010833      0.0969  0.086067    0.794021
2         C_8_16_64     52138     0.000333      0.0939  0.093567    0.588089
3       C_16_32_128    206922     0.000000      0.0825  0.082500    0.424922
5        C_32_64_64    220234     0.000000      0.0770  0.077000    0.374797
4       C_16_32_256    409034     0.000000      0.0793  0.079300    0.404404
6       C_32_64_128    421642     0.000000      0.0780  0.078000    0.410260
7       C_32_64_256    824458     0.000000      0.0754  0.075400    0.388005
8      C_64_128_128    878730     0.000000      0.0716  0.071600    0.359211
9      C_64_128_256   1682954     0.000000      0.0699  0.069900    0.375746
10     C_64_128_512   3291402     0.000000      0.0686  0.068600    0.387114
11    C_128_256_512   6724618   