# MNIST CNN Regression with PyTorch

## Design
Fix a CNN architecture.
- CNN A1-A5 (seeds: 1,2,3,4,5): Randomly initialized weights. Not trained.
- CNN B1-B5 (seeds: 1,2,3,4,5): Randomly initialized weights. Trained to do regression.
- CNN C1-C5 (seeds: 1,2,3,4,5): Randomly initialized weights. Trained to do classification.

In [45]:
import torch
import numpy as np
from torchvision.datasets import MNIST


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# (To force CPU execution, assign torch.device("cpu") here instead.)
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("using", DEVICE)

# Fetch both MNIST splits (train first, then test) into the same data root,
# downloading them if they are not already present.
MNIST_TRAIN, MNIST_TEST = (
    MNIST(
        root="~/Desktop/AliasingOperatorExperiments/data",
        train=is_train_split,
        download=True,
    )
    for is_train_split in (True, False)
)

import torch
from torchvision.datasets import MNIST
from torch.utils.data import TensorDataset, DataLoader

# ---- Load MNIST data ----
mnist_digits = MNIST_TRAIN.data
mnist_labels = MNIST_TRAIN.targets
test_digits = MNIST_TEST.data
test_labels = MNIST_TEST.targets

# ---- Prepare model inputs ----
# Cast to float, reshape to (N, 1, 28, 28) and divide by 255
# (presumably the raw pixel values are 0-255, giving inputs in [0, 1]).
X = mnist_digits.float().reshape(-1, 1, 28, 28) / 255.0
y = mnist_labels.long()
test_X = test_digits.float().reshape(-1, 1, 28, 28) / 255.0
test_y = test_labels.long()

## Randomly shuffle
# Use a dedicated, fixed-seed generator so the permutation is identical on
# every run and does not consume the global RNG that is seeded per model
# further down. Without this the training order (and therefore the trained
# weights) would differ between otherwise identically-seeded runs.
indices = torch.randperm(X.shape[0], generator=torch.Generator().manual_seed(0))
train_ds = TensorDataset(
    X[indices],
    # Training targets are one-hot rows of length 10.
    torch.nn.functional.one_hot(y[indices], num_classes=10),
)
# Test targets stay as integer class indices; accuracy() compares them
# against the arg-max of the network output.
test_ds = TensorDataset(test_X, test_y)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=256)

using cuda


In [46]:
from torch import optim
from mnist_embeddings import MnistConvNet


def accuracy(net: MnistConvNet, device: torch.device) -> float:
    """Return the fraction of ``test_loader`` samples that ``net`` gets right.

    The network is switched to eval mode and run without gradient tracking.
    A sample counts as correct when the index of the largest output along
    dimension 1 equals its label.

    Args:
        net: Model to evaluate; it is called directly on each input batch.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    net.eval()
    n_hits = 0
    n_seen = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            guesses = net(images).argmax(dim=1)
            n_seen += labels.shape[0]
            n_hits += int((guesses == labels).sum())
    return n_hits / n_seen


def train_regression(net: MnistConvNet, device: torch.device, epochs: int = 10) -> MnistConvNet:
    """Fit ``net`` to the ``train_loader`` targets with a Smooth-L1 loss.

    The network output is regressed directly onto the targets yielded by
    ``train_loader`` (one-hot rows in this notebook) using Adam with a
    learning rate of 1e-3. After every epoch the mean per-batch loss is
    printed.

    Args:
        net: Model to optimise; it is updated in place.
        device: Device each batch is moved to before the forward pass.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        The same ``net`` object that was passed in, after training.
    """
    loss_fn = torch.nn.SmoothL1Loss()
    adam = optim.Adam(net.parameters(), lr=1e-3)

    for epoch in range(epochs):
        net.train()
        running_loss = 0
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            adam.zero_grad()
            batch_loss = loss_fn(net(inputs), targets)
            batch_loss.backward()
            adam.step()

            running_loss += batch_loss.item()

        mean_loss = running_loss / len(train_loader)
        print(f"    Epoch {epoch+1}: train_loss = {mean_loss:.4f}")
    return net


def train_classification(net: MnistConvNet, device: torch.device, epochs: int = 10) -> MnistConvNet:
    """Train ``net`` as a classifier on ``train_loader`` with cross-entropy.

    Uses Adam with a learning rate of 1e-3 and prints the mean per-batch
    loss after every epoch.

    Args:
        net: Model to optimise; it is updated in place. Its raw outputs are
            treated as unnormalised class scores (logits).
        device: Device each batch is moved to before the forward pass.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        The same ``net`` object that was passed in, after training.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-3)

    # ---- Training loop ----
    for epoch in range(epochs):
        net.train()
        total_loss = 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)

            optimizer.zero_grad()
            # CrossEntropyLoss applies log-softmax internally, so it must be
            # fed the raw network outputs. Applying softmax first (as this
            # function previously did) squashes the scores twice and flattens
            # the gradients.
            logits = net(xb)
            # train_loader yields one-hot targets; as floats they are
            # interpreted by CrossEntropyLoss as class probabilities.
            loss = criterion(logits, yb.float())
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"    Epoch {epoch+1}: train_loss = {total_loss / len(train_loader):.4f}")
    return net

In [47]:
import hashlib
import io
import torch

from mnist_embeddings import MnistConvNet

# Ask cuDNN for deterministic kernels and disable its autotuner so repeated
# runs with the same seed choose the same algorithms.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


def _seed_torch(seed: int) -> None:
    """Seed the PyTorch CPU RNG and the RNG of every CUDA device."""
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def _save_with_content_hash(state_dict, prefix: str) -> str:
    """Serialise ``state_dict`` once and write it to ``{prefix}-{sha256}.pth``.

    The state dict is serialised into memory, hashed, and those exact bytes
    are written to disk, so the digest in the filename always matches the
    file contents. (Calling ``torch.save`` a second time with a path, as
    this cell previously did, is not guaranteed to reproduce the same bytes
    as the in-memory serialisation that was hashed.)

    Returns:
        The path of the file that was written.
    """
    buffer = io.BytesIO()
    torch.save(state_dict, buffer)
    payload = buffer.getvalue()
    digest = hashlib.sha256(payload).hexdigest()
    path = f"{prefix}-{digest}.pth"
    with open(path, "wb") as fh:
        fh.write(payload)
    return path


# A1-A5: randomly initialised, untrained networks (left on the CPU).
for seed in range(1, 6):
    _seed_torch(seed)
    net = MnistConvNet()
    _save_with_content_hash(net.state_dict(), f"mnist-cnn-A{seed}")

# B1-B5 (regression) followed by C1-C5 (classification): the same seeds and
# initialisation procedure, each trained for 5 epochs on DEVICE.
for tag, train_fn in (("B", train_regression), ("C", train_classification)):
    for seed in range(1, 6):
        _seed_torch(seed)
        net = MnistConvNet().to(DEVICE)
        trained_net = train_fn(net, DEVICE, epochs=5)
        print(f"Seed {seed}: Test Accuracy = {accuracy(trained_net, DEVICE):.4f}")
        _save_with_content_hash(trained_net.state_dict(), f"mnist-cnn-{tag}{seed}")

    Epoch 1: train_loss = 0.0066
    Epoch 2: train_loss = 0.0023
    Epoch 3: train_loss = 0.0018
    Epoch 4: train_loss = 0.0015
    Epoch 5: train_loss = 0.0013
Seed 1: Test Accuracy = 0.9921
    Epoch 1: train_loss = 0.0061
    Epoch 2: train_loss = 0.0022
    Epoch 3: train_loss = 0.0017
    Epoch 4: train_loss = 0.0014
    Epoch 5: train_loss = 0.0012
Seed 2: Test Accuracy = 0.9922
    Epoch 1: train_loss = 0.0055
    Epoch 2: train_loss = 0.0021
    Epoch 3: train_loss = 0.0016
    Epoch 4: train_loss = 0.0014
    Epoch 5: train_loss = 0.0012
Seed 3: Test Accuracy = 0.9931
    Epoch 1: train_loss = 0.0058
    Epoch 2: train_loss = 0.0021
    Epoch 3: train_loss = 0.0016
    Epoch 4: train_loss = 0.0014
    Epoch 5: train_loss = 0.0012
Seed 4: Test Accuracy = 0.9926
    Epoch 1: train_loss = 0.0051
    Epoch 2: train_loss = 0.0020
    Epoch 3: train_loss = 0.0016
    Epoch 4: train_loss = 0.0013
    Epoch 5: train_loss = 0.0012
Seed 5: Test Accuracy = 0.9933
    Epoch 1: train_l