In [1]:
import torch
print(torch.cuda.is_available())

False


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import numpy as np


In [3]:
transform = transforms.Compose([
    transforms.ToTensor(), # to tensor
    transforms.Normalize((0.5), (0.5)) # normalize
])

In [4]:
# Datasets
trainset = torchvision.datasets.FashionMNIST(train=True, download=True,
                                  transform=transform, root='../data')

testset = torchvision.datasets.FashionMNIST(train=False, download=True,
                                            transform=transform, root='../data')

classes = classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

In [5]:
def create_model(activation_type):
    act = nn.ReLU() if activation_type == 'ReLU' else nn.Sigmoid()

    return nn.Sequential(
        nn.Flatten(),
        nn.Linear(784, 1024),
        act,
        nn.Linear(1024, 1024),
        act,
        nn.Linear(1024, 10)
    )

In [6]:
batch_sizes = [1, 10, 1000]
learning_rates = [0.1, 0.01, 0.001]
activations = ['ReLU', 'Sigmoid']
criterion = torch.nn.CrossEntropyLoss()
results = []

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training on: {device}")

for act in activations:
    for bs in batch_sizes:
        for lr in learning_rates:
            # 1. Create DataLoader with batch_size
            trainloader = torch.utils.data.DataLoader(trainset, batch_size=bs,
                                                      shuffle=True, num_workers=2)
            testloader = torch.utils.data.DataLoader(testset, batch_size=bs,
                                                     shuffle=False, num_workers=2)

            # 2. Initialize Model with activation
            model = create_model(act)
            model.to(device)
            print(f"Model on GPU: {next(model.parameters()).is_cuda}") # Check if model is on GPU

            # 3. Initialize Optimizer with lr
            optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0)

            # print settings
            print(f"Activation: {act}, Batch Size: {bs}, Learning Rate: {lr}")

            # 4. Train for X epochs
            for epoch in range(30):
                running_loss = 0
                print_interval = max(1, (60000 // bs) // 5)
                for i, data in enumerate(trainloader, 0):
                    inputs, labels = data

                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    if i == 0 and epoch == 0:
                        print(f"Inputs on GPU: {inputs.is_cuda}") # Check if inputs are on GPU (first batch only)

                    optimizer.zero_grad()

                     # forward
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    #back
                    loss.backward()
                    optimizer.step()

                    # print loss
                    running_loss += loss.item()
                    if i % print_interval ==  print_interval - 1:
                        avg_loss = running_loss / print_interval
                        print(f'[Epoch {epoch + 1}, Batch {i + 1:5d}] loss: {avg_loss:.3f}')
                        running_loss = 0.0

            print('finished training')

            # 5. Record final Accuracy
            correct = 0
            total = 0
            with torch.no_grad():
                for data in testloader:
                    images, labels = data

                    images = images.to(device)
                    labels = labels.to(device)

                    outputs = model(images)

                    _, predicted = torch.max(outputs.data, 1) # index with highest score
                    total += labels.size(0) # count total images procoeses so far
                    correct += (predicted == labels).sum().item()

            accuracy = 100 * correct / total
            results.append({
                'Activation': act,
                'Batch Size': bs,
                'Learning Rate': lr,
                'Accuracy': accuracy
            })


print("\n--- FINAL RESULTS TABLE ---")
print(f"{'Act':<10} | {'BS':<6} | {'LR':<6} | {'Accuracy':<10}")
for r in results:
    print(f"{r['Activation']:<10} | {r['Batch Size']:<6} | {r['Learning Rate']:<6} | {r['Accuracy']:.2f}%")

Training on: cpu
Model on GPU: False
Activation: ReLU, Batch Size: 1, Learning Rate: 0.1
Inputs on GPU: False
[Epoch 1, Batch 12000] loss: nan
[Epoch 1, Batch 24000] loss: nan
[Epoch 1, Batch 36000] loss: nan
[Epoch 1, Batch 48000] loss: nan
[Epoch 1, Batch 60000] loss: nan
[Epoch 2, Batch 12000] loss: nan
[Epoch 2, Batch 24000] loss: nan
[Epoch 2, Batch 36000] loss: nan
[Epoch 2, Batch 48000] loss: nan
[Epoch 2, Batch 60000] loss: nan
[Epoch 3, Batch 12000] loss: nan
[Epoch 3, Batch 24000] loss: nan
[Epoch 3, Batch 36000] loss: nan
[Epoch 3, Batch 48000] loss: nan
[Epoch 3, Batch 60000] loss: nan
[Epoch 4, Batch 12000] loss: nan
[Epoch 4, Batch 24000] loss: nan
[Epoch 4, Batch 36000] loss: nan
[Epoch 4, Batch 48000] loss: nan
[Epoch 4, Batch 60000] loss: nan
[Epoch 5, Batch 12000] loss: nan
[Epoch 5, Batch 24000] loss: nan
[Epoch 5, Batch 36000] loss: nan
[Epoch 5, Batch 48000] loss: nan
[Epoch 5, Batch 60000] loss: nan
[Epoch 6, Batch 12000] loss: nan
[Epoch 6, Batch 24000] loss: nan

libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x1115ffd30>
Traceback (most recent call last):
  File "/Users/joelgc/2026/deeplearn440/env/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/Users/joelgc/2026/deeplearn440/env/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1628, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/popen_fork.py", line 40, in wait

KeyboardInterrupt: 