In [1]:

# Imports and setup 

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random as random

from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor, Normalize, Compose
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Reproducibility setup

seed = 69
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)


## Task 1: Data Processing

In [2]:

# 1. Define the transformations
# ToTensor() converts images to PyTorch Tensors
# Normalize() scales the tensor values to a specific range (mean=0.5, std=0.5)
transform = Compose([
    ToTensor(),
    Normalize((0.5,), (0.5,))
])

# downloading training and 
train_dataset = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transform
)

validation_dataset = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transform
)

# creating dataLoader objects
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

print("Data processing complete. DataLoaders are ready.")


Data processing complete. DataLoaders are ready.


## Task 2: Building and Training a Base Model

In [3]:

class BaselineMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.Flatten() converts the 28x28 image into a 1D array of 784 pixels
        self.flatten = nn.Flatten()
        
        # Define the sequence of layers
        self.network_stack = nn.Sequential(
            # Input layer implicitly takes 784 features
            nn.Linear(28*28, 128),  # First hidden layer with 128 neurons
            nn.ReLU(),             # ReLU activation function
            nn.Linear(128, 64),    # Second hidden layer with 64 neurons
            nn.ReLU(),             # ReLU activation function
            nn.Linear(64, 10)      # Output layer with 10 neurons (for 10 classes)
        )

    def forward(self, x):
        # Define the forward pass
        x = self.flatten(x)
        logits = self.network_stack(x)
        return logits

In [4]:

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train() # Set the model to training mode
    for batch, (X, y) in enumerate(dataloader):
        # 1. Forward pass: compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # 2. Backpropagation
        optimizer.zero_grad() # Reset gradients from previous iteration
        loss.backward()       # Calculate gradients
        optimizer.step()      # Update model weights

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

# Function to evaluate the model
def validation_loop(dataloader, model, loss_fn):
    model.eval() # Set the model to evaluation mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    val_loss, correct = 0, 0

    with torch.no_grad(): # No need to calculate gradients during evaluation
        for X, y in dataloader:
            pred = model(X)
            val_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    val_loss /= num_batches
    correct /= size
    accuracy = 100 * correct
    print(f"Validation Error: \n Accuracy: {accuracy:>0.1f}%, Avg loss: {val_loss:>8f} \n")
    return accuracy

In [5]:

baseline_model = BaselineMLP()

# Define the loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(baseline_model.parameters(), lr=0.001)

# Train the model for 5 epochs - 10 was way too much
epochs = 5
baseline_accuracy = 0
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, baseline_model, loss_fn, optimizer)
    baseline_accuracy = validation_loop(validation_loader, baseline_model, loss_fn)

print("Baseline training complete!")
print(f"Final Baseline Accuracy: {baseline_accuracy:.2f}%")


Epoch 1
-------------------------------
loss: 2.313334  [   64/60000]
loss: 0.651184  [ 6464/60000]
loss: 0.690483  [12864/60000]
loss: 0.523806  [19264/60000]
loss: 0.435783  [25664/60000]
loss: 0.407183  [32064/60000]
loss: 0.546594  [38464/60000]
loss: 0.552970  [44864/60000]
loss: 0.491835  [51264/60000]
loss: 0.343418  [57664/60000]
Validation Error: 
 Accuracy: 84.6%, Avg loss: 0.431423 

Epoch 2
-------------------------------
loss: 0.258353  [   64/60000]
loss: 0.412075  [ 6464/60000]
loss: 0.336064  [12864/60000]
loss: 0.329296  [19264/60000]
loss: 0.347003  [25664/60000]
loss: 0.377744  [32064/60000]
loss: 0.283524  [38464/60000]
loss: 0.357285  [44864/60000]
loss: 0.441620  [51264/60000]
loss: 0.547375  [57664/60000]
Validation Error: 
 Accuracy: 85.6%, Avg loss: 0.397980 

Epoch 3
-------------------------------
loss: 0.405445  [   64/60000]
loss: 0.489626  [ 6464/60000]
loss: 0.162282  [12864/60000]
loss: 0.339373  [19264/60000]
loss: 0.468770  [25664/60000]
loss: 0.276814

## Task 3: Hyperparameter Optimization Experiment

In [None]:

learning_rates = [0.1, 0.01, 0.001]
optimizers_to_test = [torch.optim.Adam, torch.optim.SGD, torch.optim.RMSprop]
neuron_counts = [64, 128, 256] # Example for tuning hidden layer 1 neurons

# Store results
results = []

# 2. Create the nested loops to iterate through all combinations
for lr in learning_rates:
    for opt_class in optimizers_to_test:
        for neurons in neuron_counts:
            print(f"--- Training with lr={lr}, optimizer={opt_class.__name__}, neurons={neurons} ---")
            
            # NOTE: You will need to modify your model class to accept the neuron count
            # as an argument, or create a new class for this experiment.
            # For simplicity, let's assume a modified model class `CustomMLP(neurons)`.
            # model = CustomMLP(neurons) 
            
            model = BaselineMLP() # Using baseline for structure, but you'd adapt this
            
            # Instantiate the optimizer with the current learning rate
            optimizer = opt_class(model.parameters(), lr=lr)
            loss_fn = nn.CrossEntropyLoss()
            
            # Train for the specified number of epochs, 5 - 10 was way too much
            epochs = 5
            final_accuracy = 0
            for t in range(epochs):
                # (You would call your train_loop and validation_loop here)
                # For brevity, I'm skipping the printouts inside the loop
                train_loop(train_loader, model, loss_fn, optimizer)
                final_accuracy = validation_loop(validation_loader, model, loss_fn)

            # 3. Store the results
            results.append({
                'learning_rate': lr,
                'optimizer': opt_class.__name__,
                'hidden_neurons_1': neurons,
                'final_accuracy': final_accuracy
            })

# 4. Convert results to a Pandas DataFrame for easy analysis
results_df = pd.DataFrame(results)
print(results_df)


--- Training with lr=0.1, optimizer=Adam, neurons=64 ---
loss: 2.301333  [   64/60000]
loss: 2.140321  [ 6464/60000]
loss: 1.466400  [12864/60000]
loss: 1.747969  [19264/60000]
loss: 1.755101  [25664/60000]
loss: 2.489678  [32064/60000]
loss: 2.292679  [38464/60000]
