# Part 1: Image Classification — Fashion‑MNIST (11 points)

**Goal:** Build and train a lightweight CNN on Fashion‑MNIST following the given architecture and specs.

In [None]:
# import something you may find useful
import os, random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision import datasets, transforms
import numpy as np

import matplotlib
import matplotlib.pyplot as plt

# set seed
def set_seed(seed: int = 42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Value passed to every seeding routine (defaults to 42).

    Returns:
        None. The function only has side effects on global RNG / backend state.
    """
    # Same seed for Python's `random`, NumPy's global RNG, the torch default
    # generator and all CUDA devices, applied in that order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # cuDNN: ask for deterministic kernels and turn off benchmark-based autotuning.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Seed every RNG up front so later random operations start from a known state.
set_seed(42)

# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Device:", device)

# Directory used as the dataset root; created if it does not exist yet.
data_dir = "./data"
os.makedirs(data_dir, exist_ok=True)

<div class="alert alert-success">
    <h3>1) Data</h3>
    load Fashion‑MNIST and create train/val/test splits (1 point)
</div>

In [None]:
# Preprocessing shared by both splits: convert to a tensor, then normalize the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))]
)

# Use the following code to load the Fashion-MNIST if you want, or you can download the data manually
# `train_full` is the complete official training split; `test_ds` is the official test split.
train_full = datasets.FashionMNIST(root=data_dir, train=True, transform=transform, download=True)
test_ds = datasets.FashionMNIST(root=data_dir, train=False, transform=transform, download=True)


In [None]:
# CODE HERE
# Define the training, validation, and test datasets.
# Note that you may want to select the validation set from the training set by randomly splitting the data
# (e.g. `random_split` on `train_full`, both available from the cells above).
# The loaders below expect the names `train_ds` and `val_ds` to be defined here;
# `test_ds` is the test split loaded in the previous cell.






# CODE HERE: choose the mini-batch size (an integer); it is shared by all three loaders.
batch_size = 
# You may want to verify the data
# Only the training loader shuffles; validation and test loaders keep a fixed order.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False)

# Cell output: number of samples in each split.
len(train_ds), len(val_ds), len(test_ds)

<div class="alert alert-success">
    <h3>2) Model</h3>
    Build the CNN model (3 points)
</div>

In [None]:
# Define the network architecture
# NOTE(review): the required layer configuration comes from the assignment
# handout, which is not part of this notebook — follow that spec when filling in.
class SmallCNN(nn.Module):
    def __init__(self):
        # CODE HERE
        # Define the convolutional layers, pooling layers...
        # Call super().__init__() first: nn.Module must be initialised before
        # any sub-module is assigned as an attribute.
        # Fashion-MNIST inputs here are single-channel (see the one-value
        # Normalize in the data cell).




    def forward(self, x):
        # CODE HERE
        # Implement the forward pass. Return the final output.
        # The result must be bound to the name `out`, which is returned below.
        # If the loss is nn.CrossEntropyLoss, `out` should be raw class scores
        # (logits), not softmax probabilities.




        return out



# You may want to verify the architecture of CNN
# Instantiate the model, move it to the selected device, and display its layers.
model = SmallCNN().to(device)
model

<div class="alert alert-success">
    <h3>3) Loss & Optimizer</h3>
    CrossEntropy + SGD (1 point)
</div>

In [None]:
# CODE HERE
# Define `criterion` (the loss) and `optimizer` here. The section header asks for
# cross-entropy loss with SGD; the optimizer is normally built over
# `model.parameters()` of the model created in the previous cell.



# You may want to verify the criterion and optimizer
# Both names must exist by this point: the tuple is displayed as the cell output.
criterion, optimizer

<div class="alert alert-success">
    <h3>4) Training loop</h3>
    track training and validation loss (1 point)
</div>

In [None]:
# CODE HERE
# Train the network for each epoch
# Replace `(...)` with the real parameter list. The training cell calls
#   train_one_epoch(model, train_loader, optimizer, criterion, device)
# and unpacks two return values (loss, accuracy) from it.
def train_one_epoch(...):
    # Put the model in training mode before iterating over the batches.
    model.train()
    ...







# CODE HERE
# Evaluate the model on the validation set
# Replace `(...)` with the real parameter list. The training cell calls
#   evaluate(model, val_loader, criterion, device)
# and unpacks two return values (loss, accuracy) from it.
# The decorator disables gradient tracking for the whole function call.
@torch.no_grad()
def evaluate(...):
    # Put the model in evaluation mode before computing metrics.
    model.eval()
    ...




<div class="alert alert-success">
    <h3>5) Run training</h3>
    ≥ 15 epochs (1 point)
</div>

In [None]:
# CODE HERE
# Set the training epochs
# The section header requires at least 15 epochs.
epochs = 
# Per-epoch history; one value is expected to be appended to each list per epoch.
train_losses, val_losses, train_accs, val_accs = [], [], [], []


# CODE HERE
# Run the training epochs
# Epochs are numbered from 1 to `epochs` inclusive.
for epoch in range(1, epochs + 1):
    tr_loss, tr_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    va_loss, va_acc = evaluate(model, val_loader, criterion, device)
    # Replace the placeholder below: record the four metrics in the history
    # lists above (and optionally print a per-epoch summary).
    ...
    



<div class="alert alert-success">
    <h3>6) Plot</h3>
    training vs validation loss (1 point)
</div>

In [None]:
# CODE HERE
# Plot the "Training vs Validation Loss"
# Use the per-epoch histories `train_losses` and `val_losses` from the training
# cell; label both axes and add a legend so the figure stands on its own.




# Render the figure as the cell output.
plt.show()

<div class="alert alert-success">
    <h3>7) Evaluate on test set</h3>
    Evaluate the trained model on the test set and report its performance by showing the test accuracy (1 point)
</div>

In [None]:
# CODE HERE
# Test the model on the test set
# Define `test_loss` and `test_acc` here (e.g. by evaluating on `test_loader`).
# `test_acc` is expected as a fraction: the print below multiplies it by 100.


print(f"[Fashion-MNIST test] Loss = {test_loss:.4f} | Acc = {test_acc*100:.2f}%")

<div class="alert alert-success">
    <h3>8) Discussion prompts (2 points)</h3>
    (a) Which techniques from Table 3 report better performance than your method? Which performs worse? Why do you think this is the case? Briefly explain. (1 point)<br>
    (b) Do you have any other ideas about how to improve your method? Briefly explain. (1 point)
</div>

# Part 2: Image Classification — CIFAR-100 (14 points)

**Goal:** Build and train a lightweight CNN on CIFAR-100 following the given architecture and specs.

In [None]:
# import something you may find useful
import os, random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision import datasets, transforms
import numpy as np

import matplotlib
import matplotlib.pyplot as plt


def set_seed(seed: int = 42):
    """Apply one seed to Python, NumPy and PyTorch, and make cuDNN deterministic.

    Args:
        seed: Integer handed to each seeding call (defaults to 42).

    Returns:
        None. Only global RNG and cuDNN backend state are modified.
    """
    # Seeding order: Python `random`, NumPy global RNG, torch default generator,
    # then every CUDA device.
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed)

    # Request deterministic cuDNN kernels and disable benchmark-based autotuning.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Re-seed all RNGs at the start of Part 2.
set_seed(42)

# Pick CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Device:", device)

# Dataset root directory; created on demand.
data_dir = "./data"
os.makedirs(data_dir, exist_ok=True)


<div class="alert alert-success">
    <h3>1) Data</h3>
    load CIFAR-100 and create train/val/test splits (1 point)
</div>

In [None]:
# Per-channel mean / std used to normalize CIFAR-100 images (three values each,
# one per color channel).
CIFAR100_MEAN = (0.5071, 0.4867, 0.4408)
CIFAR100_STD = (0.2675, 0.2565, 0.2761)

# Preprocessing shared by both splits: tensor conversion followed by
# channel-wise normalization with the stats above.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD)]
)


# Load CIFAR-100 (train + test)
# Use the following code to load the CIFAR-100 if you want, or you can download the data manually
train_full = datasets.CIFAR100(root=data_dir, train=True, transform=transform, download=True)
test_ds = datasets.CIFAR100(root=data_dir, train=False, transform=transform, download=True)



In [None]:
# CODE HERE
# Define the training, validation, and test datasets.
# Note that you may want to select the validation set from the training set by randomly splitting the data
# (e.g. `random_split` on `train_full`, both available from the cells above).
# The loaders below expect the names `train_ds` and `val_ds` to be defined here;
# `test_ds` is the test split loaded in the previous cell.





# CODE HERE: choose the mini-batch size (an integer); it is shared by all three loaders.
batch_size = 
# You may want to verify the data
# Only the training loader shuffles; validation and test loaders keep a fixed order.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=False)

# Cell output: number of samples in each split.
len(train_ds), len(val_ds), len(test_ds)

<div class="alert alert-success">
    <h3>2) Model</h3>
    Build the CNN model (4 points)
</div>

In [None]:
# Define the network architecture
# NOTE: this re-declares `SmallCNN`; when both parts run in one kernel, this
# definition replaces the Part 1 class of the same name.
# NOTE(review): the required layer configuration comes from the assignment
# handout, which is not part of this notebook — follow that spec when filling in.
class SmallCNN(nn.Module):
    def __init__(self):
        # CODE HERE
        # Define the convolutional layers, pooling layers...
        # Call super().__init__() first: nn.Module must be initialised before
        # any sub-module is assigned as an attribute.
        # CIFAR-100 inputs here have three channels (see the three-value
        # mean/std in the data cell).




    def forward(self, x):
        # CODE HERE
        # Implement the forward pass. Return the final output.
        # The result must be bound to the name `out`, which is returned below.
        # If the loss is nn.CrossEntropyLoss, `out` should be raw class scores
        # (logits), not softmax probabilities.




        return out





# You may want to verify the architecture of CNN
# Instantiate the model, move it to the selected device, and display its layers.
model = SmallCNN().to(device)
model

<div class="alert alert-success">
    <h3>3) Loss & Optimizer</h3>
    CrossEntropy + SGD (1 point)
</div>

In [None]:
# CODE HERE
# Define `criterion` (the loss) and `optimizer` here. The section header asks for
# cross-entropy loss with SGD; the optimizer is normally built over
# `model.parameters()` of the model created in the previous cell.



# You may want to verify the criterion and optimizer
# Both names must exist by this point: the tuple is displayed as the cell output.
criterion, optimizer

<div class="alert alert-success">
    <h3>4) Training loop</h3>
    track training and validation loss (1 point)
</div>

In [None]:
# CODE HERE
# Train the network for each epoch
# Replace `(...)` with the real parameter list. The training cell calls
#   train_one_epoch(model, train_loader, optimizer, criterion, device)
# and unpacks two return values (loss, accuracy) from it.
def train_one_epoch(...):
    # Put the model in training mode before iterating over the batches.
    model.train()
    ...







# CODE HERE
# Evaluate the model on the validation set
# Replace `(...)` with the real parameter list. The training cell calls
#   evaluate(model, val_loader, criterion, device)
# and unpacks two return values (loss, accuracy) from it.
# The decorator disables gradient tracking for the whole function call.
@torch.no_grad()
def evaluate(...):
    # Put the model in evaluation mode before computing metrics.
    model.eval()
    ...




<div class="alert alert-success">
    <h3>5) Run training</h3>
    ≥ 15 epochs (1 point)
</div>

In [None]:
# CODE HERE
# Set the training epochs
# The section header requires at least 15 epochs.
epochs = 
# Per-epoch history; one value is expected to be appended to each list per epoch.
train_losses, val_losses, train_accs, val_accs = [], [], [], []


# CODE HERE
# Run the training epochs
# Epochs are numbered from 1 to `epochs` inclusive.
for epoch in range(1, epochs + 1):
    tr_loss, tr_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    va_loss, va_acc = evaluate(model, val_loader, criterion, device)
    # Replace the placeholder below: record the four metrics in the history
    # lists above (and optionally print a per-epoch summary).
    ...



<div class="alert alert-success">
    <h3>6) Plot</h3>
    training vs validation loss (1 point)
</div>

In [None]:
# CODE HERE
# Plot the training vs validation loss, as the section header asks, using the
# per-epoch histories `train_losses` and `val_losses` from the training cell.
# Label both axes and add a legend so the figure stands on its own.


# Render the figure as the cell output.
plt.show()

<div class="alert alert-success">
    <h3>7) Evaluate on test set</h3>
    report accuracy (1 point)
</div>

In [None]:
# CODE HERE
# Test the model on the test set
# Define `test_loss` and `test_acc` here (e.g. by evaluating on `test_loader`).
# `test_acc` is expected as a fraction: the print below multiplies it by 100.


print(f"[CIFAR-100 test] Loss = {test_loss:.4f} | Acc = {test_acc*100:.2f}%")

<div class="alert alert-success">
    <h3>8) Discussion prompts (4 points)</h3>
    (a) Compared to your Fashion-MNIST results in Part 1, how did the training and validation curves change on CIFAR-100? Did you observe more overfitting, underfitting, or slower convergence? Explain briefly. (1 point)<br>
    (b) Based on the ResNet results above (around $80\%$ accuracy) and your own results (around $35$-$45\%$), what architectural differences do you think account for most of the performance gap? Consider depth, width (number of channels), skip connections, and data augmentation, and any modifications you have made to your own architecture. (2 points)<br>
    (c) What change did you make to your architecture that you were most surprised about?
This could be a large increase or decrease in performance, or something you added that
didn’t change anything. What is your big takeaway from that experience? (1 point)<br>
    
</div>