In [None]:
# ============================
# Step 1: Unzip Dataset
# ============================

import zipfile
import os

# TODO: point `zip_path` at the ZIP file you uploaded
zip_path = 'dataset.zip' # <-- replace with the exact uploaded filename
extract_path = './dataset' # destination folder for the extracted files

# Extract every member of the archive into `extract_path`
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)
print(f"✅ Unzipped to: {os.path.abspath(extract_path)}")

# Optional: show the top-level entries that were extracted
print("Contents:")
extracted_entries = os.listdir(extract_path)
print(extracted_entries)

In [None]:
# ============================
# Step 2: Split Dataset into Train/Test
# ============================

import shutil
import random

# TODO: Set source folder (where unzipped dataset is) and target folder (where split dataset will go)
SOURCE_DIR = 'dataset' # folder from unzip
TARGET_DIR = 'dataset_split' # new folder to store split data

# Split ratios
train_ratio = 0.7
test_ratio = 0.3 # informational: the test split is whatever remains after the train cutoff

# Set random seed for reproducibility
random.seed(42)

# Only sub-directories of SOURCE_DIR are classes. Stray files next to them
# (.DS_Store, README, ...) must not be turned into empty class folders under
# TARGET_DIR. Sorting makes the processing order -- and therefore the outcome of
# the seeded shuffle -- independent of the arbitrary order os.listdir returns.
class_names = sorted(
    entry for entry in os.listdir(SOURCE_DIR)
    if os.path.isdir(os.path.join(SOURCE_DIR, entry))
)

# Process each class folder
for class_name in class_names:
    class_path = os.path.join(SOURCE_DIR, class_name)

    # Regular files only: shutil.copyfile cannot copy a nested directory.
    images = sorted(
        name for name in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, name))
    )
    random.shuffle(images)

    # Calculate split point (rounded down, so the test split gets the remainder)
    train_cutoff = int(len(images) * train_ratio)

    # Split images
    train_images = images[:train_cutoff]
    test_images = images[train_cutoff:]

    # Copy each split into TARGET_DIR/<split>/<class_name>/
    for split, split_images in (('train', train_images), ('test', test_images)):
        split_dir = os.path.join(TARGET_DIR, split, class_name)
        os.makedirs(split_dir, exist_ok=True)
        for img_name in split_images:
            src = os.path.join(class_path, img_name)
            dst = os.path.join(split_dir, img_name)
            shutil.copyfile(src, dst)
print("✅ Dataset split complete!")

In [None]:
import os
import torch
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Set dataset paths (update if needed)
TRAIN_PATH = 'dataset_split/train'
TEST_PATH = 'dataset_split/test'

# Mini-batch sizes used by the loaders below (tune to your memory budget)
BATCH_SIZE_TRAIN = 64
BATCH_SIZE_TEST = 256

# Define image transforms: resize, grayscale, tensor, normalize
transform_custom = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load datasets
train_set = torchvision.datasets.ImageFolder(root=TRAIN_PATH, transform=transform_custom)
test_set = torchvision.datasets.ImageFolder(root=TEST_PATH, transform=transform_custom)

# Batch iterators consumed by the training / evaluation cells further down.
# Training batches are reshuffled every epoch; the test order stays fixed.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE_TEST, shuffle=False)

# Print dataset info
print("Classes:", train_set.classes)
print("Train samples:", len(train_set))
print("Test samples:", len(test_set))

# Show example images (2 per class)
def show_2x6_grid(dataset, n_per_class=2, title="Example Grid"):
    """Plot the first `n_per_class` images of every class as a grid.

    Columns are classes (in `dataset.classes` order), rows are examples.

    Args:
        dataset: indexable dataset yielding `(image_tensor, label)` pairs and
            exposing `.classes`. If it also exposes `.targets` (one label per
            sample, as torchvision's ImageFolder does), only the images that
            are actually displayed get loaded.
        n_per_class: number of example rows.
        title: figure title.

    Images are un-normalized with mean 0.5 / std 0.5 before display, matching
    the `Normalize((0.5,), (0.5,))` transform used in this notebook. A class
    with fewer than `n_per_class` samples leaves its remaining cells blank.
    """
    num_classes = len(dataset.classes)
    collected = {i: [] for i in range(num_classes)}

    labels = getattr(dataset, 'targets', None)
    if labels is None:
        # No label list available: fall back to scanning samples in order.
        candidates = range(len(dataset))
    else:
        # Pick the first n_per_class indices of each class from the labels
        # alone, so that no other image has to be decoded and transformed.
        quota = {}
        candidates = []
        for index, label in enumerate(labels):
            label = int(label)
            if quota.get(label, 0) < n_per_class:
                quota[label] = quota.get(label, 0) + 1
                candidates.append(index)

    for index in candidates:
        img, label = dataset[index]
        if label in collected and len(collected[label]) < n_per_class:
            collected[label].append(img)
        if all(len(imgs) >= n_per_class for imgs in collected.values()):
            break

    # squeeze=False keeps `axes` two-dimensional even for one row or one class.
    fig, axes = plt.subplots(
        n_per_class, num_classes,
        figsize=(num_classes * 2, n_per_class * 2),
        squeeze=False,
    )
    for col, imgs in collected.items():
        for row in range(n_per_class):
            ax = axes[row][col]
            ax.axis('off')
            if row >= len(imgs):
                continue  # not enough samples for this class: leave the cell blank
            img = imgs[row].numpy().transpose(1, 2, 0) * 0.5 + 0.5 # unnormalize
            ax.imshow(img.squeeze(), cmap='gray')
            ax.set_title(dataset.classes[col], fontsize=8)

    plt.suptitle(title)
    plt.tight_layout()
    plt.show()

# Render one example grid per split (2 images per class)
for grid_dataset, grid_title in ((train_set, "Train Set Grid"), (test_set, "Test Set Grid")):
    show_2x6_grid(grid_dataset, 2, grid_title)


In [None]:
import torch
import torch.nn as nn
from einops import rearrange, repeat, einsum

# === Helper ===
def pair(t):
    """Return `t` unchanged when it is a tuple, otherwise the 2-tuple `(t, t)`."""
    if isinstance(t, tuple):
        return t
    return (t, t)
# === 1: PreNorm block ===
class PreNorm(nn.Module):
    """Apply LayerNorm over the last dimension, then a wrapped callable.

    Args:
        dim: size of the last (feature) dimension that is normalized.
        fn: module or callable applied to the normalized tensor.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Return `fn(LayerNorm(x), **kwargs)`; extra kwargs are passed to `fn` untouched."""
        return self.fn(self.norm(x), **kwargs)

# === 2: FeedForward block ===
class FeedForward(nn.Module):
    """Position-wise MLP: Linear(dim -> mlp_dim), GELU, Dropout, Linear(mlp_dim -> dim), Dropout.

    Args:
        dim: input and output feature size.
        mlp_dim: hidden layer size.
        dropout: dropout probability applied after each Linear stage.
    """

    def __init__(self, dim, mlp_dim, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, mlp_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(mlp_dim, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Apply the MLP to the last dimension of `x`; the output has the same shape as `x`."""
        return self.net(x)

# === 3: Attention block ===
class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Args:
        dim: feature size of the input and output tokens.
        heads: number of attention heads.
        dim_head: feature size of each head; the internal width is
            `heads * dim_head` and is projected back to `dim` at the end.
        dropout: dropout probability on the attention weights and on the output.
    """

    def __init__(self, dim, heads=4, dim_head=64, dropout=0.):
        super().__init__()
        inner_dim = heads * dim_head
        self.heads = heads
        self.scale = dim_head ** -0.5  # 1/sqrt(d) scaling of the dot products

        # One projection yields q, k and v together; they are split in forward().
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
        self.attend = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Attend over the token axis of `x` with shape (batch, tokens, dim).

        Returns a tensor of the same shape as `x`.
        """
        batch, tokens, _ = x.shape
        q, k, v = (
            # (batch, tokens, heads * d) -> (batch, heads, tokens, d)
            part.reshape(batch, tokens, self.heads, -1).transpose(1, 2)
            for part in self.to_qkv(x).chunk(3, dim=-1)
        )

        scores = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        weights = self.dropout(self.attend(scores))

        out = torch.matmul(weights, v)                         # (batch, heads, tokens, d)
        out = out.transpose(1, 2).reshape(batch, tokens, -1)   # merge the heads again
        return self.to_out(out)
# === 4: Transformer Encoder ===
class Transformer(nn.Module):
    """Stack of `depth` encoder layers, each a pre-norm attention block followed
    by a pre-norm feed-forward block, both with a residual connection.

    Args:
        dim: token feature size.
        depth: number of encoder layers.
        heads: attention heads per layer.
        dim_head: feature size of each attention head.
        mlp_dim: hidden size of the feed-forward block.
        dropout: dropout probability passed to Attention and FeedForward.
    """

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
        super().__init__()
        self.layers = nn.ModuleList([
            nn.ModuleList([
                PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)),
            ])
            for _ in range(depth)
        ])

    def forward(self, x):
        """Run `x` through every layer in order and return the result."""
        for attn, ff in self.layers:
            x = attn(x) + x  # residual around attention
            x = ff(x) + x    # residual around the MLP
        return x

# === 5: Vision Transformer (ViT) ===
class ViT(nn.Module):
    """Vision Transformer classifier.

    The image is cut into non-overlapping patches, each patch is linearly
    embedded, a learnable class token is prepended, learnable positional
    embeddings are added, and the Transformer output at the class token is fed
    to a LayerNorm + Linear head.

    Args (keyword-only):
        image_size: int or (height, width) of the input images.
        patch_size: int or (height, width) of one patch; must divide image_size.
        num_classes: number of output logits.
        channels: number of input image channels.
        dim: token embedding size.
        depth: number of Transformer layers.
        heads: attention heads per layer (each head gets `dim // heads` features).
        mlp_dim: hidden size of the feed-forward blocks.
        dropout: dropout probability used throughout.

    Raises:
        ValueError: if the image size is not divisible by the patch size.
    """

    def __init__(self, *, image_size, patch_size, num_classes, channels, dim, depth, heads, mlp_dim, dropout=0.):
        super().__init__()
        image_height, image_width = pair(image_size)
        patch_height, patch_width = pair(patch_size)
        if image_height % patch_height != 0 or image_width % patch_width != 0:
            raise ValueError('Image dimensions must be divisible by the patch size.')

        num_patches = (image_height // patch_height) * (image_width // patch_width)
        dim_head = max(dim // heads, 1)

        # A convolution whose kernel and stride equal the patch size applies one
        # shared linear projection to every non-overlapping patch.
        self.to_patch_embedding = nn.Conv2d(
            channels, dim,
            kernel_size=(patch_height, patch_width),
            stride=(patch_height, patch_width),
        )
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.dropout = nn.Dropout(dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes),
        )

    def forward(self, img):
        """Map images of shape (batch, channels, height, width) to raw class
        logits of shape (batch, num_classes)."""
        x = self.to_patch_embedding(img)      # (batch, dim, rows, cols)
        x = x.flatten(2).transpose(1, 2)      # (batch, num_patches, dim)

        cls_tokens = self.cls_token.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x = self.dropout(x + self.pos_embedding)

        x = self.transformer(x)
        return self.mlp_head(x[:, 0])         # classify from the class token

# === TODO 6: Initialize model ===
# Example hyperparameters (students should adjust!)
vit_hyperparameters = dict(
    image_size=28,
    patch_size=4,
    num_classes=6,
    channels=1,
    dim=64,
    depth=6,
    heads=4,
    mlp_dim=128,
)
model = ViT(**vit_hyperparameters)

# === TODO 7: Set up optimizer ===
import torch.optim as optim
optimizer = optim.Adam(params=model.parameters(), lr=0.003)

In [None]:
# === TODO 8: Print or summarize the model ===
# Printing an nn.Module shows the tree of its registered sub-modules.
print(model)
# Optionally: from torchsummary import summary
# summary(model, input_size=(1, 28, 28))
# NOTE: torchsummary is not imported anywhere in this notebook; it would have to
# be installed separately before the two lines above can be enabled.

In [None]:
# === 1: Training epoch ===
def train_epoch(model, optimizer, data_loader, loss_history):
    """Train `model` for one pass over `data_loader`.

    Args:
        model: module mapping a batch of inputs to per-class scores (logits).
        optimizer: optimizer built over `model.parameters()`.
        data_loader: DataLoader yielding `(data, target)` batches, with
            `target` holding integer class indices.
        loss_history: list; the batch loss is appended every 100 batches
            (starting with the first one) as a plain float.
    """
    model.train()
    total_samples = len(data_loader.dataset)

    for i, (data, target) in enumerate(data_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = torch.nn.functional.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            seen = i * len(data)
            percent = 100 * i / len(data_loader)
            print(f'[{seen:5}/{total_samples:5} ({percent:3.0f}%)]  Loss: {loss.item():6.4f}')
            loss_history.append(loss.item())

# === 2: Evaluation ===
def evaluate(model, data_loader, loss_history):
    """Evaluate `model` on every batch of `data_loader` without gradients.

    Args:
        model: module mapping a batch of inputs to per-class scores (logits).
        data_loader: iterable of `(data, target)` batches, with `target`
            holding integer class indices.
        loss_history: list; the average loss of this evaluation is appended.

    Returns:
        `(avg_loss, accuracy)`: the mean cross-entropy per sample and the
        fraction of correct predictions in [0, 1]. Both are 0.0 when the
        loader yields no samples.
    """
    model.eval()
    evaluated_samples = 0
    correct_samples = 0
    total_loss = 0.0

    with torch.no_grad():
        for data, target in data_loader:
            output = model(data)
            # Sum (not mean) per batch so a smaller last batch is weighted correctly.
            total_loss += torch.nn.functional.cross_entropy(output, target, reduction='sum').item()
            prediction = output.argmax(dim=1)
            correct_samples += (prediction == target).sum().item()
            evaluated_samples += target.numel()

    # Count what was actually evaluated; guard against an empty loader.
    denominator = max(evaluated_samples, 1)
    avg_loss = total_loss / denominator
    accuracy = correct_samples / denominator
    loss_history.append(avg_loss)

    print(f'\nAverage test loss: {avg_loss:.4f}  '
          f'Accuracy: {correct_samples:5}/{evaluated_samples:5} ({100.0 * accuracy:4.2f}%)\n')
    return avg_loss, accuracy

In [None]:
import time  # needed for the timer below; not imported by any earlier cell

# === SET EPOCHS ===
N_EPOCHS = 1

# === START TIMER ===
start_time = time.time()

# === INIT LOSS TRACKERS ===
train_loss_history, test_loss_history = [], []

# === MAIN TRAINING LOOP ===
# Expects `model`, `optimizer`, `train_loader` and `test_loader` to already
# exist in the kernel.
for epoch in range(1, N_EPOCHS + 1):
    print('Epoch:', epoch)
    train_epoch(model, optimizer, train_loader, train_loss_history)
    evaluate(model, test_loader, test_loss_history)

# === PRINT TOTAL TIME ===
print('Execution time:', '{:5.2f}'.format(time.time() - start_time), 'seconds')

# === SAVE TRAINED MODEL ===
torch.save(model.state_dict(), 'Student_ID.pth') # replace with your actual Student ID
print("✅ Model saved as .pth")

In [None]:
# LOAD MODEL - if needed
# Make sure you define the same ViT model structure first
model = ViT(image_size=28, patch_size=4, num_classes=6, channels=1, dim=64, depth=6, heads=4,
            mlp_dim=128)

# Load saved weights.
# map_location='cpu' lets a checkpoint that was saved from a GPU session be
# restored on a machine without CUDA; the model above is created on the CPU.
state_dict = torch.load('Student_ID.pth', map_location='cpu')
model.load_state_dict(state_dict)
model.eval()
print("✅ Model loaded and ready for testing")

In [None]:
import torch
# === Plot confusion matrix ===
def plot_confusion_matrix(model, data_loader, class_names):
    """Plot the confusion matrix of `model` over `data_loader`.

    Implemented with torch and matplotlib only, so no packages beyond the ones
    this notebook already imports are required.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        data_loader: iterable of `(data, target)` batches with integer class
            indices as targets.
        class_names: sequence of class labels; true and predicted indices must
            be valid positions in it.

    Returns:
        LongTensor of shape (num_classes, num_classes); entry [t, p] counts the
        samples of true class t that were predicted as class p.

    Raises:
        ValueError: if `class_names` is empty.
    """
    num_classes = len(class_names)
    if num_classes == 0:
        raise ValueError('class_names must not be empty')

    counts = torch.zeros(num_classes, num_classes, dtype=torch.long)
    model.eval()
    with torch.no_grad():
        for data, target in data_loader:
            predicted = model(data).argmax(dim=1)
            # Encode each (true, predicted) pair as one flat index and histogram it.
            flat = target.reshape(-1).cpu() * num_classes + predicted.reshape(-1).cpu()
            counts += torch.bincount(flat, minlength=num_classes ** 2).reshape(num_classes, num_classes)

    side = max(4, num_classes)
    fig, ax = plt.subplots(figsize=(side, side))
    image = ax.imshow(counts.numpy(), cmap='Blues')
    fig.colorbar(image, ax=ax)

    ticks = list(range(num_classes))
    ax.set_xticks(ticks)
    ax.set_xticklabels(class_names, rotation=45, ha='right')
    ax.set_yticks(ticks)
    ax.set_yticklabels(class_names)
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    ax.set_title('Confusion Matrix')

    # Write each count into its cell; switch to white text on dark cells.
    threshold = counts.max().item() / 2
    for true_idx in ticks:
        for pred_idx in ticks:
            value = counts[true_idx, pred_idx].item()
            ax.text(pred_idx, true_idx, str(value), ha='center', va='center',
                    color='white' if value > threshold else 'black')

    fig.tight_layout()
    plt.show()
    return counts

# === Plot example predictions per class ===
def plot_classwise_predictions(model, data_loader, class_names, samples_per_class=4):
    """Show the first `samples_per_class` samples of every true class together
    with the model's prediction.

    One row per true class. Each title reads "T: <true> / P: <predicted>" and
    is green for a correct prediction, red for a wrong one. Images are
    un-normalized with mean 0.5 / std 0.5, matching the transform used in this
    notebook. A class with fewer samples leaves its remaining cells blank.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        data_loader: iterable of `(data, target)` batches with image tensors
            of shape (batch, channels, height, width).
        class_names: sequence of class labels indexed by class id.
        samples_per_class: number of columns in the grid.

    Raises:
        ValueError: if `class_names` is empty or `samples_per_class` < 1.
    """
    num_classes = len(class_names)
    if num_classes == 0 or samples_per_class < 1:
        raise ValueError('need at least one class and samples_per_class >= 1')

    # true class index -> list of (image, predicted index)
    examples = {idx: [] for idx in range(num_classes)}
    model.eval()
    with torch.no_grad():
        for data, target in data_loader:
            predicted = model(data).argmax(dim=1)
            for img, true_idx, pred_idx in zip(data.cpu(), target.tolist(), predicted.tolist()):
                bucket = examples.get(true_idx)
                if bucket is not None and len(bucket) < samples_per_class:
                    bucket.append((img, pred_idx))
            if all(len(bucket) >= samples_per_class for bucket in examples.values()):
                break  # every row is full; no need to run the rest of the data

    # squeeze=False keeps `axes` two-dimensional for a single row or column.
    fig, axes = plt.subplots(
        num_classes, samples_per_class,
        figsize=(samples_per_class * 2, num_classes * 2),
        squeeze=False,
    )
    for row in range(num_classes):
        for col in range(samples_per_class):
            ax = axes[row][col]
            ax.axis('off')
            if col >= len(examples[row]):
                continue
            img, pred_idx = examples[row][col]
            # (C, H, W) -> (H, W, C); a single channel collapses to (H, W).
            pixels = (img * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0).squeeze(-1)
            ax.imshow(pixels.numpy(), cmap='gray')
            ax.set_title(f'T: {class_names[row]}\nP: {class_names[pred_idx]}',
                         fontsize=8,
                         color='green' if pred_idx == row else 'red')

    plt.suptitle('Predictions per class (T = true, P = predicted)')
    plt.tight_layout()
    plt.show()

# === TODO: After training, call both functions ===
# plot_confusion_matrix(model, test_loader, train_set.classes)
# plot_classwise_predictions(model, test_loader, train_set.classes, samples_per_class=4)