In [1]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms.v2 as transforms 
from torch.utils.data import random_split

from modules import data_loader
from modules.networks import VarResNet
from modules.networks import Net


## Question 14
see data_loader.py for how to load the data as three tensors

In [11]:
root = "../data/mnist-varres/train"
buckets = data_loader.load_sorted_data(root)

In [12]:
#split into training and validation 
train_buckets = []
val_buckets = []
torch.manual_seed(42)

for inputs, labels in buckets:

    n_total = len(inputs)
    n_val = int(n_total * 0.2)
    n_train = n_total - n_val
    

    indices = torch.randperm(n_total)
    # Slice using indices
    train_x = inputs[indices[:n_train]]
    train_y = labels[indices[:n_train]]
    
    val_x = inputs[indices[n_train:]]
    val_y = labels[indices[n_train:]]
    
    train_buckets.append((train_x, train_y))
    val_buckets.append((val_x, val_y))

Training loop with inner loop over the three dimensions

In [13]:
model = VarResNet()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

epochs = 2
batch_size = 16

# Calculate min samples to prevent index out of bounds
min_samples = min(len(b[0]) for b in train_buckets)

print("Starting Training...")

for epoch in range(epochs):
    running_loss = 0.0
    
    # shuffle indices 
    bucket_indices = [torch.randperm(len(b[0])) for b in train_buckets]
    
    # loop over batches
    for i in range(0, min_samples, batch_size):
        
        # loop over resolutions
        for bucket_idx, (inputs_full, labels_full) in enumerate(train_buckets):

            indices = bucket_indices[bucket_idx][i : i + batch_size]
            inputs = inputs_full[indices]
            labels = labels_full[indices]
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()

    # average: total loss / (batches * resolutions)
    avg_loss = running_loss / ((min_samples // batch_size) * 3)
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

print("Finished Training")

Starting Training...
Epoch 1, Loss: 0.2631
Epoch 2, Loss: 0.0826
Finished Training


## Question 15
find the value of N for which both networks have roughly the same number of parameters

In [17]:
def count_params(model):
    total_params = 0

    for param in model.parameters():
        num_elements = param.numel()
        total_params += num_elements
        
    return total_params

# target_params = (slope * n) + intercept
# Rearrange:
# n = (target_params - intercept) / slope

target_params = count_params(Net())
params_at_1 = count_params(VarResNet(n_channels=1))
params_at_2 = count_params(VarResNet(n_channels=2))
slope = params_at_2 - params_at_1
intercept = params_at_1 - slope

n = (target_params - intercept) / slope
print(n)

# So, rounded:
optimal_n=round(n)

# check
print(f"Fixed Model: {count_params(Net())} params")
print(f"VarRes Model: {count_params(VarResNet())} params")


81.123745819398
Fixed Model: 29066 params
VarRes Model: 29029 params


## Question 16
Compare the validation performance of global max pooling to that of global mean pooling. Report your findings, and choose a global pooling variant.

In [None]:
# to make it cleaner, function for training:

def run_training(pooling_type, epochs=3, lr=0.001):
    print(f"\nTraining with Global {pooling_type.upper()} Pooling")
    
    # Setup
    model = VarResNet(n_channels=81, pooling=pooling_type)
    optimizer = optim.Adam(model.parameters(), lr)
    criterion = nn.CrossEntropyLoss()
    
    min_samples = min(len(b[0]) for b in train_buckets)
    batch_size = 16

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        
        # Shuffle indices 
        bucket_indices = [torch.randperm(len(b[0])) for b in train_buckets]
        
        # Training loop
        for i in range(0, min_samples, batch_size):
            for bucket_idx, (inputs_full, labels_full) in enumerate(train_buckets):
                indices = bucket_indices[bucket_idx][i : i + batch_size]
                
                optimizer.zero_grad()
                outputs = model(inputs_full[indices])
                loss = criterion(outputs, labels_full[indices])
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item()
        
        # Validation loop
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_buckets:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_acc = 100 * correct / total
        avg_loss = running_loss / ((min_samples // batch_size) * 3)
        print(f"Epoch {epoch+1}: Train Loss {avg_loss:.4f}, Val Acc {val_acc:.2f}%")
        
    return val_acc

In [19]:
acc_max = run_training('max')
acc_mean = run_training('mean')


Training with Global MAX Pooling
Epoch 1: Train Loss 0.2635, Val Acc 95.81%
Epoch 2: Train Loss 0.0756, Val Acc 97.72%
Epoch 3: Train Loss 0.0539, Val Acc 98.31%

Training with Global MEAN Pooling
Epoch 1: Train Loss 0.7564, Val Acc 91.73%
Epoch 2: Train Loss 0.2681, Val Acc 93.50%
Epoch 3: Train Loss 0.1920, Val Acc 95.28%


## Question 17
comparing fixed resolution network with variable resolution

In [None]:
### Fixed model 

root = "../data/mnist-varres"
transform_fixed = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28,28)),
    transforms.ToTensor()
])

full_train_fixed = torchvision.datasets.ImageFolder(root=root + "/train", transform=transform_fixed)

# Split train/val
train_size = int(0.8 * len(full_train_fixed))
val_size = len(full_train_fixed) - train_size
train_set_fixed, val_set_fixed = random_split(full_train_fixed, [train_size, val_size], generator=torch.Generator().manual_seed(42))

# Loaders
batch_size = 16
train_loader_fixed = torch.utils.data.DataLoader(train_set_fixed, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader_fixed   = torch.utils.data.DataLoader(val_set_fixed,   batch_size=batch_size, shuffle=False, num_workers=2)

# Load test set (for final comparison)
test_set_fixed = torchvision.datasets.ImageFolder(root=root + "/test", transform=transform_fixed)
test_loader_fixed = torch.utils.data.DataLoader(test_set_fixed, batch_size=batch_size, shuffle=False, num_workers=2)

model_fixed = Net() 



In [7]:
## var model

all_buckets = data_loader.load_sorted_data(data_root=root + "/train")

train_buckets = []
val_buckets = []
torch.manual_seed(42)

for inputs, labels in all_buckets:
    n_total = len(inputs)
    n_val = int(n_total * 0.2)
    n_train = n_total - n_val
    
    indices = torch.randperm(n_total)
    
    train_buckets.append((inputs[indices[:n_train]], labels[indices[:n_train]]))
    val_buckets.append((inputs[indices[n_train:]], labels[indices[n_train:]]))

# Load test buckets (for final comparison)
test_buckets_var = data_loader.load_sorted_data(data_root=root + "/test")

model_var = VarResNet(n_channels=81, pooling='max')

In [11]:
def train_fixed_res(model, train_loader, val_loader, epochs=3, lr=0.001):
    print(f"\nTraining FixedResNet")
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr) 

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        
        for i, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_acc = 100 * correct / total
        avg_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch + 1}: Train Loss {avg_loss:.4f}, Val Acc {val_acc:.2f}%")

In [9]:
def train_var_res(model, train_data, val_data, epochs=3, lr=0.001):
    print(f"\nTraining VarResNet")
    
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    min_samples = min(len(b[0]) for b in train_data)
    batch_size = 16

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        bucket_indices = [torch.randperm(len(b[0])) for b in train_data]
        
        for i in range(0, min_samples, batch_size):
            for bucket_idx, (inputs_full, labels_full) in enumerate(train_data):
                indices = bucket_indices[bucket_idx][i : i + batch_size]
                
                optimizer.zero_grad()
                outputs = model(inputs_full[indices])
                loss = criterion(outputs, labels_full[indices])
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
        
        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_data:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_acc = 100 * correct / total
        avg_loss = running_loss / ((min_samples // batch_size) * len(train_data))
        print(f"Epoch {epoch+1}: Train Loss {avg_loss:.4f}, Val Acc {val_acc:.2f}%")

In [12]:
train_fixed_res(model_fixed, train_loader_fixed, val_loader_fixed, epochs=5, lr=0.0003)
train_var_res(model_var, train_buckets, val_buckets, epochs=5, lr=0.0003)


Training FixedResNet
Epoch 1: Train Loss 0.9472, Val Acc 82.88%
Epoch 2: Train Loss 0.4559, Val Acc 88.97%
Epoch 3: Train Loss 0.3331, Val Acc 91.42%
Epoch 4: Train Loss 0.2696, Val Acc 92.46%
Epoch 5: Train Loss 0.2264, Val Acc 93.19%

Training VarResNet
Epoch 1: Train Loss 0.4333, Val Acc 94.09%
Epoch 2: Train Loss 0.1286, Val Acc 97.04%
Epoch 3: Train Loss 0.0975, Val Acc 97.23%
Epoch 4: Train Loss 0.0766, Val Acc 97.44%
Epoch 5: Train Loss 0.0650, Val Acc 97.70%


In [13]:
# Test fixed
correct = 0
total = 0
model_fixed.eval()
with torch.no_grad():
    for inputs, labels in test_loader_fixed:
        outputs = model_fixed(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Fixed Resolution Test Accuracy: {100 * correct / total:.2f}%")

# Test var
correct = 0
total = 0
model_var.eval()
with torch.no_grad():
    for inputs, labels in test_buckets_var:
        outputs = model_var(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Variable Resolution Test Accuracy: {100 * correct / total:.2f}%")

Fixed Resolution Test Accuracy: 94.12%
Variable Resolution Test Accuracy: 97.95%
