In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
import time

# Make the sibling ``src`` directory importable so the project modules resolve.
# The location is relative to the notebook's working directory; adjust the
# os.pardir component if the notebook lives elsewhere relative to src.
module_path = os.path.abspath(os.path.join(os.pardir, 'src'))
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

# Import modules from src
from utils import load_processed_data
from models import Model_1, Model_2, Model_3

In [None]:
# --- Configuration ---
BATCH_SIZE = 128       # Training mini-batch size (64 or 256 are also worth trying)
LEARNING_RATE = 1e-3   # Common default starting learning rate for Adam
N_MINIBATCHES = 15     # Number of training mini-batches each architecture is given

# Pick the compute device in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")
print(f"Using device: {DEVICE}")

In [None]:
# --- Load Data ---
print("Loading data...")
# The last two return values are not used in this notebook (presumably the test
# split -- confirm against utils.load_processed_data). They are bound to explicit
# throwaway names instead of `_`: assigning to `_` at notebook top level overrides
# IPython's "last output" variable, which then stops being updated.
X_train, y_train, X_val, y_val, _unused_X, _unused_y = load_processed_data()
print("Data loaded.")

In [5]:
# Wrap the training and validation tensors into datasets
train_dataset, val_dataset = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_val, y_val),
)

In [6]:
# Build the dataloaders.
# Training data is reshuffled on every pass so the batches differ between epochs.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Validation order is irrelevant, so it stays unshuffled; the batch size is
# doubled to get through validation faster.
val_loader = DataLoader(dataset=val_dataset, batch_size=2 * BATCH_SIZE, shuffle=False)

In [7]:
# --- Define Training and Evaluation Functions ---

def train_one_step(model, batch, criterion, optimizer, device):
    """Run one optimisation step on a single mini-batch.

    Args:
        model: Network to update; it is switched to training mode by this call.
        batch: ``(inputs, targets)`` pair for one mini-batch.
        criterion: Callable mapping ``(outputs, targets)`` to a scalar loss tensor.
        optimizer: Optimizer whose gradients are cleared and which is stepped once.
        device: Device the batch is moved to before the forward pass.

    Returns:
        The loss of this mini-batch as a Python float, computed before the
        parameter update.
    """
    model.train()

    features, labels = batch
    features = features.to(device)
    labels = labels.to(device)

    # Clear gradients left over from the previous step before accumulating new ones.
    optimizer.zero_grad()

    batch_loss = criterion(model(features), labels)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.item()

In [8]:
def evaluate(model, loader, criterion, device):
    """Compute the average loss and classification accuracy over a data loader.

    Args:
        model: Network to evaluate; it is left in evaluation mode on return.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable mapping ``(outputs, targets)`` to a scalar loss tensor.
            Each batch loss is weighted by the batch size, which yields the
            per-sample average only if the criterion averages over the batch
            (assumed here -- confirm before passing a different reduction).
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the size-weighted mean loss and the
        fraction of samples whose highest-scoring output index equals the target.

    Raises:
        ValueError: If the loader yields no samples, since the averages would be
            undefined.
    """
    model.eval()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():  # No gradients are needed for evaluation
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Weight by batch size so a smaller final batch does not skew the mean.
            total_loss += loss.item() * inputs.size(0)

            # Predicted class is the index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            total_samples += targets.size(0)
            correct_predictions += (predicted == targets).sum().item()

    if total_samples == 0:
        raise ValueError("evaluate() received no samples: the data loader is empty")

    avg_loss = total_loss / total_samples
    accuracy = correct_predictions / total_samples
    return avg_loss, accuracy

In [9]:
# --- Experiment Setup ---
# Candidate architectures, keyed by the label used for them in logs and results.
model_architectures = dict([
    ("Model_1 (128x128)", Model_1),
    ("Model_2 (256x256)", Model_2),
    ("Model_3 (256x128x64)", Model_3),
])

# Per-architecture record of mini-batch losses and final validation metrics.
results = dict()

# Loss shared by every experiment.
criterion = nn.CrossEntropyLoss()

In [None]:
# --- Run Experiments ---

# Fixed seed applied per architecture so runs are reproducible and the
# comparison between architectures is not distorted by random batch order.
EXPERIMENT_SEED = 42

for name, ModelClass in model_architectures.items():
    print(f"\n--- Running Experiment for: {name} ---")

    # Seed before construction so the weight initialisation is reproducible.
    torch.manual_seed(EXPERIMENT_SEED)
    model = ModelClass().to(DEVICE)
    print(model)  # Print architecture details

    # Adam optimizer with the configured learning rate
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Loss of every mini-batch seen by this model, in training order
    minibatch_losses = []
    start_time = time.time()

    # Re-seed right before creating the iterator: a shuffling DataLoader without
    # its own generator takes its shuffle seed from the global torch RNG, and
    # model construction above consumes a different amount of that RNG for each
    # architecture. Re-seeding here keeps the shuffled batch order the same for
    # every architecture.
    torch.manual_seed(EXPERIMENT_SEED)
    train_iter = iter(train_loader)

    # Train on exactly N_MINIBATCHES mini-batches, fetched manually.
    for batch_count in range(1, N_MINIBATCHES + 1):
        try:
            batch = next(train_iter)
        except StopIteration:
            # The loader ran out before N_MINIBATCHES batches were drawn:
            # start another pass over the training data.
            train_iter = iter(train_loader)
            batch = next(train_iter)

        loss = train_one_step(model, batch, criterion, optimizer, DEVICE)
        minibatch_losses.append(loss)

        # Progress report every 5 batches and on the last one
        if (batch_count % 5 == 0) or (batch_count == N_MINIBATCHES):
            print(f"  Batch {batch_count}/{N_MINIBATCHES}, Loss: {loss:.4f}")

    end_time = time.time()
    training_time = end_time - start_time

    # Evaluate on the validation set after the N_MINIBATCHES training steps
    print(f"Evaluating {name} on validation set...")
    val_loss, val_accuracy = evaluate(model, val_loader, criterion, DEVICE)
    print(f"  Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
    print(f"  Training Time for {N_MINIBATCHES} batches: {training_time:.2f} seconds")

    # Store results
    results[name] = {
        'minibatch_losses': minibatch_losses,
        'val_loss': val_loss,
        'val_accuracy': val_accuracy,
        'training_time': training_time
    }


In [None]:
# --- Analysis and Visualization ---

# Plot mini-batch losses.
# The whole figure is built in a single cell: with the default inline backend a
# figure is rendered and closed when its cell finishes, so labels, legend and
# ticks set in a later cell would end up on a new, empty figure.
plt.figure(figsize=(12, 6))
for name, data in results.items():
    losses = data['minibatch_losses']
    # Take the x-range from the recorded losses so the plot stays valid even if
    # N_MINIBATCHES was edited after the experiments were run.
    plt.plot(range(1, len(losses) + 1), losses, label=f"{name} Train Loss")

plt.xlabel("Mini-batch Number")
plt.ylabel("Training Loss")
plt.title(f"Training Loss per Mini-batch (First {N_MINIBATCHES} Batches)")
plt.legend()
plt.grid(True)
plt.xticks(range(1, N_MINIBATCHES + 1))
plt.tight_layout()
plt.show()

In [None]:
# Print summary table.
# Header and rows share the same column widths so the values line up under
# their headings, and the rule is sized to the header instead of a fixed length.
print(f"\n--- Summary of Initial Runs ({N_MINIBATCHES} Mini-batches) ---")
summary_header = f"{'Architecture':<25} | {'Final Val Loss':<15} | {'Final Val Accuracy':<18} | Training Time (s)"
print(summary_header)
print("-" * len(summary_header))
for name, data in results.items():
    print(f"{name:<25} | {data['val_loss']:<15.4f} | {data['val_accuracy']:<18.4f} | {data['training_time']:.2f}")

In [14]:
# --- Select Best Performing Architecture ---
# Highest validation accuracy wins; when accuracies are equal, the lower
# validation loss decides. If `results` is empty the defaults below are kept.
best_model_name = ""
best_val_accuracy = -1.0
best_val_loss = float("inf")

for name, data in results.items():
    candidate_accuracy = data['val_accuracy']
    candidate_loss = data['val_loss']

    is_more_accurate = candidate_accuracy > best_val_accuracy
    # A NaN loss compares False here, so it can never win a tie.
    wins_tie_on_loss = (
        candidate_accuracy == best_val_accuracy and candidate_loss < best_val_loss
    )

    if is_more_accurate or wins_tie_on_loss:
        best_val_accuracy = candidate_accuracy
        best_val_loss = candidate_loss
        best_model_name = name

In [None]:
# Report the selected architecture, followed by a caveat about the short training run.
print(
    f"\nBased on initial validation accuracy, the best performing architecture appears to be: {best_model_name}",
    "(Note: This is based on very limited training. Further training is needed for confirmation)",
    sep="\n",
)