In [1]:
import wandb
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0" 

import json 
import torch
print(f"Using GPU: {torch.cuda.get_device_name(0)}")

import pandas as pd

from handsoncv.datasets import CILPFusionDataset
from handsoncv.models import IntermediateFusionNet
from handsoncv.training import train_fusion_model
from torchvision import transforms
from torch.utils.data import DataLoader

ROOT_PATH = "~/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-2/"
MOUNTED_ROOT_PATH = os.path.expanduser(ROOT_PATH)
ROOT_DATA = "~/Documents/repos/BuildingAIAgentsWithMultimodalModels/data/assessment/"
IMG_SIZE = 64
BATCH_SIZE = 32

Using GPU: NVIDIA GeForce RTX 3090


In [2]:
# Load split dictionary previouslu created with 01_dataset_exploration.ipynb
mapping_file = "subset_splits.json"
with open(f"{MOUNTED_ROOT_PATH}/{mapping_file}", "r") as f:
    splits = json.load(f)
    
torch.manual_seed(splits["seed"])

# Instantiate Dataset
img_transforms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),  # Scales data into [0,1]
])

train_ds = CILPFusionDataset(root_dir=ROOT_DATA, sample_ids=splits["train"], transform=img_transforms)
val_ds = CILPFusionDataset(root_dir=ROOT_DATA, sample_ids=splits["val"], transform=img_transforms)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

print(f"Ready to train with {len(train_ds)} training pairs and {len(val_ds)} validation pairs.")

Ready to train with 4799 training pairs and 1200 validation pairs.


In [3]:
###################################################################
# Sanity Check - Ensure no data leakage between train and val sets
###################################################################

assert set(train_ds.sample_ids).isdisjoint(set(val_ds.sample_ids)), "DATA LEAKAGE DETECTED!"

leaked_ids = set(train_ds.sample_ids).intersection(set(val_ds.sample_ids))
print(f"Found {len(leaked_ids)} overlapping IDs.")
print(f"Example leaked IDs: {list(leaked_ids)[:10]}")

Found 0 overlapping IDs.
Example leaked IDs: []


In [4]:
# Configuration to fufill logging requirement
EPOCHS = 20
LEARNING_RATE = 1e-4
SUBSET_SIZE = len(train_ds) + len(val_ds) 
INTERM_FUSION_EMB_DIM = 200

# Define Ablation Suite
experiments = [
    ("MaxPool2d (Baseline)", IntermediateFusionNet(mode='add', emb_dim_interm=INTERM_FUSION_EMB_DIM, downsample_mode='maxpool'), "maxpool"),
    ("Strided Conv (Ablation)", IntermediateFusionNet(mode='add', emb_dim_interm=INTERM_FUSION_EMB_DIM, downsample_mode='stride'), "stride")
]

ablation_results = []

for name, model, mode_tag in experiments:
    run = wandb.init(
        project="handsoncv-maxpoolvsstride",
        name=name,
        config={
            "architecture": "Int Fusion Add",
            "downsample_mode": mode_tag,
            "learning_rate": LEARNING_RATE,
            "epochs": EPOCHS,
            "fusion_strategy": "intermediate_add"
        }
    )
    
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
    
    print(f"\nRunning Experiment: {name}")
    metrics = train_fusion_model(
        model, train_loader, val_loader, 
        optimizer, torch.nn.CrossEntropyLoss(), 
        device="cuda", epochs=EPOCHS, scheduler=scheduler
    )
    
    # Store results for the final table
    # Store for local summary table
    metrics['Variant'] = mode_tag
    metrics['Parameters'] = sum(p.numel() for p in model.parameters() if p.requires_grad)
    ablation_results.append(metrics)

    wandb.finish()

# --- Final Comparison Table (Task 4.2) ---
# Create DataFrame and reorder columns
df_abl = pd.DataFrame(ablation_results).set_index("Variant")
# Calculate diff column 
df_abl.loc['Difference'] = df_abl.loc['stride'] - df_abl.loc['maxpool']

# Display the table
print("\n" + "="*50)
print("TASK 4 COMPARISON TABLE")
print("="*50)
print(df_abl)

[34m[1mwandb[0m: Currently logged in as: [33mguarino-vanessa-emanuela[0m ([33mhandsoncv-research[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



Running Experiment: MaxPool2d (Baseline)
Epoch 0: Val Loss: 0.5119, Acc: 73.75% | Mem: 299.2MB
Epoch 1: Val Loss: 0.3917, Acc: 81.50% | Mem: 299.2MB
Epoch 2: Val Loss: 0.2074, Acc: 90.00% | Mem: 299.2MB
Epoch 3: Val Loss: 0.0309, Acc: 98.00% | Mem: 299.2MB
Epoch 4: Val Loss: 0.0102, Acc: 98.67% | Mem: 299.2MB
Epoch 5: Val Loss: 0.0059, Acc: 98.58% | Mem: 299.2MB
Epoch 6: Val Loss: 0.0096, Acc: 98.58% | Mem: 299.2MB
Epoch 7: Val Loss: 0.0023, Acc: 98.67% | Mem: 299.2MB
Epoch 8: Val Loss: 0.0023, Acc: 98.67% | Mem: 299.2MB
Epoch 9: Val Loss: 0.0013, Acc: 98.67% | Mem: 299.2MB
Epoch 10: Val Loss: 0.0012, Acc: 98.67% | Mem: 299.2MB
Epoch 11: Val Loss: 0.0011, Acc: 98.67% | Mem: 299.2MB
Epoch 12: Val Loss: 0.0009, Acc: 98.67% | Mem: 299.2MB
Epoch 13: Val Loss: 0.0010, Acc: 98.67% | Mem: 299.2MB
Epoch 14: Val Loss: 0.0009, Acc: 98.67% | Mem: 299.2MB
Epoch 15: Val Loss: 0.0010, Acc: 98.67% | Mem: 299.2MB
Epoch 16: Val Loss: 0.0008, Acc: 98.67% | Mem: 299.2MB
Epoch 17: Val Loss: 0.0008, Acc: 

0,1
accuracy,▁▃▆█████████████████
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch_time_sec,▆▃▃▃▂▂▁▁▁▁▃▃▃▃▃▄██▅▃
learning_rate,████▇▇▇▆▆▅▄▄▃▃▂▂▂▁▁▁
peak_gpu_mem_mb,▁███████████████████
train_loss,█▆▅▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,98.66667
epoch,19.0
epoch_time_sec,5.3339
learning_rate,0.0
peak_gpu_mem_mb,299.16455
train_loss,0.00019
val_loss,0.00081



Running Experiment: Strided Conv (Ablation)
Epoch 0: Val Loss: 0.4776, Acc: 75.08% | Mem: 348.5MB
Epoch 1: Val Loss: 0.3979, Acc: 80.83% | Mem: 348.5MB
Epoch 2: Val Loss: 0.2937, Acc: 88.25% | Mem: 348.5MB
Epoch 3: Val Loss: 0.2191, Acc: 90.83% | Mem: 348.5MB
Epoch 4: Val Loss: 0.2712, Acc: 88.25% | Mem: 348.5MB
Epoch 5: Val Loss: 0.1786, Acc: 92.83% | Mem: 348.5MB
Epoch 6: Val Loss: 0.1663, Acc: 93.00% | Mem: 348.5MB
Epoch 7: Val Loss: 0.0990, Acc: 95.50% | Mem: 348.5MB
Epoch 8: Val Loss: 0.0957, Acc: 95.17% | Mem: 348.5MB
Epoch 9: Val Loss: 0.0486, Acc: 97.33% | Mem: 348.5MB
Epoch 10: Val Loss: 0.0611, Acc: 96.83% | Mem: 348.5MB
Epoch 11: Val Loss: 0.0243, Acc: 97.83% | Mem: 348.5MB
Epoch 12: Val Loss: 0.0200, Acc: 98.17% | Mem: 348.5MB
Epoch 13: Val Loss: 0.0191, Acc: 98.00% | Mem: 348.5MB
Epoch 14: Val Loss: 0.0143, Acc: 98.25% | Mem: 348.5MB
Epoch 15: Val Loss: 0.0123, Acc: 98.33% | Mem: 348.5MB
Epoch 16: Val Loss: 0.0118, Acc: 98.42% | Mem: 348.5MB
Epoch 17: Val Loss: 0.0116, Ac

0,1
accuracy,▁▃▅▆▅▆▆▇▇███████████
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch_time_sec,█▃▂▂▂▂▂▂▂▂▁▁▁▁▂▁▁▁▁▃
learning_rate,████▇▇▇▆▆▅▄▄▃▃▂▂▂▁▁▁
peak_gpu_mem_mb,▁███████████████████
train_loss,█▆▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▅▄▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
accuracy,98.33333
epoch,19.0
epoch_time_sec,5.83751
learning_rate,0.0
peak_gpu_mem_mb,348.49121
train_loss,0.00635
val_loss,0.0116



TASK 4 COMPARISON TABLE
            val_loss   accuracy     params  total_time_sec  sec_per_epoch  \
Variant                                                                     
maxpool     0.000805  98.666667  7074334.0      112.630120       5.405638   
stride      0.011603  98.333333  8020034.0      117.493022       5.684384   
Difference  0.010798  -0.333333   945700.0        4.862902       0.278746   

            gpu_mem_mb  Parameters  
Variant                             
maxpool     299.164551   7074334.0  
stride      348.491211   8020034.0  
Difference   49.326660    945700.0  
