In [1]:
import wandb
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0" 

import json 
import torch
print(f"Using GPU: {torch.cuda.get_device_name(0)}")

import pandas as pd

from handsoncv.datasets import CILPFusionDataset
from handsoncv.models import LateFusionNet, IntermediateFusionNet
from handsoncv.training import train_fusion_model
from torchvision import transforms
from torch.utils.data import DataLoader

ROOT_PATH = "~/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-2/"
MOUNTED_ROOT_PATH = os.path.expanduser(ROOT_PATH)
ROOT_DATA = "~/Documents/repos/BuildingAIAgentsWithMultimodalModels/data/assessment/"
IMG_SIZE = 64
BATCH_SIZE = 32

Using GPU: NVIDIA GeForce RTX 3090


In [2]:
# Load split dictionary previouslu created with 01_dataset_exploration.ipynb
mapping_file = "subset_splits.json"
with open(f"{MOUNTED_ROOT_PATH}/{mapping_file}", "r") as f:
    splits = json.load(f)
    
torch.manual_seed(splits["seed"])

# Instantiate Dataset
img_transforms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),  # Scales data into [0,1]
])

train_ds = CILPFusionDataset(root_dir=ROOT_DATA, sample_ids=splits["train"], transform=img_transforms)
val_ds = CILPFusionDataset(root_dir=ROOT_DATA, sample_ids=splits["val"], transform=img_transforms)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

print(f"Ready to train with {len(train_ds)} training pairs and {len(val_ds)} validation pairs.")

Ready to train with 4799 training pairs and 1200 validation pairs.


In [3]:
###################################################################
# Sanity Check - Ensure no data leakage between train and val sets
###################################################################

assert set(train_ds.sample_ids).isdisjoint(set(val_ds.sample_ids)), "DATA LEAKAGE DETECTED!"

leaked_ids = set(train_ds.sample_ids).intersection(set(val_ds.sample_ids))
print(f"Found {len(leaked_ids)} overlapping IDs.")
print(f"Example leaked IDs: {list(leaked_ids)[:10]}")

Found 0 overlapping IDs.
Example leaked IDs: []


In [None]:
# Configuration to fufill logging requirement
EPOCHS = 20
LEARNING_RATE = 1e-4
SUBSET_SIZE = len(train_ds) + len(val_ds) 
LATE_FUSION_EMB_DIM = 2
INTERM_FUSION_EMB_DIM = 200

# Define Experiment Suite
strategies = [
    ("Late Fusion", LateFusionNet(emb_dim_interm=INTERM_FUSION_EMB_DIM, emb_dim_late=LATE_FUSION_EMB_DIM), "late"),
    ("Int Fusion Concat", IntermediateFusionNet(mode='concat', emb_dim_interm=INTERM_FUSION_EMB_DIM), "intermediate_concat"),
    ("Int Fusion Add", IntermediateFusionNet(mode='add', emb_dim_interm=INTERM_FUSION_EMB_DIM), "intermediate_add"),
    ("Int Fusion Mul", IntermediateFusionNet(mode='mul', emb_dim_interm=INTERM_FUSION_EMB_DIM), "intermediate_mul"),
]

results = []

for name, model, strategy_type in strategies:
    current_emb_size = LATE_FUSION_EMB_DIM if strategy_type == "late" else INTERM_FUSION_EMB_DIM
    run = wandb.init(
        project="handsoncv-fusion", 
        name=name,
        config={
            "architecture": name,
            "fusion_strategy": strategy_type,
            "embedding_size": current_emb_size,
            "learning_rate": LEARNING_RATE,
            "batch_size": BATCH_SIZE,
            "epochs": EPOCHS,
            "optimizer_type": "Adam",
            "subset_size": SUBSET_SIZE,
            "seed": splits["seed"]
        }
    )
    
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) #T_max set to the total number of epochs
    
    print(f"Training {name}...")
    
    metrics = train_fusion_model(
        model, 
        train_loader, 
        val_loader, 
        optimizer=optimizer,
        criterion=torch.nn.CrossEntropyLoss(),
        device="cuda" if torch.cuda.is_available() else "cpu",
        epochs=EPOCHS,
        scheduler=scheduler
    )
    
    metrics['Architecture'] = name
    results.append(metrics)
    wandb.finish()

# --- Final Comparison Table (Task 3.4) ---
# Create DataFrame and reorder columns to match assignment table
df = pd.DataFrame(results)
cols = ["Architecture", "val_loss", "accuracy", "params", "sec_per_epoch", "gpu_mem_mb"]
comparison_table = df[cols]

# Display the table
print("\n" + "="*60)
print("FINAL FUSION COMPARISON TABLE")
print("="*60)
print(comparison_table.to_string(index=False))

[34m[1mwandb[0m: Currently logged in as: [33mguarino-vanessa-emanuela[0m ([33mhandsoncv-research[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Training Late Fusion...
Epoch 0: Val Loss: 0.4903, Acc: 74.67% | Mem: 377.8MB
Epoch 1: Val Loss: 0.4749, Acc: 75.42% | Mem: 377.8MB
Epoch 2: Val Loss: 0.5300, Acc: 73.33% | Mem: 377.8MB
Epoch 3: Val Loss: 0.2916, Acc: 87.58% | Mem: 377.8MB
Epoch 4: Val Loss: 0.2496, Acc: 89.50% | Mem: 377.8MB
Epoch 5: Val Loss: 0.1768, Acc: 92.08% | Mem: 377.8MB
Epoch 6: Val Loss: 0.1145, Acc: 94.17% | Mem: 377.8MB
Epoch 7: Val Loss: 0.0375, Acc: 97.92% | Mem: 377.8MB
Epoch 8: Val Loss: 0.0401, Acc: 97.17% | Mem: 377.8MB
Epoch 9: Val Loss: 0.0128, Acc: 98.25% | Mem: 377.8MB
Epoch 10: Val Loss: 0.0087, Acc: 98.50% | Mem: 377.8MB
Epoch 11: Val Loss: 0.0074, Acc: 98.42% | Mem: 377.8MB
Epoch 12: Val Loss: 0.0074, Acc: 98.50% | Mem: 377.8MB
Epoch 13: Val Loss: 0.0084, Acc: 98.42% | Mem: 377.8MB
Epoch 14: Val Loss: 0.0069, Acc: 98.50% | Mem: 377.8MB
Epoch 15: Val Loss: 0.0075, Acc: 98.50% | Mem: 377.8MB
Epoch 16: Val Loss: 0.0066, Acc: 98.50% | Mem: 377.8MB
Epoch 17: Val Loss: 0.0065, Acc: 98.50% | Mem: 377.

0,1
accuracy,▁▂▁▅▅▆▇█████████████
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch_time_sec,█▂▁▁▁▂▂▂▃▂▂▃▃▂▃▃▃▂▄▃
learning_rate,████▇▇▇▆▆▅▄▄▃▃▂▂▂▁▁▁
peak_gpu_mem_mb,▁███████████████████
train_loss,█▆▆▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▇▇█▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,98.5
epoch,19.0
epoch_time_sec,5.61359
learning_rate,0.0
peak_gpu_mem_mb,377.8252
train_loss,0.00074
val_loss,0.00685


Training Int Fusion Concat...
Epoch 0: Val Loss: 0.5363, Acc: 70.25% | Mem: 505.3MB
Epoch 1: Val Loss: 0.3558, Acc: 83.67% | Mem: 505.3MB
Epoch 2: Val Loss: 0.1254, Acc: 94.92% | Mem: 505.3MB
Epoch 3: Val Loss: 0.0272, Acc: 97.75% | Mem: 505.3MB
Epoch 4: Val Loss: 0.0313, Acc: 97.83% | Mem: 505.3MB
Epoch 5: Val Loss: 0.0092, Acc: 98.42% | Mem: 505.3MB
Epoch 6: Val Loss: 0.0047, Acc: 98.58% | Mem: 505.3MB
Epoch 7: Val Loss: 0.0050, Acc: 98.50% | Mem: 505.3MB
Epoch 8: Val Loss: 0.0049, Acc: 98.58% | Mem: 505.3MB
Epoch 9: Val Loss: 0.0044, Acc: 98.58% | Mem: 505.3MB
Epoch 10: Val Loss: 0.0038, Acc: 98.58% | Mem: 505.3MB
Epoch 11: Val Loss: 0.0032, Acc: 98.58% | Mem: 505.3MB
Epoch 12: Val Loss: 0.0035, Acc: 98.58% | Mem: 505.3MB
Epoch 13: Val Loss: 0.0031, Acc: 98.58% | Mem: 505.3MB
Epoch 14: Val Loss: 0.0034, Acc: 98.58% | Mem: 505.3MB
Epoch 15: Val Loss: 0.0028, Acc: 98.58% | Mem: 505.3MB
Epoch 16: Val Loss: 0.0031, Acc: 98.58% | Mem: 505.3MB
Epoch 17: Val Loss: 0.0026, Acc: 98.58% | Mem

0,1
accuracy,▁▄▇█████████████████
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch_time_sec,▁▁▁▁▃▂▃▁▂█▇██▅▅▅▅▇▅▂
learning_rate,████▇▇▇▆▆▅▄▄▃▃▂▂▂▁▁▁
peak_gpu_mem_mb,▁███████████████████
train_loss,█▆▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,98.58333
epoch,19.0
epoch_time_sec,5.46047
learning_rate,0.0
peak_gpu_mem_mb,505.28174
train_loss,0.00018
val_loss,0.00289


Training Int Fusion Add...
Epoch 0: Val Loss: 0.5013, Acc: 75.50% | Mem: 510.4MB
Epoch 1: Val Loss: 0.3727, Acc: 82.83% | Mem: 510.4MB
Epoch 2: Val Loss: 0.0948, Acc: 95.42% | Mem: 510.4MB
Epoch 3: Val Loss: 0.0190, Acc: 98.42% | Mem: 510.4MB
Epoch 4: Val Loss: 0.0361, Acc: 97.67% | Mem: 510.4MB
Epoch 5: Val Loss: 0.0057, Acc: 98.50% | Mem: 510.4MB
Epoch 6: Val Loss: 0.0024, Acc: 98.67% | Mem: 510.4MB
Epoch 7: Val Loss: 0.0025, Acc: 98.58% | Mem: 510.4MB
Epoch 8: Val Loss: 0.0026, Acc: 98.50% | Mem: 510.4MB
Epoch 9: Val Loss: 0.0026, Acc: 98.58% | Mem: 510.4MB
Epoch 10: Val Loss: 0.0016, Acc: 98.58% | Mem: 510.4MB
Epoch 11: Val Loss: 0.0019, Acc: 98.58% | Mem: 510.4MB
Epoch 12: Val Loss: 0.0018, Acc: 98.58% | Mem: 510.4MB
Epoch 13: Val Loss: 0.0022, Acc: 98.58% | Mem: 510.4MB
Epoch 14: Val Loss: 0.0021, Acc: 98.58% | Mem: 510.4MB
Epoch 15: Val Loss: 0.0015, Acc: 98.58% | Mem: 510.4MB
Epoch 16: Val Loss: 0.0015, Acc: 98.58% | Mem: 510.4MB
Epoch 17: Val Loss: 0.0016, Acc: 98.58% | Mem: 5

0,1
accuracy,▁▃▇█████████████████
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch_time_sec,█▅▅▅▅▅▆▄▄▄▂▃▁▁▂▃▃▃▂▃
learning_rate,████▇▇▇▆▆▅▄▄▃▃▂▂▂▁▁▁
peak_gpu_mem_mb,▁███████████████████
train_loss,█▆▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,98.58333
epoch,19.0
epoch_time_sec,5.23227
learning_rate,0.0
peak_gpu_mem_mb,510.44971
train_loss,0.00018
val_loss,0.00149


Training Int Fusion Mul...
Epoch 0: Val Loss: 0.4192, Acc: 79.33% | Mem: 565.7MB
Epoch 1: Val Loss: 0.1766, Acc: 92.75% | Mem: 565.7MB
Epoch 2: Val Loss: 0.0432, Acc: 97.50% | Mem: 565.7MB
Epoch 3: Val Loss: 0.0206, Acc: 98.33% | Mem: 565.7MB
Epoch 4: Val Loss: 0.0259, Acc: 98.00% | Mem: 565.7MB
Epoch 5: Val Loss: 0.0510, Acc: 97.50% | Mem: 565.7MB
Epoch 6: Val Loss: 0.0235, Acc: 98.00% | Mem: 565.7MB
Epoch 7: Val Loss: 0.0188, Acc: 98.25% | Mem: 565.7MB
Epoch 8: Val Loss: 0.0153, Acc: 98.42% | Mem: 565.7MB
Epoch 9: Val Loss: 0.0175, Acc: 98.42% | Mem: 565.7MB
Epoch 10: Val Loss: 0.0159, Acc: 98.42% | Mem: 565.7MB
Epoch 11: Val Loss: 0.0193, Acc: 98.42% | Mem: 565.7MB
Epoch 12: Val Loss: 0.0184, Acc: 98.42% | Mem: 565.7MB
Epoch 13: Val Loss: 0.0178, Acc: 98.42% | Mem: 565.7MB
Epoch 14: Val Loss: 0.0197, Acc: 98.42% | Mem: 565.7MB
Epoch 15: Val Loss: 0.0197, Acc: 98.42% | Mem: 565.7MB
Epoch 16: Val Loss: 0.0196, Acc: 98.42% | Mem: 565.7MB
Epoch 17: Val Loss: 0.0196, Acc: 98.42% | Mem: 5

0,1
accuracy,▁▆██████████████████
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch_time_sec,▁▁▁▂███▄▂▂▂▂▂▂▂▂▂▁▁▂
learning_rate,████▇▇▇▆▆▅▄▄▃▃▂▂▂▁▁▁
peak_gpu_mem_mb,▁███████████████████
train_loss,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,98.41667
epoch,19.0
epoch_time_sec,5.24821
learning_rate,0.0
peak_gpu_mem_mb,565.68018
train_loss,0.0001
val_loss,0.01961



FINAL FUSION COMPARISON TABLE
     Architecture  val_loss  accuracy   params  sec_per_epoch  gpu_mem_mb
      Late Fusion  0.006845 98.500000 13694510       5.568408  377.825195
Int Fusion Concat  0.002893 98.583333 13627934       5.731746  505.281738
   Int Fusion Add  0.001489 98.583333  7074334       5.266408  510.449707
   Int Fusion Mul  0.019606 98.416667  7074334       5.377609  565.680176
