In [1]:
import wandb
import os
# Expose only GPU index 0 to CUDA; this is set here, before torch is imported below.
os.environ["CUDA_VISIBLE_DEVICES"] = "0" 

import json 
import torch
# Report which GPU is visible. On a machine without CUDA, get_device_name(0)
# raises, so guard it and print an explicit notice instead of aborting the cell.
gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else None
if gpu_name is not None:
    print(f"Using GPU: {gpu_name}")
else:
    print("CUDA is not available: no GPU detected.")

import pandas as pd

from handsoncv.datasets import CILPFusionDataset
from handsoncv.models import LidarClassifier, CILPModel, CrossModalProjector, RGB2LiDARClassifier
from handsoncv.training import train_fusion_cilp_model
from torchvision import transforms
from torch.utils.data import DataLoader

# Project and data locations. Both are expanded here so that every consumer
# receives a real filesystem path rather than a literal "~" (previously only
# ROOT_PATH was expanded, while ROOT_DATA was handed on unexpanded).
ROOT_PATH = "~/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-2/"
MOUNTED_ROOT_PATH = os.path.expanduser(ROOT_PATH)
ROOT_DATA = os.path.expanduser("~/Documents/repos/BuildingAIAgentsWithMultimodalModels/data/assessment/")
IMG_SIZE = 64    # size passed to transforms.Resize when building the image pipeline
BATCH_SIZE = 32  # batch size shared by the training and validation DataLoaders

Using GPU: NVIDIA GeForce RTX 3090


In [2]:
# Read the train/val split dictionary previously created with 01_dataset_exploration.ipynb
mapping_file = "subset_splits.json"
with open(f"{MOUNTED_ROOT_PATH}/{mapping_file}", "r") as f:
    splits = json.load(f)

# Seed torch's global RNG with the seed stored alongside the splits
torch.manual_seed(splits["seed"])

# Image pipeline: resize, then convert to a tensor (ToTensor scales data into [0,1])
img_transforms = transforms.Compose(
    [transforms.Resize(IMG_SIZE), transforms.ToTensor()]
)

# One dataset per split, built in train -> val order with the same transform
train_ds, val_ds = (
    CILPFusionDataset(root_dir=ROOT_DATA, sample_ids=splits[split_name], transform=img_transforms)
    for split_name in ("train", "val")
)

# Both loaders share batch size and drop the last incomplete batch; only training shuffles
loader_kwargs = dict(batch_size=BATCH_SIZE, drop_last=True)
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)

print(f"Ready to train with {len(train_ds)} training pairs and {len(val_ds)} validation pairs.")

Ready to train with 4799 training pairs and 1200 validation pairs.


In [3]:
###################################################################
# Sanity Check - Ensure no data leakage between train and val sets
###################################################################

# Collect and report the overlap first, then assert: asserting first would abort
# the cell before the offending IDs could ever be printed.
leaked_ids = set(train_ds.sample_ids).intersection(val_ds.sample_ids)
print(f"Found {len(leaked_ids)} overlapping IDs.")
print(f"Example leaked IDs: {list(leaked_ids)[:10]}")
assert not leaked_ids, "DATA LEAKAGE DETECTED!"

# Class-balance probe on a single batch per loader; the label tensor is taken as
# the last element of each batch.
# NOTE(review): the mean is only a class prior if labels are 0/1 - confirm against the dataset.
train_labels = next(iter(train_loader))[-1].cpu().numpy()
val_labels = next(iter(val_loader))[-1].cpu().numpy()
class_prior_train, class_prior_val = train_labels.mean(), val_labels.mean()

print(f"Class prior average in first training batch: {class_prior_train:.4f}, and validation batch: {class_prior_val:.4f}")

# Abort early when the first training batch is (almost) single-class.
if class_prior_train < 0.01 or class_prior_train > 0.99:
    raise ValueError("The training batch is extremely imbalanced "
        f"(class prior = {class_prior_train:.4f}). "
        "It will cause the model to memorize label ordering. "
        "Please recreate the dataset splits."
    )

Found 0 overlapping IDs.
Example leaked IDs: []
Class prior average in first training batch: 0.5312, and validation batch: 0.5625


In [4]:
# Shared settings reused by all subsequent steps
SUBSET_SIZE = sum(len(split_ds) for split_ds in (train_ds, val_ds))  # total samples across train + val
INTERM_FUSION_EMB_DIM = 200  # embedding width handed to the models' emb_dim_* arguments

### Step 5.1a: Train the LiDAR-Only Classifier

In [None]:
# Configuration to fulfill the logging requirement
EPOCHS = 20
LEARNING_RATE = 1e-4

# Resolve the device once so the model and the training loop always agree
# (previously the model was moved to "cuda" unconditionally while the loop
# was given a CPU fallback).
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): the recorded run stayed at val loss ~0.693 / ~50% accuracy for
# 7 epochs before being interrupted - chance level if the task is binary.
# Worth investigating the model/training code before trusting this baseline.

# Using the run as a context manager closes the W&B run even when training is
# interrupted or raises, instead of leaving it dangling until the next cell.
with wandb.init(
    project="handsoncv-cilp-assessment",
    name="5.1a_Lidar_Only",
    config={
        "architecture": "LidarClassifier",
        "fusion_strategy": "single_modality",
        "learning_rate": LEARNING_RATE,
        "epochs": EPOCHS,
        "subset_size": SUBSET_SIZE
    }
) as run:
    # Instantiate Classifier on LiDAR images only
    lidar_model = LidarClassifier(emb_dim_interm=INTERM_FUSION_EMB_DIM).to(device)

    optimizer = torch.optim.Adam(lidar_model.parameters(), lr=LEARNING_RATE)
    # Cosine schedule spanning the whole run (T_max equals the number of epochs)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    metrics_lidar = train_fusion_cilp_model(
        lidar_model,
        train_loader,
        val_loader,
        optimizer=optimizer,
        criterion=torch.nn.CrossEntropyLoss(),
        device=device,
        epochs=EPOCHS,
        scheduler=scheduler,
        task_mode="lidar-only"
    )

[34m[1mwandb[0m: Currently logged in as: [33mguarino-vanessa-emanuela[0m ([33mhandsoncv-research[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 0: Val Loss: 0.6939, Acc: 49.49% | Mem: 138.5MB
Epoch 1: Val Loss: 0.6931, Acc: 50.51% | Mem: 138.5MB
Epoch 2: Val Loss: 0.6935, Acc: 49.49% | Mem: 138.5MB
Epoch 3: Val Loss: 0.6933, Acc: 49.49% | Mem: 138.5MB
Epoch 4: Val Loss: 0.6934, Acc: 49.49% | Mem: 138.5MB
Epoch 5: Val Loss: 0.6935, Acc: 49.49% | Mem: 138.5MB
Epoch 6: Val Loss: 0.6934, Acc: 49.49% | Mem: 138.5MB


KeyboardInterrupt: 

Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x76edb1bcb250>> (for post_run_cell), with arguments args (<ExecutionResult object at 76edb0a77750, execution_count=4 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 76edb0a77f50, raw_cell="# Configuration to fufill logging requirement
EPOC.." transformed_cell="# Configuration to fufill logging requirement
EPOC.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2Bworkstation/home/vanessa/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-2/notebooks/04_final_assessment.ipynb#W3sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


ConnectionResetError: Connection lost

### Step 5.1b: Contrastive Pretraining (CILP Alignment)

In [5]:
# Configuration to fulfill the logging requirement
EPOCHS = 5 # Based on Nvidia 05_Assessment on MultiModal AI Agents
LEARNING_RATE = 1e-4
CILP_VAL_LOSS_TARGET = 3.2  # upper bound the validation loss must beat (requirement check below)

# Resolve the device once and use it for both the model and the training loop.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Using the run as a context manager closes the W&B run even when training is
# interrupted or raises.
with wandb.init(
    project="handsoncv-cilp-assessment",
    name="5.1b_CILP_Contrastive",
    config={
        "architecture": "CILPModel",
        "fusion_strategy": "contrastive",
        "learning_rate": LEARNING_RATE,
        "epochs": EPOCHS,
        "subset_size": SUBSET_SIZE
    }
) as run:
    cilp_model = CILPModel(emb_dim_interm=INTERM_FUSION_EMB_DIM, emb_dim_late=INTERM_FUSION_EMB_DIM).to(device)

    optimizer = torch.optim.Adam(cilp_model.parameters(), lr=LEARNING_RATE)
    # Cosine schedule spanning the whole run (T_max equals the number of epochs)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    metrics_cilp = train_fusion_cilp_model(
        cilp_model,
        train_loader,
        val_loader,
        optimizer=optimizer,
        criterion=torch.nn.CrossEntropyLoss(), # CrossEntropy is used for CLIP loss
        device=device,
        epochs=EPOCHS,
        scheduler=scheduler,
        task_mode="contrastive"
    )

    # CHECK REQUIREMENT: report both outcomes so a missed target is never silent.
    final_val_loss = metrics_cilp['val_loss']
    if final_val_loss < CILP_VAL_LOSS_TARGET:
        print(f"✅ Success! CILP Val Loss {final_val_loss:.4f} is below {CILP_VAL_LOSS_TARGET}")
    else:
        print(f"❌ Requirement not met: CILP Val Loss {final_val_loss:.4f} is not below {CILP_VAL_LOSS_TARGET}")

[34m[1mwandb[0m: Currently logged in as: [33mguarino-vanessa-emanuela[0m ([33mhandsoncv-research[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch 0: Val Loss: 0.8117, Acc: 0.00% | Mem: 378.5MB
Epoch 1: Val Loss: 0.6637, Acc: 0.00% | Mem: 378.5MB
Epoch 2: Val Loss: 0.5211, Acc: 0.00% | Mem: 378.5MB
Epoch 3: Val Loss: 0.4564, Acc: 0.00% | Mem: 378.5MB
Epoch 4: Val Loss: 0.4244, Acc: 0.00% | Mem: 378.5MB
✅ Success! CILP Val Loss 0.4244 is below 3.2


0,1
accuracy,▁▁▁▁▁
epoch,▁▃▅▆█
epoch_time_sec,▁██▆▆
learning_rate,█▇▅▃▁
peak_gpu_mem_mb,▁████
train_loss,█▃▂▁▁
val_loss,█▅▃▂▁

0,1
accuracy,0.0
epoch,4.0
epoch_time_sec,6.37821
learning_rate,1e-05
peak_gpu_mem_mb,378.52686
train_loss,0.38895
val_loss,0.42439
