In [None]:
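# Notebook for overfitting the detection loss (DetectionLoss) on a single image.
# NOTE(review): this header originally read "overfitting Lnorm on one image of UAVDT",
# but the code below optimises DetectionLoss on data from utils.visdrone_dataloader — confirm the dataset.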

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up before each cell execution, without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

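# Add the parent directory to sys.path so the project-local imports below resolve.
# TODO: per the original note, this should add the parent of the parent directory instead.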
sys.path.append("..")

import torch
from torch import nn, optim
from torch.optim.lr_scheduler import StepLR

# from torch.utils.tensorboard import SummaryWriter

from torchvision import transforms
from tqdm import tqdm

from models import Res18FPNCEASC  # Adjust as needed
from utils.visdrone_dataloader import get_dataset
from utils.losses import Lnorm, Lamm, DetectionLoss  # Adjust as needed

In [3]:
def safe_shape(x):
    """Describe the shape of ``x`` without assuming it is a tensor.

    Returns:
        ``x.shape`` when ``x`` is a ``torch.Tensor``; a list holding the
        recursively computed description of every element when ``x`` is a
        list or tuple; ``type(x)`` for any other object.
    """
    if isinstance(x, (list, tuple)):
        # Containers always map to a list that mirrors their nesting.
        return list(map(safe_shape, x))
    return x.shape if isinstance(x, torch.Tensor) else type(x)

In [4]:
# Experiment setup: the run mode plus the settings read by the training cell.
mode = "train"  # Change to "eval" or "test" as needed

config = dict(
    root_dir="/home/soroush1/scratch/eecs_project",  # dataset root handed to get_dataset
    batch_size=1,  # one sample per batch
    num_workers=4,  # data-loading worker count handed to get_dataset
    num_epochs=1,
    lr=1e-1,  # learning rate given to the optimizer
    config_path="../configs/resnet18_fpn_feature_extractor.py",  # model config file
)

print("done")

done


In [5]:
if __name__ == "__main__":
    # Overfit the detection loss on one fixed batch: a single batch is drawn
    # from the dataloader and the same images/targets are fed to the model
    # for `n_iters` optimisation steps.

    # Anomaly detection makes autograd report the forward operation behind a
    # failing backward pass (e.g. NaN gradients). It slows execution, so turn
    # it off once the loss is known to be numerically stable.
    torch.autograd.set_detect_anomaly(True)

    # Unpack config
    root_dir = config["root_dir"]
    batch_size = config["batch_size"]
    num_workers = config["num_workers"]
    num_epochs = config["num_epochs"]  # not used in this cell; kept for later cells
    learning_rate = config["lr"]
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Dataset and loader
    dataloader = get_dataset(
        root_dir=root_dir,
        split="train",
        transform=None,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )

    # Model
    model = Res18FPNCEASC(config_path=config["config_path"], num_classes=10)
    model.to(device)
    model.train()

    # Optimizer; StepLR multiplies the learning rate by `gamma` every
    # `step_size` calls to `scheduler.step()` (one call per iteration below).
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=500, gamma=0.1)

    # Losses
    l_det = DetectionLoss(num_bins=16, num_classes=10, num_anchors=6)

    # Draw the single batch that the model will be overfit on.
    batch = next(iter(dataloader))

    images = batch["image"].to(device)
    # Targets are passed through as loaded (not moved to `device` here);
    # `device` is handed to the loss instead — presumably the loss performs
    # the transfer, TODO confirm.
    targets = {
        "boxes": batch["boxes"],
        "labels": batch["labels"],
        "image_id": batch["image_id"],
        "orig_size": batch["orig_size"],
    }
    print("\n🔍 Inspecting `targets` structure:")
    for i in range(len(targets["boxes"])):
        print(f"--- Sample {i} ---")
        print(f"Image ID:         {targets['image_id'][i]}")
        print(f"Original Size:    {targets['orig_size'][i]}")
        print(f"Boxes shape:      {targets['boxes'][i].shape}")  # [N_i, 4]
        print(f"Labels shape:     {targets['labels'][i].shape}")  # [N_i]
        print(f"Boxes:            {targets['boxes'][i]}")
        print(f"Labels:           {targets['labels'][i]}")

    n_iters = 2000
    log_every = 100  # print the loss breakdown once every `log_every` iterations

    for n in range(n_iters):

        optimizer.zero_grad()

        # Forward pass on the same fixed batch every iteration.
        outputs = model(images, stage="train")
        (
            cls_outs,
            reg_outs,
            soft_mask_outs,
            sparse_cls_feats_outs,
            sparse_reg_feats_outs,
            dense_cls_feats_outs,
            dense_reg_feats_outs,
            feats,
            anchors,
        ) = outputs
        # To debug shapes, print safe_shape(t) for each element of `outputs`.

        loss_det = l_det(cls_outs, reg_outs, anchors, targets, device=device)

        if n % log_every == 0:
            # Label every component with its own key so the lines can be
            # told apart in the log.
            print(f"Loss Det, iter {n}: {loss_det['total_loss'].item()}")
            print(f"\tQFL, iter {n}: {loss_det['qfl'].item()}")
            print(f"\tDFL, iter {n}: {loss_det['dfl'].item()}")
            print(f"\tGIoU, iter {n}: {loss_det['giou'].item()}")

        # Only the combined loss is back-propagated.
        loss_det["total_loss"].backward()

        optimizer.step()
        scheduler.step()

    print('Overfit complete')


🔍 Inspecting `targets` structure:
--- Sample 0 ---
Image ID:         tensor([939])
Original Size:    tensor([ 765, 1360])
Boxes shape:      torch.Size([51, 4])
Labels shape:     torch.Size([51])
Boxes:            tensor([[1.1300e+02, 5.1100e+02, 2.8100e+02, 6.5400e+02],
        [4.0400e+02, 6.7100e+02, 5.8200e+02, 7.6500e+02],
        [4.9900e+02, 4.7700e+02, 6.0700e+02, 5.5600e+02],
        [6.1400e+02, 5.3100e+02, 7.4100e+02, 6.3100e+02],
        [6.3400e+02, 4.4400e+02, 7.6000e+02, 5.3400e+02],
        [6.2200e+02, 3.6700e+02, 6.4600e+02, 4.0100e+02],
        [6.1900e+02, 3.9000e+02, 6.5000e+02, 4.1600e+02],
        [5.3100e+02, 3.0400e+02, 6.1500e+02, 3.6300e+02],
        [7.7300e+02, 3.3100e+02, 8.9900e+02, 4.5500e+02],
        [8.9300e+02, 2.7100e+02, 9.7800e+02, 3.5300e+02],
        [9.6100e+02, 2.4200e+02, 1.0160e+03, 2.8100e+02],
        [9.3400e+02, 1.9900e+02, 9.8500e+02, 2.2600e+02],
        [1.0070e+03, 2.0000e+02, 1.0530e+03, 2.4000e+02],
        [1.0760e+03, 2.0400e+02,