In [None]:
from pathlib import Path
from typing import Callable, Any, cast

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import CocoDetection
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, fasterrcnn_resnet50_fpn, fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_Weights, FasterRCNN_ResNet50_FPN_V2_Weights
import torchvision.transforms.functional as F
from ml_carbucks import DATA_CAR_DD_DIR
from ml_carbucks.utils.logger import setup_logger

# Target side length in pixels: create_transforms resizes the longest image side to
# this value and pads the shorter side up to it.
IMG_SIZE = 320
# Samples per batch for both DataLoaders (the loaders also use BATCH_SIZE // 2 workers).
BATCH_SIZE = 16
# Number of outputs of the replacement FastRCNNPredictor head.
NUM_CLASSES = 4  # background + 3 object classes
# --- Dataset must return ---
# img: tensor [3,H,W]
# target: dict with:
#   boxes (FloatTensor [N,4]), labels (Int64Tensor [N])
#   optional: image_id, area, iscrowd



class COCODetectionWrapper(Dataset):
    """COCO detection dataset that yields ``(image, target)`` pairs for torchvision detectors.

    Each item is built from a ``CocoDetection`` sample:

    * boxes are clipped to the image, degenerate (zero-area) boxes are dropped,
    * the optional ``transforms`` callable is applied on COCO-format boxes,
    * boxes are converted from COCO ``[x, y, w, h]`` to VOC ``[x1, y1, x2, y2]``.

    Args:
        img_folder: Directory containing the images.
        ann_file: Path to the COCO annotation JSON.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a mapping
            with the keys ``"image"``, ``"bboxes"`` and ``"labels"``. When omitted,
            the image is returned as the untransformed ``uint8`` numpy array.
    """

    def __init__(self, img_folder, ann_file, transforms=None):
        self.dataset = CocoDetection(img_folder, ann_file)
        self.transforms = transforms

        # Map COCO category IDs (possibly non-sequential) to contiguous labels 1..K;
        # 0 is reserved for background. Sorting the IDs makes the mapping independent
        # of the category order inside the JSON, so separately loaded splits agree.
        self.cat_id_to_label = {
            cat_id: label
            for label, cat_id in enumerate(sorted(self.dataset.coco.cats), start=1)
        }

    def _valid_coco_boxes(self, anns, height, width):
        """Return ``(boxes, labels)`` for ``anns`` with boxes clipped to the image.

        Boxes stay in COCO ``[x, y, w, h]`` format. A box that ends up with
        non-positive width or height after clipping is dropped together with its
        label, because such boxes are rejected downstream.
        """
        if len(anns) == 0:
            return np.zeros((0, 4), dtype=np.float32), np.zeros((0,), dtype=np.int64)

        raw = np.array([ann["bbox"] for ann in anns], dtype=np.float32).reshape(-1, 4)
        labels = np.array(
            [self.cat_id_to_label[ann["category_id"]] for ann in anns], dtype=np.int64
        )

        # Clip both corners (not only the origin) so a box that sticks out of the
        # image is shrunk instead of being shifted.
        x1 = np.clip(raw[:, 0], 0, width)
        y1 = np.clip(raw[:, 1], 0, height)
        x2 = np.clip(raw[:, 0] + raw[:, 2], 0, width)
        y2 = np.clip(raw[:, 1] + raw[:, 3], 0, height)

        keep = (x2 > x1) & (y2 > y1)
        clipped = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1).astype(np.float32)
        return clipped[keep], labels[keep]

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` holds ``boxes`` (float32 ``[N, 4]``, xyxy), ``labels`` (int64 ``[N]``),
        ``image_id`` (the dataset index), ``area`` and ``iscrowd`` (all zeros).
        """
        img, anns = self.dataset[idx]
        img = np.array(img, dtype=np.uint8)  # needed for Albumentations
        height, width = img.shape[:2]

        boxes_coco, labels = self._valid_coco_boxes(anns, height, width)

        if self.transforms:
            sample = self.transforms(image=img, bboxes=boxes_coco.tolist(), labels=labels.tolist())
            img = sample["image"]
            # reshape keeps the [N, 4] / [N] layout even when every box was removed
            boxes_coco = np.array(sample["bboxes"], dtype=np.float32).reshape(-1, 4)
            labels = np.array(sample["labels"], dtype=np.int64).reshape(-1)

        # Convert COCO [x, y, w, h] to VOC [x1, y1, x2, y2]
        boxes_voc = boxes_coco.copy()
        boxes_voc[:, 2:] += boxes_voc[:, :2]

        # Guard against boxes that collapsed during the transform.
        keep = (boxes_voc[:, 2] > boxes_voc[:, 0]) & (boxes_voc[:, 3] > boxes_voc[:, 1])
        boxes_voc = boxes_voc[keep]
        labels = labels[keep]

        target = {
            "boxes": torch.from_numpy(boxes_voc),
            "labels": torch.from_numpy(labels),
            "image_id": torch.tensor(idx),
            "area": torch.from_numpy((boxes_voc[:, 2] - boxes_voc[:, 0]) * (boxes_voc[:, 3] - boxes_voc[:, 1])),
            "iscrowd": torch.zeros((boxes_voc.shape[0],), dtype=torch.int64),
        }

        return img, target

    def __len__(self):
        """Number of images in the wrapped dataset."""
        return len(self.dataset)


def create_transforms(is_training: bool) -> A.Compose:
    """Build the Albumentations pipeline applied to every sample.

    The pipeline resizes the longest side to ``IMG_SIZE``, pads with black up to
    ``IMG_SIZE`` in both dimensions, optionally flips horizontally (training only),
    normalizes with the given mean/std and converts the image to a tensor.
    Boxes are passed in and returned in COCO format, with their class ids in the
    ``labels`` field.

    Args:
        is_training: When true, a random horizontal flip (p=0.5) is included.

    Returns:
        The composed transform.
    """
    resize_and_pad = [
        A.LongestMaxSize(max_size=IMG_SIZE),
        A.PadIfNeeded(
            min_height=IMG_SIZE,
            min_width=IMG_SIZE,
            border_mode=cv2.BORDER_CONSTANT,  # constant padding
            fill=(0, 0, 0),  # black
        ),
    ]
    augmentations = [A.HorizontalFlip(p=0.5)] if is_training else []
    to_model_input = [
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
    pipeline = [*resize_and_pad, *augmentations, *to_model_input]

    box_params = A.BboxParams(
        format="coco",  # boxes go in and come out as [x, y, w, h]
        label_fields=["labels"],
        min_visibility=0.3,
    )
    return A.Compose(cast(Any, pipeline), bbox_params=box_params)
# --- Datasets ---
# Training split: built with is_training=True, so the pipeline includes the random flip.
train_dataset = COCODetectionWrapper(
    DATA_CAR_DD_DIR / "images" / "train",
    DATA_CAR_DD_DIR / "instances_train.json",
    create_transforms(is_training=True),
)
# Validation split: same pipeline without the training-only augmentation.
val_dataset = COCODetectionWrapper(
    DATA_CAR_DD_DIR / "images" / "val",
    DATA_CAR_DD_DIR / "instances_val.json",
    create_transforms(is_training=False),
)

def collate_fn(batch):
    """Transpose a list of samples into one tuple per sample field.

    For ``(image, target)`` samples this returns ``(images, targets)``, each a
    tuple, without stacking anything.
    """
    per_field = zip(*batch)
    return tuple(per_field)



# Settings shared by both loaders.
_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=BATCH_SIZE // 2,  # adjust based on your CPU
    pin_memory=True,
    collate_fn=collate_fn,  # crucial: keeps images/targets as per-sample tuples
)

# Training batches are reshuffled every epoch; validation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)


logger = setup_logger("faster_rcnn")


In [None]:
from ml_carbucks.utils.inference import plot_img_pred as ppp
# Pull a single batch from the training loader for a visual sanity check.
imgs, targets = next(iter(train_loader))

iii = 5  # position of the inspected sample inside the batch
sample_img, sample_target = imgs[iii], targets[iii]
print(sample_img, sample_target)

# Draw the sample with its ground-truth boxes (corner format).
ppp(sample_img, sample_target['boxes'], coords="xyxy")


In [None]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from tqdm import tqdm
# Release cached GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The data pipeline already normalizes images with the ImageNet mean/std
# (A.Normalize in create_transforms). torchvision's Faster R-CNN normalizes its
# inputs again inside its built-in transform, using the same statistics by
# default, so the images would be normalized twice. Identity statistics turn
# the model-side normalization into a no-op.
# NOTE(review): the built-in transform also resizes inputs (default min_size=800),
# so the IMG_SIZE images are upscaled inside the model - confirm this is intended.
model = fasterrcnn_resnet50_fpn(
    weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT,
    image_mean=(0.0, 0.0, 0.0),
    image_std=(1.0, 1.0, 1.0),
)
# model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT)

# Swap the pretrained box predictor for one sized for this dataset's classes.
in_features = model.roi_heads.box_predictor.cls_score.in_features # type: ignore
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)
model = model.to(device)

# --- Optimizer ---
# Parameters are split by name: anything containing "backbone" gets the smaller
# learning rate, everything else the larger one.
named_params = list(model.named_parameters())
backbone_params = [param for name, param in named_params if "backbone" in name]
head_params = [param for name, param in named_params if "backbone" not in name]

param_groups = [
    {"params": backbone_params, "lr": 5e-5, "weight_decay": 1e-4},
    {"params": head_params, "lr": 1e-3, "weight_decay": 1e-4},
]
optimizer = torch.optim.AdamW(param_groups)

EPOCHS = 200
# --- Scheduler (optional) ---
# Cosine annealing with T_max set to the number of epochs.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)


# --- Training loop ---
num_epochs = EPOCHS
for epoch in range(num_epochs):
    # ---- Optimisation pass over the training loader ----
    model.train()
    total_loss = 0.0
    for imgs, targets in tqdm(train_loader):
        imgs = [img.to(device) for img in imgs]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Clear stale gradients before computing the new ones.
        optimizer.zero_grad()

        # Called with targets, the model returns a dict of losses; optimise their sum.
        loss_dict = model(imgs, targets)
        loss = sum(loss_dict.values())
        loss.backward() # type: ignore
        optimizer.step()

        total_loss += loss.item() # type: ignore

    # The learning-rate schedule advances once per epoch.
    if scheduler:
        scheduler.step()

    # ---- Evaluation pass over the validation loader ----
    model.eval()
    metric = MeanAveragePrecision()
    with torch.no_grad():
        for imgs, targets in tqdm(val_loader):
            imgs = [img.to(device) for img in imgs]
            outputs = model(imgs)

            # The metric is updated with CPU copies of predictions and targets.
            outputs_cpu = [{k: v.cpu() for k, v in t.items()} for t in outputs]
            targets_cpu = [{k: v.cpu() for k, v in t.items()} for t in targets]
            metric.update(outputs_cpu, targets_cpu)

    val_res = metric.compute()
    metric.reset()
    # total_loss is the sum of the per-batch losses of this epoch.
    logger.info(f"Epoch {epoch+1}/{num_epochs} | Loss: {total_loss:.4f} | val_map: {val_res['map'].item():.4f}")

In [None]:



# # --- Model ---
# num_classes = 4  # background + 3 classes
# model = fasterrcnn_resnet50_fpn(pretrained=True)

# # Replace the head (classifier)
# in_features = model.roi_heads.box_predictor.cls_score.in_features
# model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)


# # --- Training Setup ---
# params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# num_epochs = 20
# for epoch in range(num_epochs):
#     model.train()
#     for imgs, targets in train_loader:
#         imgs = [img.to(device) for img in imgs]
#         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

#         loss_dict = model(imgs, targets)
#         losses = sum(loss for loss in loss_dict.values())

#         optimizer.zero_grad()
#         losses.backward()
#         optimizer.step()

#     lr_scheduler.step()
#     print(f"Epoch {epoch+1}/{num_epochs}, loss: {losses.item():.4f}")


# model.eval()
# with torch.no_grad():
#     for imgs, _ in val_loader:
#         imgs = [img.to(device) for img in imgs]
#         preds = model(imgs)  # list of dicts with boxes, labels, scores
