In [2]:
# %% [markdown]
# Common setup: paths for models and results

from pathlib import Path
import json
import shutil

MODELS_DIR = Path("trained_models")
RESULTS_DIR = Path("results")
MODELS_DIR.mkdir(parents=True, exist_ok=True)
RESULTS_DIR.mkdir(parents=True, exist_ok=True)


In [2]:
# %% 
# ======================================
# 1. Faster R-CNN: train + COCO mAP
# ======================================
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Subset
from tqdm import tqdm
from pycocotools.cocoeval import COCOeval

from src.utils.coco_utils import make_coco_loaders
from src.models.frcnn import get_frcnn_model


def get_coco_api_from_loader(loader):
    """
    Robustly get the underlying pycocotools COCO object from a DataLoader.
    """
    ds = loader.dataset
    for _ in range(10):
        if hasattr(ds, "coco"):
            return ds.coco
        if hasattr(ds, "dataset"):
            ds = ds.dataset
        else:
            break
    raise AttributeError(
        "Could not find 'coco' attribute in dataset. "
        "Please check make_coco_loaders implementation."
    )


def get_img_ids_for_loader(loader):
    """
    Build a list img_ids such that:
        img_ids[i] = COCO image_id corresponding to loader.dataset[i]

    Handles Subset and simple wrappers. Assumes val_loader has shuffle=False.
    """
    ds = loader.dataset
    indices = None  # indices in the base dataset

    while True:
        if isinstance(ds, Subset):
            if indices is None:
                indices = list(ds.indices)
            else:
                indices = [indices[i] for i in ds.indices]
            ds = ds.dataset
            continue

        if not hasattr(ds, "coco") and hasattr(ds, "dataset"):
            ds = ds.dataset
            continue

        break

    if not hasattr(ds, "coco"):
        raise RuntimeError(
            "Could not find a base COCO dataset with a 'coco' attribute under loader.dataset"
        )

    base_ds = ds
    coco = base_ds.coco

    if indices is None:
        indices = list(range(len(base_ds)))

    if hasattr(base_ds, "ids"):
        base_img_ids = list(base_ds.ids)
    else:
        base_img_ids = list(sorted(coco.getImgIds()))

    loader_img_ids = [int(base_img_ids[i]) for i in indices]
    return loader_img_ids


def train_one_epoch(model, loader, optimizer, device):
    model.train()
    running_loss = 0.0
    for images, targets in tqdm(loader, desc="Train FRCNN"):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        running_loss += losses.item()
    return running_loss / len(loader)


@torch.no_grad()
def evaluate_coco_mAP(model, loader, device):
    """
    Run COCO-style evaluation on *your subset* of val2017.
    Returns dict with AP, AP50, AP75, APs, APm, APl.
    """
    model.eval()
    coco = get_coco_api_from_loader(loader)
    coco_img_ids_all = set(coco.getImgIds())

    loader_img_ids = get_img_ids_for_loader(loader)
    assert len(loader_img_ids) == len(loader.dataset), \
        "Length mismatch between loader_img_ids and loader.dataset"

    results = []
    global_idx = 0

    for images, _targets in tqdm(loader, desc="Eval FRCNN (COCO mAP)"):
        images = [img.to(device) for img in images]
        outputs = model(images)

        batch_size = len(outputs)
        batch_img_ids = loader_img_ids[global_idx: global_idx + batch_size]
        global_idx += batch_size

        for img_id, output in zip(batch_img_ids, outputs):
            if img_id not in coco_img_ids_all:
                continue

            boxes = output["boxes"].detach().cpu()
            scores = output["scores"].detach().cpu()
            labels = output["labels"].detach().cpu()

            if boxes.numel() == 0:
                continue

            # xyxy -> xywh
            boxes_xywh = boxes.clone()
            boxes_xywh[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes_xywh[:, 3] = boxes[:, 3] - boxes[:, 1]

            for box, score, label in zip(boxes_xywh, scores, labels):
                results.append(
                    {
                        "image_id": int(img_id),
                        "category_id": int(label),  # COCO cat_ids
                        "bbox": box.tolist(),
                        "score": float(score),
                    }
                )

    if not results:
        print("No detections to evaluate.")
        return None

    coco_dt = coco.loadRes(results)
    coco_eval = COCOeval(coco, coco_dt, iouType="bbox")

    eval_img_ids = sorted({r["image_id"] for r in results})
    coco_eval.params.imgIds = eval_img_ids

    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    metrics = {
        "AP":   float(coco_eval.stats[0]),
        "AP50": float(coco_eval.stats[1]),
        "AP75": float(coco_eval.stats[2]),
        "APs":  float(coco_eval.stats[3]),
        "APm":  float(coco_eval.stats[4]),
        "APl":  float(coco_eval.stats[5]),
    }
    return metrics


# ------------ main FRCNN script ------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

train_loader, val_loader = make_coco_loaders(
    root="/mnt/ssd2/santana-coco/data/coco",
    batch_size=2,
    num_workers=4,
    train_limit=500,   # None for full train2017 per proposal
    val_limit=100,     # None for full val2017 per proposal
)

num_classes = 91  # standard COCO setting (incl. background)
model = get_frcnn_model(num_classes=num_classes).to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

num_epochs = 3
best_ap = 0.0

frcnn_history = []

# Evaluate pre-trained FRCNN before fine-tuning
print("Evaluating FRCNN before training...")
metrics0 = evaluate_coco_mAP(model, val_loader, device)
print("Initial FRCNN metrics:", metrics0)

entry0 = {"epoch": 0, "train_loss": None}
if metrics0 is not None:
    entry0.update(metrics0)
frcnn_history.append(entry0)

for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, device)
    scheduler.step()

    metrics = evaluate_coco_mAP(model, val_loader, device)
    if metrics is not None:
        ap = metrics["AP"]
        print(
            f"[FRCNN] Epoch {epoch + 1}/{num_epochs} | "
            f"train_loss={train_loss:.4f} | "
            f"AP={ap:.4f} | AP50={metrics['AP50']:.4f} | "
            f"AP75={metrics['AP75']:.4f} | APs={metrics['APs']:.4f} | "
            f"APm={metrics['APm']:.4f} | APl={metrics['APl']:.4f}"
        )

        history_entry = {"epoch": epoch + 1, "train_loss": train_loss}
        history_entry.update(metrics)
        frcnn_history.append(history_entry)

        if ap > best_ap:
            best_ap = ap
            torch.save(
                model.state_dict(),
                MODELS_DIR / "frcnn_best.pth",
            )
            print(f"  -> New best FRCNN model saved (AP={ap:.4f})")
    else:
        print(
            f"[FRCNN] Epoch {epoch + 1}/{num_epochs} | "
            f"train_loss={train_loss:.4f} | no detections on val set"
        )
        frcnn_history.append(
            {"epoch": epoch + 1, "train_loss": train_loss}
        )

    torch.save(
        model.state_dict(),
        MODELS_DIR / f"frcnn_epoch{epoch + 1}.pth",
    )

# Save FRCNN history for later plotting
with open(RESULTS_DIR / "frcnn_history.json", "w") as f:
    json.dump(frcnn_history, f, indent=2)
print("Saved FRCNN history to", RESULTS_DIR / "frcnn_history.json")


Using device: cuda
loading annotations into memory...
Done (t=4.08s)
creating index...
index created!
loading annotations into memory...
Done (t=0.19s)
creating index...
index created!
Evaluating FRCNN before training...


Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.51it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.02s).
Accumulating evaluation results...
DONE (t=0.05s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:23<00:00,  3.01it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 16.01it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.11s).
Accumulating evaluation results...
DONE (t=0.07s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.027
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.054
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.024
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.034
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.040
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.023
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.015
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.036
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.040
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:23<00:00,  2.98it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.89it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.18s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.115
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.246
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.092
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.105
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.168
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.109
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.096
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.176
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.187
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:22<00:00,  3.02it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.86it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.18s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.154
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.338
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.111
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.119
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.208
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.215
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.131
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.240
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.262
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

In [2]:
# %% 
# ======================================
# 2. YOLOv8: train + val (Ultralytics)
# ======================================
from ultralytics import YOLO

# Use the same subset COCO yaml you built earlier
# (assumed to point to subset train/val COCO folders)
yolo_model = YOLO("yolov8s.pt")  # pretrained on full COCO

yolo_history = []

# Optional: evaluate pretrained YOLO before fine-tuning on your subset
print("Evaluating YOLOv8s (pretrained) on coco_subset val...")
metrics0 = yolo_model.val(data="data/coco_subset.yaml", split="val")
pre_entry = {
    "stage": "pretrained",
    "map":   float(metrics0.box.map),
    "map50": float(metrics0.box.map50),
    "map75": float(metrics0.box.map75),
}
yolo_history.append(pre_entry)
print("YOLOv8s initial:", pre_entry)

# Train on subset COCO
yolo_results = yolo_model.train(
    data="data/coco_subset.yaml",
    epochs=3,
    imgsz=640,
    batch=16,          # shrink if OOM
    device=0,          # or "cuda:0"
    workers=4,
    project=str(MODELS_DIR / "yolo"),
    name="yolov8s_640_subset",
)

# Validation on subset val (consistent with proposal setup)
metrics = yolo_model.val(data="data/coco_subset.yaml", split="val")
post_entry = {
    "stage": "finetuned",
    "map":   float(metrics.box.map),
    "map50": float(metrics.box.map50),
    "map75": float(metrics.box.map75),
}

# per-class mAP50-95 (for extra analysis if needed)
try:
    post_entry["per_class_map"] = [float(x) for x in metrics.box.maps]
except Exception:
    post_entry["per_class_map"] = None

yolo_history.append(post_entry)
print("YOLOv8s fine-tuned:", post_entry)

# Save YOLO history JSON for plotting
with open(RESULTS_DIR / "yolo_history.json", "w") as f:
    json.dump(yolo_history, f, indent=2)
print("Saved YOLO history to", RESULTS_DIR / "yolo_history.json")

# Copy Ultralytics training results.csv into results/ for easy loading
yolo_run_dir = MODELS_DIR / "yolo" / "yolov8s_640_subset"
yolo_results_csv = yolo_run_dir / "results.csv"
if yolo_results_csv.exists():
    shutil.copy2(yolo_results_csv, RESULTS_DIR / "yolov8_results.csv")
    print("Copied YOLO results.csv to", RESULTS_DIR / "yolov8_results.csv")
else:
    print("WARNING: YOLO results.csv not found at", yolo_results_csv)

# Best YOLO weights are already in:
#   trained_models/yolo/yolov8s_640_subset/weights/best.pt
# which matches your 'trained_models/' requirement.


Evaluating YOLOv8s (pretrained) on coco_subset val...
Ultralytics 8.3.231 üöÄ Python-3.12.3 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 4060, 7814MiB)
YOLOv8s summary (fused): 72 layers, 11,156,544 parameters, 0 gradients, 28.6 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 7331.3¬±1356.6 MB/s, size: 169.9 KB)
[K[34m[1mval: [0mScanning /mnt/ssd2/santana-coco/data/coco_subset_500_100/labels/val.cache... 100 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 100/100 463.5Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 7/7 8.3it/s 0.8s0.1s
                   all        100        860      0.719      0.636      0.709      0.557
                person         54        288      0.869      0.625      0.792      0.558
               bicycle          2          2      0.644          1      0.995      0.672
                   car         11  

In [3]:
# %% 
# ======================================
# 3. HF DETR: train + val loss + COCO mAP
# ======================================
import torch
from torch.optim import AdamW
from tqdm import tqdm
from pycocotools.cocoeval import COCOeval
from transformers import DetrImageProcessor, DetrForObjectDetection
from torch.utils.data import Subset  # used in get_img_ids_for_loader

from src.utils.coco_utils import make_coco_loaders


def get_img_ids_for_loader(loader):
    """
    Build a list img_ids such that:
        img_ids[i] = COCO image_id corresponding to loader.dataset[i]

    This walks through possible wrappers (Subset, custom DatasetWrapper, etc.)
    and recovers the base COCO dataset's id list.
    Assumes val_loader is created with shuffle=False.
    """
    ds = loader.dataset
    indices = None  # indices in the base dataset
    while True:
        if isinstance(ds, Subset):
            if indices is None:
                indices = list(ds.indices)
            else:
                indices = [indices[i] for i in ds.indices]
            ds = ds.dataset
            continue

        if not hasattr(ds, "coco") and hasattr(ds, "dataset"):
            ds = ds.dataset
            continue

        break

    if not hasattr(ds, "coco"):
        raise RuntimeError(
            "Could not find a base COCO dataset with a 'coco' attribute under loader.dataset"
        )

    base_ds = ds
    coco = base_ds.coco

    if indices is None:
        indices = list(range(len(base_ds)))

    if hasattr(base_ds, "ids"):
        base_img_ids = list(base_ds.ids)
    else:
        base_img_ids = list(sorted(coco.getImgIds()))

    loader_img_ids = [int(base_img_ids[i]) for i in indices]
    return loader_img_ids


def get_coco_api_from_loader(loader):
    """
    Robustly get the underlying pycocotools COCO object from a DataLoader.
    """
    ds = loader.dataset
    for _ in range(10):
        if hasattr(ds, "coco"):
            return ds.coco
        if hasattr(ds, "dataset"):
            ds = ds.dataset
        else:
            break
    raise AttributeError(
        "Could not find 'coco' attribute in dataset. "
        "Please check make_coco_loaders implementation."
    )


# -------------------------
# Helper: xyxy ‚Üí HF targets
# -------------------------
def build_hf_targets(targets):
    """
    Convert a batch of targets from your format:
        {
            "boxes": Tensor[num_boxes, 4] in xyxy,
            "labels": Tensor[num_boxes],
            (optionally "image_id", "area", "iscrowd")
        }
    into HF/COCO-style annotations.
    """
    hf_targets = []

    for t in targets:
        boxes = t["boxes"]  # (N, 4), xyxy
        labels = t["labels"]

        if boxes.numel() == 0:
            annotations = []
        else:
            xywh = boxes.clone()
            xywh[:, 2] = boxes[:, 2] - boxes[:, 0]
            xywh[:, 3] = boxes[:, 3] - boxes[:, 1]

            annotations = []
            for box, label in zip(xywh, labels):
                bbox = box.tolist()
                category_id = int(label.item() if torch.is_tensor(label) else label)
                ann = {
                    "bbox": bbox,
                    "category_id": category_id,
                    "area": float(bbox[2] * bbox[3]),
                    "iscrowd": 0,
                }
                annotations.append(ann)

        if "image_id" in t:
            if torch.is_tensor(t["image_id"]):
                image_id = int(t["image_id"].item())
            else:
                image_id = int(t["image_id"])
        else:
            image_id = 0

        hf_targets.append({
            "image_id": image_id,
            "annotations": annotations,
        })

    return hf_targets


# -------------------------
# Training loop for HF DETR
# -------------------------
def train_one_epoch_detr_hf(model, processor, loader, optimizer, device):
    model.train()
    running_loss = 0.0

    for images, targets in tqdm(loader, desc="Train HF-DETR"):
        hf_targets = build_hf_targets(targets)

        encoding = processor(
            images=list(images),
            annotations=hf_targets,
            return_tensors="pt",
        )

        pixel_values = encoding["pixel_values"].to(device)
        labels = [
            {k: v.to(device) for k, v in target.items()}
            for target in encoding["labels"]
        ]

        outputs = model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(loader)


@torch.no_grad()
def validate_one_epoch_detr_hf(model, processor, loader, device):
    model.eval()
    running_loss = 0.0

    for images, targets in tqdm(loader, desc="Val HF-DETR (loss)"):
        hf_targets = build_hf_targets(targets)

        encoding = processor(
            images=list(images),
            annotations=hf_targets,
            return_tensors="pt",
        )

        pixel_values = encoding["pixel_values"].to(device)
        labels = [
            {k: v.to(device) for k, v in target.items()}
            for target in encoding["labels"]
        ]

        outputs = model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss

        running_loss += loss.item()

    return running_loss / len(loader)


# -------------------------
# COCO mAP evaluation for HF DETR
# -------------------------
@torch.no_grad()
def evaluate_coco_mAP_detr(model, processor, loader, device):
    """
    COCO-style evaluation for HF DETR on *your subset* of val2017.

    - Ensures image_ids in results match subset used by the DataLoader.
    - Maps DETR label indices -> COCO category_id using id2label + coco.getCatIds.
    - Restricts COCOeval to only those imgIds we actually predicted on.
    """
    model.eval()

    coco = get_coco_api_from_loader(loader)
    coco_img_ids_all = set(coco.getImgIds())

    loader_img_ids = get_img_ids_for_loader(loader)
    assert len(loader_img_ids) == len(loader.dataset), \
        "Length mismatch between loader_img_ids and loader.dataset"

    id2label = {int(k): v for k, v in model.config.id2label.items()}

    label_idx_to_cat_id = {}
    for idx, name in id2label.items():
        cat_ids = coco.getCatIds(catNms=[name])
        if len(cat_ids) > 0:
            label_idx_to_cat_id[idx] = cat_ids[0]

    if not label_idx_to_cat_id:
        print("WARNING: could not map any DETR labels to COCO category ids.")
        return None

    results = []
    global_idx = 0

    for images, _targets in tqdm(loader, desc="Eval HF-DETR (COCO mAP)"):
        images = list(images)

        target_sizes = []
        for img in images:
            if isinstance(img, torch.Tensor):
                h, w = img.shape[-2:]
            else:
                w, h = img.size
            target_sizes.append([h, w])

        encoding = processor(images=images, return_tensors="pt")
        pixel_values = encoding["pixel_values"].to(device)

        outputs = model(pixel_values=pixel_values)
        processed_outputs = processor.post_process_object_detection(
            outputs=outputs,
            target_sizes=torch.tensor(target_sizes, device=device),
            threshold=0.0,
        )

        batch_size = len(processed_outputs)
        batch_img_ids = loader_img_ids[global_idx: global_idx + batch_size]
        global_idx += batch_size

        for img_id, pred in zip(batch_img_ids, processed_outputs):
            if img_id not in coco_img_ids_all:
                continue

            boxes = pred["boxes"].detach().cpu()
            scores = pred["scores"].detach().cpu()
            labels = pred["labels"].detach().cpu()

            if boxes.numel() == 0:
                continue

            boxes_xywh = boxes.clone()
            boxes_xywh[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes_xywh[:, 3] = boxes[:, 3] - boxes[:, 1]

            for box, score, label in zip(boxes_xywh, scores, labels):
                label_idx = int(label)
                if label_idx not in label_idx_to_cat_id:
                    continue

                cat_id = int(label_idx_to_cat_id[label_idx])

                results.append(
                    {
                        "image_id": int(img_id),
                        "category_id": cat_id,
                        "bbox": box.tolist(),
                        "score": float(score),
                    }
                )

    if not results:
        print("No detections to evaluate (results list is empty).")
        return None

    coco_dt = coco.loadRes(results)
    coco_eval = COCOeval(coco, coco_dt, iouType="bbox")

    eval_img_ids = sorted({r["image_id"] for r in results})
    coco_eval.params.imgIds = eval_img_ids

    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    metrics = {
        "AP":   float(coco_eval.stats[0]),
        "AP50": float(coco_eval.stats[1]),
        "AP75": float(coco_eval.stats[2]),
        "APs":  float(coco_eval.stats[3]),
        "APm":  float(coco_eval.stats[4]),
        "APl":  float(coco_eval.stats[5]),
    }
    return metrics


# ------------ main DETR script ------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

train_loader, val_loader = make_coco_loaders(
    root="/mnt/ssd2/santana-coco/data/coco",
    batch_size=2,
    num_workers=4,
    train_limit=500,
    val_limit=100,
)

processor = DetrImageProcessor.from_pretrained(
    "facebook/detr-resnet-50",
    revision="no_timm",
)
model = DetrForObjectDetection.from_pretrained(
    "facebook/detr-resnet-50",
    revision="no_timm",
).to(device)

optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

num_epochs = 3
best_ap = 0.0
detr_history = []

# Optional: evaluate DETR before fine-tuning
print("Evaluating HF-DETR before training...")
metrics0 = evaluate_coco_mAP_detr(
    model=model,
    processor=processor,
    loader=val_loader,
    device=device,
)
print("Initial HF-DETR metrics:", metrics0)

entry0 = {"epoch": 0, "train_loss": None, "val_loss": None}
if metrics0 is not None:
    entry0.update(metrics0)
detr_history.append(entry0)

for epoch in range(num_epochs):
    train_loss = train_one_epoch_detr_hf(
        model=model,
        processor=processor,
        loader=train_loader,
        optimizer=optimizer,
        device=device,
    )

    val_loss = validate_one_epoch_detr_hf(
        model=model,
        processor=processor,
        loader=val_loader,
        device=device,
    )

    metrics = evaluate_coco_mAP_detr(
        model=model,
        processor=processor,
        loader=val_loader,
        device=device,
    )

    if metrics is not None:
        ap = metrics["AP"]
        print(
            f"[HF-DETR] Epoch {epoch + 1}/{num_epochs} | "
            f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | "
            f"AP={ap:.4f} | AP50={metrics['AP50']:.4f} | "
            f"AP75={metrics['AP75']:.4f} | APs={metrics['APs']:.4f} | "
            f"APm={metrics['APm']:.4f} | APl={metrics['APl']:.4f}"
        )

        history_entry = {
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": val_loss,
        }
        history_entry.update(metrics)
        detr_history.append(history_entry)

        if ap > best_ap:
            best_ap = ap
            torch.save(
                model.state_dict(),
                MODELS_DIR / "detr_hf_best.pth",
            )
            print(f"  -> New best DETR model saved (AP={ap:.4f})")
    else:
        print(
            f"[HF-DETR] Epoch {epoch + 1}/{num_epochs} | "
            f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | "
            f"no detections on val set"
        )
        detr_history.append(
            {
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "val_loss": val_loss,
            }
        )

    torch.save(
        model.state_dict(),
        MODELS_DIR / f"detr_hf_epoch{epoch + 1}.pth",
    )

# Save DETR history for plotting
with open(RESULTS_DIR / "detr_hf_history.json", "w") as f:
    json.dump(detr_history, f, indent=2)
print("Saved HF-DETR history to", RESULTS_DIR / "detr_hf_history.json")


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
loading annotations into memory...
Done (t=4.15s)
creating index...
index created!
loading annotations into memory...
Done (t=0.23s)
creating index...
index created!
Evaluating HF-DETR before training...


Eval HF-DETR (COCO mAP):   0%|          | 0/100 [00:00<?, ?it/s]It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 12.82it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.07s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:55<00:00,  4.53it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.18it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.24it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.24s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.57it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.14it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.26it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.23s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.003
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.59it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.12it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.23it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.24s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10