In [1]:
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Subset
from tqdm import tqdm
from pycocotools.cocoeval import COCOeval

from src.utils.coco_utils import make_coco_loaders
from src.models.frcnn import get_frcnn_model


def get_coco_api_from_loader(loader):
    """
    Robustly get the underlying pycocotools COCO object from a DataLoader.
    """
    ds = loader.dataset
    for _ in range(10):
        if hasattr(ds, "coco"):
            return ds.coco
        if hasattr(ds, "dataset"):
            ds = ds.dataset
        else:
            break
    raise AttributeError(
        "Could not find 'coco' attribute in dataset. "
        "Please check make_coco_loaders implementation."
    )


def get_img_ids_for_loader(loader):
    """
    Build a list img_ids such that:
        img_ids[i] = COCO image_id corresponding to loader.dataset[i]

    Handles Subset and simple wrappers. Assumes val_loader has shuffle=False.
    """
    ds = loader.dataset
    indices = None  # indices in the base dataset

    while True:
        if isinstance(ds, Subset):
            if indices is None:
                indices = list(ds.indices)
            else:
                indices = [indices[i] for i in ds.indices]
            ds = ds.dataset
            continue

        if not hasattr(ds, "coco") and hasattr(ds, "dataset"):
            ds = ds.dataset
            continue

        break

    if not hasattr(ds, "coco"):
        raise RuntimeError(
            "Could not find a base COCO dataset with a 'coco' attribute under loader.dataset"
        )

    base_ds = ds
    coco = base_ds.coco

    if indices is None:
        indices = list(range(len(base_ds)))

    if hasattr(base_ds, "ids"):
        base_img_ids = list(base_ds.ids)
    else:
        base_img_ids = list(sorted(coco.getImgIds()))

    loader_img_ids = [int(base_img_ids[i]) for i in indices]
    return loader_img_ids


def train_one_epoch(model, loader, optimizer, device):
    model.train()
    running_loss = 0.0
    for images, targets in tqdm(loader, desc="Train FRCNN"):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        running_loss += losses.item()
    return running_loss / len(loader)


@torch.no_grad()
def evaluate_coco_mAP(model, loader, device):
    """
    Run COCO-style evaluation on *your subset* of val2017.
    Returns dict with AP, AP50, AP75, APs, APm, APl.
    """
    model.eval()
    coco = get_coco_api_from_loader(loader)
    coco_img_ids_all = set(coco.getImgIds())

    # Map loader idx -> true COCO image_id (for the subset)
    loader_img_ids = get_img_ids_for_loader(loader)
    assert len(loader_img_ids) == len(loader.dataset), \
        "Length mismatch between loader_img_ids and loader.dataset"

    results = []
    global_idx = 0

    for images, targets in tqdm(loader, desc="Eval FRCNN (COCO mAP)"):
        images = [img.to(device) for img in images]
        outputs = model(images)

        batch_size = len(outputs)
        batch_img_ids = loader_img_ids[global_idx: global_idx + batch_size]
        global_idx += batch_size

        for img_id, output in zip(batch_img_ids, outputs):
            if img_id not in coco_img_ids_all:
                continue

            boxes = output["boxes"].detach().cpu()
            scores = output["scores"].detach().cpu()
            labels = output["labels"].detach().cpu()

            if boxes.numel() == 0:
                continue

            # xyxy -> xywh
            boxes_xywh = boxes.clone()
            boxes_xywh[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes_xywh[:, 3] = boxes[:, 3] - boxes[:, 1]

            for box, score, label in zip(boxes_xywh, scores, labels):
                results.append(
                    {
                        "image_id": int(img_id),
                        "category_id": int(label),  # your dataset already uses COCO cat_ids
                        "bbox": box.tolist(),
                        "score": float(score),
                    }
                )

    if not results:
        print("No detections to evaluate.")
        return None

    coco_dt = coco.loadRes(results)
    coco_eval = COCOeval(coco, coco_dt, iouType="bbox")

    # evaluate only on images we actually predicted on
    eval_img_ids = sorted({r["image_id"] for r in results})
    coco_eval.params.imgIds = eval_img_ids

    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    metrics = {
        "AP": float(coco_eval.stats[0]),
        "AP50": float(coco_eval.stats[1]),
        "AP75": float(coco_eval.stats[2]),
        "APs": float(coco_eval.stats[3]),
        "APm": float(coco_eval.stats[4]),
        "APl": float(coco_eval.stats[5]),
    }
    return metrics


# ------------------------------------------------------------
# Main: data, model, optimizer, training + validation + COCO mAP
# ------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

train_loader, val_loader = make_coco_loaders(
    root="/mnt/ssd2/santana-coco/data/coco",
    batch_size=2,
    num_workers=4,
    train_limit=500,   # None for full train2017 per proposal
    val_limit=100,     # None for full val2017 per proposal
)

num_classes = 91  # standard COCO setting (incl. background)
model = get_frcnn_model(num_classes=num_classes).to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

num_epochs = 6  # short run due to limited GPU
best_ap = 0.0

# (Optional sanity check: evaluate pre-trained model before training)
print("Evaluating FRCNN before training...")
metrics0 = evaluate_coco_mAP(model, val_loader, device)
print("Initial metrics:", metrics0)

for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, device)
    scheduler.step()

    metrics = evaluate_coco_mAP(model, val_loader, device)
    if metrics is not None:
        ap = metrics["AP"]
        print(
            f"Epoch {epoch + 1}/{num_epochs} | "
            f"train_loss={train_loss:.4f} | "
            f"AP={ap:.4f} | AP50={metrics['AP50']:.4f} | "
            f"AP75={metrics['AP75']:.4f} | APs={metrics['APs']:.4f} | "
            f"APm={metrics['APm']:.4f} | APl={metrics['APl']:.4f}"
        )

        if ap > best_ap:
            best_ap = ap
            torch.save(model.state_dict(), "frcnn_best.pth")
            print(f"  -> New best FRCNN model saved (AP={ap:.4f})")
    else:
        print(
            f"Epoch {epoch + 1}/{num_epochs} | "
            f"train_loss={train_loss:.4f} | no detections on val set"
        )

    torch.save(model.state_dict(), f"frcnn_epoch{epoch + 1}.pth")


Using device: cuda
loading annotations into memory...
Done (t=4.88s)
creating index...
index created!
loading annotations into memory...
Done (t=0.23s)
creating index...
index created!
Evaluating FRCNN before training...


Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.43it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.02s).
Accumulating evaluation results...
DONE (t=0.05s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:22<00:00,  3.01it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.97it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.17s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.038
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.074
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.026
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.039
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.080
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.060
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.031
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.058
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.065
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:23<00:00,  3.01it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.93it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.16s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.124
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.293
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.072
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.085
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.184
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.158
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.107
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.200
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.204
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:23<00:00,  2.99it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.98it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.17s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.157
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.341
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.114
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.125
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.213
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.206
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.128
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.231
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.241
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:23<00:00,  3.00it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.89it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.15s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.228
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.226
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.311
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.307
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.176
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.309
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.322
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:24<00:00,  2.95it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.79it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.15s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.238
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.431
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.228
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.311
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.341
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.187
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.315
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.328
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train FRCNN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [01:23<00:00,  2.99it/s]
Eval FRCNN (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.98it/s]


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.14s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.246
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.441
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.243
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.157
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.328
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.354
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.196
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.320
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.331
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

In [1]:
from ultralytics import YOLO

# Load pretrained YOLOv8 on COCO
model = YOLO("yolov8s.pt")  # or yolov8n/m based on VRAM


results = model.train(
    data="data/coco_subset.yaml",
    epochs=6,
    imgsz=640,
    batch=16,          # shrink if OOM
    device=0,          # or "cuda:0"
    workers=4,
    project="runs/coco",
    name="yolov8s_640_subset",
)

# Validation on full val2017 (as per proposal)
metrics = model.val(data="data/coco_subset.yaml", split="val")
print("mAP50-95:", metrics.box.map)
print("mAP50:", metrics.box.map50)
print("mAP75:", metrics.box.map75)


New https://pypi.org/project/ultralytics/8.3.233 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.231 üöÄ Python-3.12.3 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 4060, 7814MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data/coco_subset.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=6, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8s_640_sub

In [1]:
import torch
from torch.optim import AdamW
from tqdm import tqdm
from pycocotools.cocoeval import COCOeval
from transformers import DetrImageProcessor, DetrForObjectDetection
from torch.utils.data import Subset  # <-- NEW

from src.utils.coco_utils import make_coco_loaders

def get_img_ids_for_loader(loader):
    """
    Build a list img_ids such that:
        img_ids[i] = COCO image_id corresponding to loader.dataset[i]

    This walks through possible wrappers (Subset, custom DatasetWrapper, etc.)
    and recovers the base COCO dataset's id list.
    Assumes val_loader is created with shuffle=False.
    """
    ds = loader.dataset

    # Unwrap Subset and generic wrappers that store the underlying dataset in .dataset
    indices = None  # indices in the base dataset
    while True:
        if isinstance(ds, Subset):
            # Map current indices through this Subset
            if indices is None:
                indices = list(ds.indices)
            else:
                indices = [indices[i] for i in ds.indices]
            ds = ds.dataset
            continue

        # handle "wrapper.dataset" style nesting (e.g. transforms)
        if not hasattr(ds, "coco") and hasattr(ds, "dataset"):
            ds = ds.dataset
            continue

        break

    if not hasattr(ds, "coco"):
        raise RuntimeError(
            "Could not find a base COCO dataset with a 'coco' attribute under loader.dataset"
        )

    base_ds = ds  # this should be a CocoDetection-like dataset
    coco = base_ds.coco

    # Base indices (in base_ds) that correspond to loader order
    if indices is None:
        indices = list(range(len(base_ds)))

    # Get the list of COCO image IDs in the same order as base_ds
    if hasattr(base_ds, "ids"):
        # Torchvision CocoDetection has .ids = list of image_ids
        base_img_ids = list(base_ds.ids)
    else:
        # Fallback: use sorted COCO IDs (this is what CocoDetection does internally)
        base_img_ids = list(sorted(coco.getImgIds()))

    # Map loader index -> COCO image_id
    loader_img_ids = [int(base_img_ids[i]) for i in indices]
    return loader_img_ids



def get_coco_api_from_loader(loader):
    """
    Robustly get the underlying pycocotools COCO object from a DataLoader.
    """
    ds = loader.dataset
    for _ in range(10):
        if hasattr(ds, "coco"):
            return ds.coco
        if hasattr(ds, "dataset"):
            ds = ds.dataset
        else:
            break
    raise AttributeError(
        "Could not find 'coco' attribute in dataset. "
        "Please check make_coco_loaders implementation."
    )


# ------------------------------------------------------------
# Helper: convert your targets (xyxy) ‚Üí COCO-style annotations
# ------------------------------------------------------------
def build_hf_targets(targets):
    """
    Convert a batch of targets from your format:
        {
            "boxes": Tensor[num_boxes, 4] in xyxy,
            "labels": Tensor[num_boxes],
            (optionally "image_id", "area", "iscrowd")
        }
    into HuggingFace/COCO-style:
        {
            "image_id": int,
            "annotations": [
                {"bbox": [x, y, w, h], "category_id": int, "area": float, "iscrowd": 0/1},
                ...
            ]
        }
    """
    hf_targets = []

    for t in targets:
        boxes = t["boxes"]  # (N, 4), xyxy
        labels = t["labels"]

        if boxes.numel() == 0:
            annotations = []
        else:
            # xyxy ‚Üí xywh
            xywh = boxes.clone()
            xywh[:, 2] = boxes[:, 2] - boxes[:, 0]  # w = x_max - x_min
            xywh[:, 3] = boxes[:, 3] - boxes[:, 1]  # h = y_max - y_min

            annotations = []
            for box, label in zip(xywh, labels):
                bbox = box.tolist()
                category_id = int(label.item() if torch.is_tensor(label) else label)
                ann = {
                    "bbox": bbox,
                    "category_id": category_id,
                    "area": float(bbox[2] * bbox[3]),  # w * h
                    "iscrowd": 0,
                }
                annotations.append(ann)

        # Keep image_id if present, otherwise 0
        if "image_id" in t:
            if torch.is_tensor(t["image_id"]):
                image_id = int(t["image_id"].item())
            else:
                image_id = int(t["image_id"])
        else:
            image_id = 0

        hf_targets.append({
            "image_id": image_id,
            "annotations": annotations,
        })

    return hf_targets


# ------------------------------------------------------------
# Training loop for HuggingFace DETR
# ------------------------------------------------------------
def train_one_epoch_detr_hf(model, processor, loader, optimizer, device):
    model.train()
    running_loss = 0.0

    for images, targets in tqdm(loader, desc="Train HF-DETR"):
        hf_targets = build_hf_targets(targets)

        encoding = processor(
            images=images,
            annotations=hf_targets,
            return_tensors="pt",
        )

        pixel_values = encoding["pixel_values"].to(device)
        labels = [
            {k: v.to(device) for k, v in target.items()}
            for target in encoding["labels"]
        ]

        outputs = model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(loader)


# ------------------------------------------------------------
# Validation loss loop for HuggingFace DETR
# ------------------------------------------------------------
@torch.no_grad()
def validate_one_epoch_detr_hf(model, processor, loader, device):
    model.eval()
    running_loss = 0.0

    for images, targets in tqdm(loader, desc="Val HF-DETR (loss)"):
        hf_targets = build_hf_targets(targets)

        encoding = processor(
            images=images,
            annotations=hf_targets,
            return_tensors="pt",
        )

        pixel_values = encoding["pixel_values"].to(device)
        labels = [
            {k: v.to(device) for k, v in target.items()}
            for target in encoding["labels"]
        ]

        outputs = model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss

        running_loss += loss.item()

    return running_loss / len(loader)


# ------------------------------------------------------------
# COCO mAP evaluation for HuggingFace DETR
# ------------------------------------------------------------
@torch.no_grad()
def evaluate_coco_mAP_detr(model, processor, loader, device):
    """
    COCO-style evaluation for HF DETR on *your subset* of val2017.

    Fixes:
      - Ensures image_ids in results match the subset used by the DataLoader.
      - Maps DETR class indices -> COCO category_id using model.config.id2label
        and coco.getCatIds(catNms=[name]).
      - Restricts COCOeval to only those imgIds we actually predicted on.
    """
    model.eval()

    # --- COCO API + image-id mapping ---
    coco = get_coco_api_from_loader(loader)
    coco_img_ids_all = set(coco.getImgIds())

    # loader_img_ids[i] = COCO image id for loader.dataset[i]
    loader_img_ids = get_img_ids_for_loader(loader)
    assert len(loader_img_ids) == len(loader.dataset), \
        "Length mismatch between loader_img_ids and loader.dataset"

    # --- build mapping: DETR label index -> COCO category_id ---
    # id2label is like {"0": "N/A", "1": "person", ...}
    id2label = {int(k): v for k, v in model.config.id2label.items()}

    label_idx_to_cat_id = {}
    for idx, name in id2label.items():
        # Some DETR classes may not exist in this COCO annotation file
        cat_ids = coco.getCatIds(catNms=[name])
        if len(cat_ids) > 0:
            label_idx_to_cat_id[idx] = cat_ids[0]

    if not label_idx_to_cat_id:
        print("WARNING: could not map any DETR labels to COCO category ids.")
        return None

    results = []
    global_idx = 0  # position in loader_img_ids

    for images, _targets in tqdm(loader, desc="Eval HF-DETR (COCO mAP)"):
        # images is a tuple; convert to list for processor
        images = list(images)

        # original H,W for each image
        target_sizes = []
        for img in images:
            if isinstance(img, torch.Tensor):
                h, w = img.shape[-2:]
            else:  # PIL
                w, h = img.size
            target_sizes.append([h, w])

        encoding = processor(images=images, return_tensors="pt")
        pixel_values = encoding["pixel_values"].to(device)

        outputs = model(pixel_values=pixel_values)
        processed_outputs = processor.post_process_object_detection(
            outputs=outputs,
            target_sizes=torch.tensor(target_sizes, device=device),
            threshold=0.0,   # keep all, COCOeval will handle scores
        )

        batch_size = len(processed_outputs)
        batch_img_ids = loader_img_ids[global_idx: global_idx + batch_size]
        global_idx += batch_size

        for img_id, pred in zip(batch_img_ids, processed_outputs):
            if img_id not in coco_img_ids_all:
                # should not happen with correct mapping, but be safe
                continue

            boxes = pred["boxes"].detach().cpu()
            scores = pred["scores"].detach().cpu()
            labels = pred["labels"].detach().cpu()

            if boxes.numel() == 0:
                continue

            # xyxy -> xywh
            boxes_xywh = boxes.clone()
            boxes_xywh[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes_xywh[:, 3] = boxes[:, 3] - boxes[:, 1]

            for box, score, label in zip(boxes_xywh, scores, labels):
                label_idx = int(label)

                # skip labels not present in this COCO annotation
                if label_idx not in label_idx_to_cat_id:
                    continue

                cat_id = int(label_idx_to_cat_id[label_idx])

                results.append(
                    {
                        "image_id": int(img_id),
                        "category_id": cat_id,
                        "bbox": box.tolist(),
                        "score": float(score),
                    }
                )

    if not results:
        print("No detections to evaluate (results list is empty).")
        return None

    # --- COCOeval restricted to our subset of images ---
    coco_dt = coco.loadRes(results)
    coco_eval = COCOeval(coco, coco_dt, iouType="bbox")

    eval_img_ids = sorted({r["image_id"] for r in results})
    coco_eval.params.imgIds = eval_img_ids

    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    metrics = {
        "AP":   float(coco_eval.stats[0]),
        "AP50": float(coco_eval.stats[1]),
        "AP75": float(coco_eval.stats[2]),
        "APs":  float(coco_eval.stats[3]),
        "APm":  float(coco_eval.stats[4]),
        "APl":  float(coco_eval.stats[5]),
    }
    return metrics



# ------------------------------------------------------------
# Main: data, model, optimizer, training + validation + COCO mAP
# ------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

train_loader, val_loader = make_coco_loaders(
    root="/mnt/ssd2/santana-coco/data/coco",
    batch_size=2,
    num_workers=4,
    train_limit=500,   # set to None for full train2017 per proposal
    val_limit=100,     # set to None for full val2017 per proposal
)

processor = DetrImageProcessor.from_pretrained(
    "facebook/detr-resnet-50",
    revision="no_timm",
)
model = DetrForObjectDetection.from_pretrained(
    "facebook/detr-resnet-50",
    revision="no_timm",
).to(device)

optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

num_epochs = 10
best_ap = 0.0

for epoch in range(num_epochs):
    train_loss = train_one_epoch_detr_hf(
        model=model,
        processor=processor,
        loader=train_loader,
        optimizer=optimizer,
        device=device,
    )

    val_loss = validate_one_epoch_detr_hf(
        model=model,
        processor=processor,
        loader=val_loader,
        device=device,
    )

    metrics = evaluate_coco_mAP_detr(
        model=model,
        processor=processor,
        loader=val_loader,
        device=device,
    )

    if metrics is not None:
        ap = metrics["AP"]
        print(
            f"[HF-DETR] Epoch {epoch + 1}/{num_epochs} | "
            f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | "
            f"AP={ap:.4f} | AP50={metrics['AP50']:.4f} | "
            f"AP75={metrics['AP75']:.4f} | APs={metrics['APs']:.4f} | "
            f"APm={metrics['APm']:.4f} | APl={metrics['APl']:.4f}"
        )

        if ap > best_ap:
            best_ap = ap
            torch.save(model.state_dict(), "detr_hf_best.pth")
            print(f"  -> New best DETR model saved (AP={ap:.4f})")
    else:
        print(
            f"[HF-DETR] Epoch {epoch + 1}/{num_epochs} | "
            f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | "
            f"no detections on val set"
        )

    torch.save(model.state_dict(), f"detr_hf_epoch{epoch + 1}.pth")


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
loading annotations into memory...
Done (t=4.13s)
creating index...
index created!
loading annotations into memory...
Done (t=0.19s)
creating index...
index created!


Train HF-DETR:   0%|          | 0/250 [00:00<?, ?it/s]It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.
Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.59it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.08it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.21it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.17s).
Accumulating evaluation results...
DONE (t=0.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.004
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.005
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.005
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.57it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.18it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.36it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.22s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.003
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.56it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.16it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.27it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.22s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.005
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:55<00:00,  4.51it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.14it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.23it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.23s).
Accumulating evaluation results...
DONE (t=0.09s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.007
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.007
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.007
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:55<00:00,  4.52it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.06it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.05it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.23s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.006
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.006
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.006
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.55it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.19it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.25it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.23s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.003
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.55it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.06it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.28it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.22s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.003
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.61it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.22it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.28it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.23s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.57it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.00it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.15it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.23s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Train HF-DETR: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:54<00:00,  4.58it/s]
Val HF-DETR (loss): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.00it/s]
Eval HF-DETR (COCO mAP): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.25it/s]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.11s).
Accumulating evaluation results...
DONE (t=0.08s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10