# RF-DETR COCO Benchmark

### Setup Dependencies

In [1]:
import os
import json
import time
import torch
import numpy as np
import psutil
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.utils.torch as fout
from fiftyone import ViewField as F

from rfdetr import RFDETRBase
from rfdetr.util.coco_classes import COCO_CLASSES
from rfdetr.util.metrics import MetricsPlotSink

print(f"PyTorch version: {torch.__version__}")
print(f"CPU Cores: {psutil.cpu_count(logical=True)}")

print("CUDA/GPU available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("GPU name:", torch.cuda.get_device_name(0))

PyTorch version: 2.5.1+cu121
CPU Cores: 12
CUDA/GPU available: True
CUDA version: 12.1
GPU name: NVIDIA GeForce GTX 1660 Ti


### Configure Local COCO Paths

In [2]:
COCO_ROOT = os.path.abspath(os.path.join(os.getcwd(), "..", "coco-dataset", "coco-2017"))

VAL_IMAGES = os.path.join(COCO_ROOT, "validation", "data")
VAL_ANN = os.path.join(COCO_ROOT, "validation", "labels.json")

print("Images path:", VAL_IMAGES)
print("Annotation file:", VAL_ANN)
print("Images found:", len(os.listdir(VAL_IMAGES)))

Images path: C:\object-identification-uav-camera\phase-1\coco-dataset\coco-2017\validation\data
Annotation file: C:\object-identification-uav-camera\phase-1\coco-dataset\coco-2017\validation\labels.json
Images found: 3125


### Load COCO Dataset in FiftyOne

In [3]:
import fiftyone as fo

dataset = fo.load_dataset("coco-val-local")
session = fo.launch_app(dataset)

### Initialize Nano Model

In [4]:
from rfdetr import RFDETRNano
model_nano = RFDETRNano()
print("Initialized RF-DETR Nano model.")

Using a different number of positional encodings than DINOv2, which means we're not loading DINOv2 backbone weights. This is not a problem if finetuning a pretrained RF-DETR model.
Using patch size 16 instead of 14, which means we're not loading DINOv2 backbone weights. This is not a problem if finetuning a pretrained RF-DETR model.
Loading pretrain weights
Initialized RF-DETR Nano model.


### Jetson Simulation Setup Cell

In [6]:
# ============================
# Jetson Orin Nano 8GB Simulation Mode
# ============================

print("\nEnabling Jetson Orin Nano 8GB simulation constraints...\n")

# ---- 1. Disable cuDNN fast paths (Jetson uses slower kernels) ----
torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
torch.set_float32_matmul_precision("high")
print("Disabled cuDNN fast algorithms & TF32.")

# ---- 2. Limit CPU parallelism ----
import os
torch.set_num_threads(2)
os.environ["OMP_NUM_THREADS"] = "2"
print("CPU threads limited to 2 (Jetson-like).")

# ---- 3. I/O latency to mimic Jetson eMMC/SD card ----
IO_LATENCY = 0.004  # 4 ms
print("Added 4ms I/O latency per image.")

# ---- 4. Simulate Jetson memory bandwidth ----
BANDWIDTH_LATENCY = 0.0008  # 0.8 ms
print("Added memory bandwidth stall of 0.8ms.")

# ---- 5. Simulated GPU compute stall ----
# Jetson Orin Nano runs ~2–4× slower than GTX 1660 Ti for transformer models
GPU_STALL = 0.003  # 3 ms (tuneable)
print("Added 3ms GPU stall per inference (Jetson compute speed).")

# ---- 6. Constrain VRAM usage ----
torch.cuda.set_per_process_memory_fraction(0.80)
print("GPU VRAM restricted to ~80% of your card (Jetson-equivalent usable memory).")

print("\nJetson Simulation Mode READY.\n")


Enabling Jetson Orin Nano 8GB simulation constraints...

Disabled cuDNN fast algorithms & TF32.
CPU threads limited to 2 (Jetson-like).
Added 4ms I/O latency per image.
Added memory bandwidth stall of 0.8ms.
Added 3ms GPU stall per inference (Jetson compute speed).
GPU VRAM restricted to ~80% of your card (Jetson-equivalent usable memory).

Jetson Simulation Mode READY.



### Inference Loop

In [10]:
print("Running Jetson-simulated inference on validation images...")

preds_json_nano_jetson = []
image_filenames = os.listdir(VAL_IMAGES)

with open(VAL_ANN, 'r') as f:
    coco_data = json.load(f)

image_id_map = {img["file_name"]: img["id"] for img in coco_data["images"]}

for filename in tqdm(image_filenames):
    # Jetson storage bottleneck
    time.sleep(IO_LATENCY)

    img_path = os.path.join(VAL_IMAGES, filename)

    # Load but DO NOT resize (important!)
    with Image.open(img_path) as im:
        if im.mode != "RGB":
            im = im.convert("RGB")
        temp_path = "temp_rgb_image.jpg"
        im.save(temp_path)

    img_path_for_model = temp_path

    # Jetson memory bandwidth bottleneck
    time.sleep(BANDWIDTH_LATENCY)

    # Jetson GPU compute bottleneck
    time.sleep(GPU_STALL)

    # Inference
    det = model_nano.predict(img_path_for_model, threshold=0.001)

    image_id = image_id_map[filename]

    boxes = det.xyxy
    scores = det.confidence
    classes = det.class_id

    for box, score, cls in zip(boxes, scores, classes):
        x1, y1, x2, y2 = box.tolist()
        w = x2 - x1
        h = y2 - y1

        preds_json_nano_jetson.append({
            "image_id": image_id,
            "category_id": int(cls),
            "score": float(score),
            "bbox": [float(x1), float(y1), float(w), float(h)]
        })

with open("predictions_nano_jetson.json", "w") as f:
    json.dump(preds_json_nano_jetson, f)

print("\nInference complete (Jetson simulated).")

Running Jetson-simulated inference on validation images...


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [04:32<00:00, 11.45it/s]



Inference complete (Jetson simulated).


### COCO Evaluation for Nano

In [11]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

print("Running COCO evaluation on Jetson-simulated predictions...")

coco_gt = COCO(VAL_ANN)
coco_dt_nano_jetson = coco_gt.loadRes("predictions_nano_jetson.json")

coco_eval_nano_jetson = COCOeval(coco_gt, coco_dt_nano_jetson, "bbox")
coco_eval_nano_jetson.evaluate()
coco_eval_nano_jetson.accumulate()
coco_eval_nano_jetson.summarize()

Running COCO evaluation on Jetson-simulated predictions...
loading annotations into memory...
Done (t=0.39s)
creating index...
index created!
Loading and preparing results...
DONE (t=6.97s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=43.54s).
Accumulating evaluation results...
DONE (t=10.34s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.428
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.621
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.456
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.234
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.508
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.704
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.342
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.555
 Ave

In [12]:
print("### Jetson-Simulated Nano Model Accuracy Metrics\n")

print(f"mAP@50:95: {coco_eval_nano_jetson.stats[0]:.3f}")
print(f"mAP@50:    {coco_eval_nano_jetson.stats[1]:.3f}")

precision = coco_eval_nano_jetson.eval['precision']
recall = coco_eval_nano_jetson.eval['recall']

# IoU=0.50, area=all, maxDets=100
prec_valid = precision[0, :, :, 0, 2]
prec_valid = prec_valid[prec_valid > -1]
prec_mean = np.mean(prec_valid)

rec_valid = recall[0, :, 0, 2]
rec_valid = rec_valid[rec_valid > -1]
rec_mean = np.mean(rec_valid)

if prec_mean + rec_mean == 0:
    f1 = 0
else:
    f1 = 2 * (prec_mean * rec_mean) / (prec_mean + rec_mean)

print(f"Precision (COCO-based): {prec_mean:.3f}")
print(f"Recall    (COCO-based): {rec_mean:.3f}")
print(f"F1-Score  (COCO-based): {f1:.3f}")


### Jetson-Simulated Nano Model Accuracy Metrics

mAP@50:95: 0.428
mAP@50:    0.621
Precision (COCO-based): 0.621
Recall    (COCO-based): 0.832
F1-Score  (COCO-based): 0.712
