# RF-DETR COCO Benchmark

### Setup Dependencies

In [1]:
import os
import json
import time
import torch
import numpy as np
import psutil
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.utils.torch as fout
from fiftyone import ViewField as F

from rfdetr import RFDETRBase
from rfdetr.util.coco_classes import COCO_CLASSES
from rfdetr.util.metrics import MetricsPlotSink

# Report the runtime environment (framework version, GPU availability and
# logical CPU count) in a single print call, one item per line.
environment_summary = "\n".join(
    [
        f"PyTorch version: {torch.__version__}",
        f"GPU Available: {torch.cuda.is_available()}",
        f"CPU Cores: {psutil.cpu_count(logical=True)}",
    ]
)
print(environment_summary)

PyTorch version: 2.2.2+cpu
GPU Available: False
CPU Cores: 12


### Configure Local COCO Paths

In [2]:
# Root of the local COCO-2017 copy, resolved relative to the current working
# directory: <cwd>/../coco-dataset/coco-2017
COCO_ROOT = os.path.abspath(
    os.path.join(os.getcwd(), "..", "coco-dataset", "coco-2017")
)

# Validation split: image folder and the annotation JSON that sits next to it.
_val_split_dir = os.path.join(COCO_ROOT, "validation")
VAL_IMAGES = os.path.join(_val_split_dir, "data")
VAL_ANN = os.path.join(_val_split_dir, "labels.json")

# Echo the resolved locations, then count the entries in the image folder as a
# quick check that the path is correct.
print("Images path:", VAL_IMAGES)
print("Annotation file:", VAL_ANN)
val_entries = os.listdir(VAL_IMAGES)
print("Images found:", len(val_entries))

Images path: C:\object-identification-uav-camera\phase-1\coco-dataset\coco-2017\validation\data
Annotation file: C:\object-identification-uav-camera\phase-1\coco-dataset\coco-2017\validation\labels.json
Images found: 3125


### Load COCO Dataset in FiftyOne

In [3]:
import fiftyone as fo

# Name under which the validation split is registered in FiftyOne.
DATASET_NAME = "coco-val-local"

# Reuse the dataset when it is already registered. Otherwise import it from the
# local COCO files, so the notebook does not depend on a dataset that was
# created outside of it (e.g. on a fresh FiftyOne database).
if fo.dataset_exists(DATASET_NAME):
    dataset = fo.load_dataset(DATASET_NAME)
else:
    dataset = fo.Dataset.from_dir(
        dataset_type=fo.types.COCODetectionDataset,
        data_path=VAL_IMAGES,
        labels_path=VAL_ANN,
        name=DATASET_NAME,
    )
    # Persist the dataset so later sessions can load it by name.
    dataset.persistent = True

# Open the FiftyOne App on the dataset for visual inspection.
session = fo.launch_app(dataset)

### Initialize Base Model

In [4]:
# Build the RF-DETR Base detector with its default constructor arguments.
# The recorded cell output shows pretrained weights being loaded at this point.
model = RFDETRBase()

init_message = "Initialized RFDETRBase (official)."
print(init_message)

Loading pretrain weights
Initialized RFDETRBase (official).


### Sanity Check

In [5]:
# Smoke-test the model on one validation image before the full benchmark run.
# os.listdir returns entries in arbitrary order, so sort to make the chosen
# sample reproducible across runs and machines.
image_filenames = sorted(os.listdir(VAL_IMAGES))
if not image_filenames:
    raise FileNotFoundError(f"No images found in {VAL_IMAGES}")

sample_path = os.path.join(VAL_IMAGES, image_filenames[0])

# Feed an in-memory RGB image, the same preprocessing the inference loops rely
# on, so a grayscale first image cannot break the check.
with Image.open(sample_path) as im:
    res = model.predict(im.convert("RGB"))

print("Prediction output for 1 image:")
print(res)
print("Type:", type(res))

# Indexing is only meaningful when at least one detection was returned.
if len(res) > 0:
    print("First element:", res[0])
    print("Type of first element:", type(res[0]))

Model is not optimized for inference. Latency may be higher than expected. You can optimize the model for inference by calling model.optimize_for_inference().


Prediction output for 1 image:
Detections(xyxy=array([[  5.5638456, 166.15982  , 154.83444  , 262.77823  ],
       [415.35245  , 156.7585   , 463.1068   , 298.09616  ],
       [292.65765  , 218.50575  , 352.791    , 317.1424   ],
       [166.46848  , 232.87022  , 186.74416  , 267.03363  ],
       [366.27576  , 218.85649  , 418.3539   , 318.808    ],
       [550.05505  , 309.32883  , 585.08167  , 400.6922   ],
       [230.74385  , 177.14418  , 267.04306  , 213.15543  ],
       [383.8396   , 172.25049  , 401.1604   , 207.90234  ],
       [409.12643  , 217.53506  , 442.65793  , 306.8642   ],
       [462.523    , 353.80615  , 639.7777   , 424.70316  ],
       [448.96915  , 121.108284 , 461.03314  , 141.85272  ],
       [318.941    , 225.45256  , 449.36572  , 318.76144  ],
       [339.7693   , 176.45566  , 368.3755   , 222.00975  ]],
      dtype=float32), mask=None, confidence=array([0.94458145, 0.8771516 , 0.8583023 , 0.8088513 , 0.8056688 ,
       0.7526451 , 0.73157495, 0.6774858 , 0.671



### Inference Loop

In [None]:
# Run RF-DETR Base over the local validation images and write the detections
# to predictions.json in COCO results format (boxes as [x, y, width, height]).
preds_json = []

with open(VAL_ANN, "r") as f:
    coco_data = json.load(f)

# COCO image ids are looked up by file name.
image_id_map = {img["file_name"]: img["id"] for img in coco_data["images"]}

# Keep only files that have an annotation entry: a stray file in the folder
# would otherwise abort the whole run with a KeyError. Sorted for a stable,
# reproducible processing order.
all_entries = os.listdir(VAL_IMAGES)
image_filenames = sorted(name for name in all_entries if name in image_id_map)
skipped = len(all_entries) - len(image_filenames)
if skipped:
    print(f"Skipping {skipped} file(s) without an entry in the annotation file.")

print("Running inference on validation images...")

for filename in tqdm(image_filenames):
    img_path = os.path.join(VAL_IMAGES, filename)

    # Pass an in-memory RGB image straight to the model. Re-saving every image
    # as a temporary JPEG re-compresses it (lossy), so the benchmark would be
    # scored on altered pixels; it also costs a disk write per image and leaves
    # a temp file behind. convert("RGB") still handles grayscale/CMYK inputs.
    with Image.open(img_path) as im:
        # The very low threshold keeps low-confidence detections so the
        # evaluation can sweep the full score range.
        det = model.predict(im.convert("RGB"), threshold=0.001)

    image_id = image_id_map[filename]

    for box, score, cls in zip(det.xyxy, det.confidence, det.class_id):
        x1, y1, x2, y2 = box.tolist()

        preds_json.append({
            "image_id": image_id,
            # Written as-is; assumes the model's class ids match the
            # annotation file's category ids - TODO confirm.
            "category_id": int(cls),
            "score": float(score),
            # Convert corner format (x1, y1, x2, y2) to COCO (x, y, w, h).
            "bbox": [float(x1), float(y1), float(x2 - x1), float(y2 - y1)]
        })

with open("predictions.json", "w") as f:
    json.dump(preds_json, f)

Running inference on validation images...


  2%|██▉                                                                                                                                              | 62/3125 [01:10<56:02,  1.10s/it]

### COCO Evaluation

In [None]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Ground truth annotations and the Base model's detections.
coco_gt = COCO(VAL_ANN)
coco_dt = coco_gt.loadRes("predictions.json")

coco_eval = COCOeval(coco_gt, coco_dt, "bbox")

# By default COCOeval scores every image listed in the ground truth. Restrict
# it to images that exist in the local image folder (the set the inference
# loop iterates over); otherwise any annotated image that was never inferred
# would count as missed detections and drag recall/AP down.
local_files = set(os.listdir(VAL_IMAGES))
coco_eval.params.imgIds = sorted(
    img["id"] for img in coco_gt.dataset["images"] if img["file_name"] in local_files
)

coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

In [None]:
print("### Accuracy Metrics of RF-DETR Base Model\n")

# ---------------------------
# 1. mAP metrics
# ---------------------------
mAP_50_95 = coco_eval.stats[0]     # AP@[0.50:0.95]
mAP_50    = coco_eval.stats[1]     # AP@0.50

print(f"mAP@50:95: {mAP_50_95:.3f}")
print(f"mAP@50:    {mAP_50:.3f}\n")

# ---------------------------
# 2. Precision / Recall from COCOeval
# ---------------------------
# COCOeval stores precision as:
# precision[IoU, recall, class, area, maxDets]
precision = coco_eval.eval['precision']

# Selection used below (index meanings assume COCOeval's default parameters):
#   IoU index 0     -> IoU threshold 0.50 only (no averaging over IoUs)
#   recall / class  -> all recall thresholds and all classes
#   area index 0    -> all object sizes
#   maxDets index 2 -> up to 100 detections per image
prec_valid = precision[0, :, :, 0, 2]
prec_valid = prec_valid[prec_valid > -1]  # -1 marks entries with no data

# NOTE: averaging precision over recall thresholds and classes is how COCOeval
# derives AP@0.50, so this value tracks mAP@50 rather than the precision at a
# single operating point.
# np.mean of an empty selection is nan (plus a RuntimeWarning) and would slip
# past the zero check below, so fall back to 0.0 when nothing is valid.
prec_mean = float(np.mean(prec_valid)) if prec_valid.size else 0.0

# Recall values:
recall = coco_eval.eval['recall']   # recall[IoU, class, area, maxDets]
rec_valid = recall[0, :, 0, 2]      # IoU=0.50, area=all, maxDets=100
rec_valid = rec_valid[rec_valid > -1]
rec_mean = float(np.mean(rec_valid)) if rec_valid.size else 0.0

# ---------------------------
# 3. F1-score from COCO PR
# ---------------------------
# Harmonic mean of the two averages; defined as 0 when both are 0.
if prec_mean + rec_mean == 0:
    f1 = 0
else:
    f1 = 2 * (prec_mean * rec_mean) / (prec_mean + rec_mean)

print(f"Precision (COCO-based): {prec_mean:.3f}")
print(f"Recall    (COCO-based): {rec_mean:.3f}")
print(f"F1-Score  (COCO-based): {f1:.3f}\n")

## Repeat Steps for RF-DETR Nano
### Initialize Nano Model

In [None]:
from rfdetr import RFDETRNano

# Build the Nano variant under its own name so the Base `model` object stays
# available for comparison.
model_nano = RFDETRNano()

nano_init_message = "Initialized RF-DETR Nano model."
print(nano_init_message)

### Inference Loop for Nano

In [None]:
# Run RF-DETR Nano over the local validation images and write the detections
# to predictions_nano.json in COCO results format (boxes as [x, y, width, height]).
preds_json_nano = []

with open(VAL_ANN, "r") as f:
    coco_data = json.load(f)

# COCO image ids are looked up by file name.
image_id_map = {img["file_name"]: img["id"] for img in coco_data["images"]}

# Keep only files that have an annotation entry: a stray file in the folder
# would otherwise abort the whole run with a KeyError. Sorted for a stable,
# reproducible processing order.
all_entries = os.listdir(VAL_IMAGES)
image_filenames = sorted(name for name in all_entries if name in image_id_map)
skipped = len(all_entries) - len(image_filenames)
if skipped:
    print(f"Skipping {skipped} file(s) without an entry in the annotation file.")

print("Running inference on validation images (Nano model)...")

for filename in tqdm(image_filenames):
    img_path = os.path.join(VAL_IMAGES, filename)

    # Pass an in-memory RGB image straight to the model. Re-saving every image
    # as a temporary JPEG re-compresses it (lossy), so the benchmark would be
    # scored on altered pixels; it also costs a disk write per image and leaves
    # a temp file behind. convert("RGB") still handles grayscale/CMYK inputs.
    with Image.open(img_path) as im:
        # The very low threshold keeps low-confidence detections so the
        # evaluation can sweep the full score range.
        det = model_nano.predict(im.convert("RGB"), threshold=0.001)

    image_id = image_id_map[filename]

    for box, score, cls in zip(det.xyxy, det.confidence, det.class_id):
        x1, y1, x2, y2 = box.tolist()

        preds_json_nano.append({
            "image_id": image_id,
            # Written as-is; assumes the model's class ids match the
            # annotation file's category ids - TODO confirm.
            "category_id": int(cls),
            "score": float(score),
            # Convert corner format (x1, y1, x2, y2) to COCO (x, y, w, h).
            "bbox": [float(x1), float(y1), float(x2 - x1), float(y2 - y1)]
        })

with open("predictions_nano.json", "w") as f:
    json.dump(preds_json_nano, f)

### COCO Evaluation for Nano

In [None]:
# COCO and COCOeval come from the imports in the Base evaluation cell.
# Ground truth annotations and the Nano model's detections.
coco_gt = COCO(VAL_ANN)
coco_dt_nano = coco_gt.loadRes("predictions_nano.json")

coco_eval_nano = COCOeval(coco_gt, coco_dt_nano, "bbox")

# By default COCOeval scores every image listed in the ground truth. Restrict
# it to images that exist in the local image folder (the set the inference
# loop iterates over); otherwise any annotated image that was never inferred
# would count as missed detections and drag recall/AP down.
local_files = set(os.listdir(VAL_IMAGES))
coco_eval_nano.params.imgIds = sorted(
    img["id"] for img in coco_gt.dataset["images"] if img["file_name"] in local_files
)

coco_eval_nano.evaluate()
coco_eval_nano.accumulate()
coco_eval_nano.summarize()

In [None]:
# Accumulated COCOeval arrays for the Nano run:
#   precision[IoU, recall, class, area, maxDets]
#   recall[IoU, class, area, maxDets]
precision = coco_eval_nano.eval['precision']
recall = coco_eval_nano.eval['recall']

# Extract IoU=0.50, area=all, maxDets=100 (index meanings assume COCOeval's
# default parameters). Entries equal to -1 mark slots with no data.
prec_valid = precision[0, :, :, 0, 2]
prec_valid = prec_valid[prec_valid > -1]
# NOTE: averaging precision over recall thresholds and classes is how COCOeval
# derives AP@0.50, so this value tracks mAP@50 rather than the precision at a
# single operating point.
# np.mean of an empty selection is nan (plus a RuntimeWarning) and would slip
# past the zero check below, so fall back to 0.0 when nothing is valid.
prec_mean = float(np.mean(prec_valid)) if prec_valid.size else 0.0

rec_valid = recall[0, :, 0, 2]
rec_valid = rec_valid[rec_valid > -1]
rec_mean = float(np.mean(rec_valid)) if rec_valid.size else 0.0

# Harmonic mean of the two averages; defined as 0 when both are 0.
f1 = 0 if (prec_mean + rec_mean == 0) else 2 * (prec_mean * rec_mean) / (prec_mean + rec_mean)

print("### Nano Model Accuracy Metrics\n")
print(f"mAP@50:95: {coco_eval_nano.stats[0]:.3f}")
print(f"mAP@50:    {coco_eval_nano.stats[1]:.3f}")
print(f"Precision (COCO-based): {prec_mean:.3f}")
print(f"Recall    (COCO-based): {rec_mean:.3f}")
print(f"F1-Score  (COCO-based): {f1:.3f}")