# EfficientDet COCO Benchmark

### Setup Dependencies

In [1]:
# Setup Dependencies (EfficientDet)

import os
import time
import torch
import numpy as np
import psutil
import cv2
import matplotlib.pyplot as plt

from effdet import create_model, DetBenchPredict
from effdet.data import resolve_input_config

print("-" * 50)
print(f"PyTorch version: {torch.__version__}")
print(f"GPU Available: {torch.cuda.is_available()}")
print(f"CPU Cores: {psutil.cpu_count(logical=True)}")
print("-" * 50)


  from .autonotebook import tqdm as notebook_tqdm


--------------------------------------------------
PyTorch version: 2.9.1+cpu
GPU Available: False
CPU Cores: 20
--------------------------------------------------


### Configure Local COCO Paths

In [2]:
COCO_ROOT = os.path.abspath(
    os.path.join(os.getcwd(), "..", "coco-dataset", "coco-2017")
)

VAL_IMAGES = os.path.join(COCO_ROOT, "validation", "data")
VAL_ANN    = os.path.join(COCO_ROOT, "validation", "labels.json")

print("Images path:     ", VAL_IMAGES)
print("Annotation file: ", VAL_ANN)
print("Images found:    ", len(os.listdir(VAL_IMAGES)))

image_files = os.listdir(VAL_IMAGES)
print("Number of validation images:", len(image_files))

Images path:      D:\2025 Winter\CPEN491\object-identification-uav-camera\phase-1\coco-dataset\coco-2017\validation\data
Annotation file:  D:\2025 Winter\CPEN491\object-identification-uav-camera\phase-1\coco-dataset\coco-2017\validation\labels.json
Images found:     3125
Number of validation images: 3125


## Load EfficientDet Model

In [3]:
# Create EfficientDet model (fixed for current effdet version)

from effdet import create_model  # DetBenchPredict not needed here

MODEL_NAME = "efficientdet_d0"   # small & fast variant
NUM_CLASSES = 91                 # COCO-style config

model = create_model(
    MODEL_NAME,
    bench_task="predict",        # returns a prediction bench model
    pretrained=True,             # load pretrained COCO weights
    num_classes=NUM_CLASSES,
)

model.eval()

num_params = sum(p.numel() for p in model.parameters())
print(f"Created model: {MODEL_NAME}")
print(f"Number of parameters: {num_params:,}")


Downloading: "https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d0-f3276ba8.pth" to C:\Users\PC/.cache\torch\hub\checkpoints\efficientdet_d0-f3276ba8.pth
Created model: efficientdet_d0
Number of parameters: 3,878,348


In [4]:
INPUT_H = 512
INPUT_W = 512
print("Using EfficientDet-D0 input size:", INPUT_H, INPUT_W)

Using EfficientDet-D0 input size: 512 512


In [5]:
import json
import numpy as np
from pycocotools.coco import COCO

# ---- Load GT and your predictions ----
coco_gt = COCO(VAL_ANN)

with open("efficientdet_results.json", "r") as f:
    preds = json.load(f)

print("Total predictions loaded:", len(preds))
print("="*60)

# ---- Build helper maps ----
gt_image_ids = set(coco_gt.getImgIds())
gt_category_ids = set(coco_gt.getCatIds())

# ---- Utility: compute IoU ----
def compute_iou(box1, box2):
    # box = [x1, y1, w, h]
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2

    xa = max(x1, x2)
    ya = max(y1, y2)
    xb = min(x1 + w1, x2 + w2)
    yb = min(y1 + h1, y2 + h2)

    inter = max(0, xb - xa) * max(0, yb - ya)
    union = w1*h1 + w2*h2 - inter

    if union == 0:
        return 0

    return inter / union

# ---- SANITY CHECK LOOP ----
max_checks = 50  # don't print everything, just first 50 predictions
problems_found = False

for i, p in enumerate(preds[:max_checks]):
    img_id    = p["image_id"]
    cat_id    = p["category_id"]
    x, y, w, h = p["bbox"]

    print(f"\nPrediction #{i+1}")
    print(p)

    # 1. IMAGE ID CHECK
    if img_id not in gt_image_ids:
        print("‚ùå ERROR: image_id not in ground-truth annotations!")
        problems_found = True
        continue
    else:
        print("‚úî image_id exists")

    # 2. CATEGORY ID CHECK
    if cat_id not in gt_category_ids:
        print("‚ùå ERROR: category_id invalid for this COCO dataset!")
        problems_found = True
        continue
    else:
        print("‚úî category_id valid")

    # 3. BBOX CHECK
    if w <= 0 or h <= 0:
        print("‚ùå ERROR: invalid width/height!")
        problems_found = True
        continue
    else:
        print("‚úî bbox format OK")

    # 4. IOU CHECK ‚Äî must overlap at least ONE GT box
    ann_ids = coco_gt.getAnnIds(imgIds=[img_id])
    anns = coco_gt.loadAnns(ann_ids)

    max_iou = 0
    for ann in anns:
        gt_box = ann["bbox"]  # [x,y,w,h]
        iou = compute_iou(p["bbox"], gt_box)
        max_iou = max(max_iou, iou)

    if max_iou < 0.01:
        print(f"‚ùå WARNING: No overlap with any GT box (max IoU={max_iou:.3f})")
        # Not always an error ‚Äî model might simply be wrong ‚Äî but check next predictions
    else:
        print(f"‚úî Overlaps GT (max IoU={max_iou:.3f})")

    print("-"*60)

if not problems_found:
    print("\nüéâ Sanity check passed for the first 50 predictions!")
else:
    print("\n‚ö† Some issues found above ‚Äî fix them before running COCOeval.")


loading annotations into memory...
Done (t=0.28s)
creating index...
index created!
Total predictions loaded: 5

Prediction #1
{'image_id': 437351, 'category_id': 25, 'bbox': [291.1741943359375, 268.6634216308594, 28.366241455078125, 138.44677734375], 'score': 0.36864861845970154}
‚úî image_id exists
‚úî category_id valid
‚úî bbox format OK
‚úî Overlaps GT (max IoU=0.183)
------------------------------------------------------------

Prediction #2
{'image_id': 437351, 'category_id': 25, 'bbox': [301.5108947753906, 273.63311767578125, 31.025909423828125, 128.64785766601562], 'score': 0.3410903811454773}
‚úî image_id exists
‚úî category_id valid
‚úî bbox format OK
‚úî Overlaps GT (max IoU=0.186)
------------------------------------------------------------

Prediction #3
{'image_id': 437351, 'category_id': 25, 'bbox': [272.5119323730469, 282.7818298339844, 31.5018310546875, 115.6697998046875], 'score': 0.3115268349647522}
‚úî image_id exists
‚úî category_id valid
‚úî bbox format OK
‚úî Over

## Put model on device & define inference helper

In [6]:
import torchvision.transforms as T
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

transform = T.Compose([
    T.ToTensor(),   # HWC uint8 RGB -> CHW float32 [0,1]
])

def run_efficientdet_on_image(img_bgr, score_thresh=0.001):
    """
    Runs EfficientDet on a single BGR image.

    - Resizes input to EfficientDet's expected size (INPUT_W, INPUT_H)
    - Runs model
    - Rescales boxes back to the original image size
    """
    if img_bgr is None:
        raise ValueError("img_bgr is None (cv2.imread failed)")

    # Original size
    orig_h, orig_w = img_bgr.shape[:2]

    # BGR -> RGB
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # Resize to model input size
    img_resized = cv2.resize(img_rgb, (INPUT_W, INPUT_H))

    # To tensor
    tensor = transform(img_resized)          # [3, H_in, W_in]
    _, H_in, W_in = tensor.shape

    with torch.no_grad():
        batch = tensor.unsqueeze(0).to(device)   # [1, 3, H_in, W_in]
        preds = model(batch)

        # Most effdet versions: list[dict] with 'boxes', 'scores', 'labels'
        if isinstance(preds, (list, tuple)):
            p = preds[0]
            if isinstance(p, dict):
                boxes = p["boxes"].cpu().numpy()
                scores = p["scores"].cpu().numpy()
                labels = p["labels"].cpu().numpy()
            else:
                arr = p.cpu().numpy()
                boxes = arr[:, :4]
                scores = arr[:, 4]
                labels = arr[:, 5].astype(int)
        elif isinstance(preds, torch.Tensor):
            arr = preds[0].cpu().numpy()
            boxes = arr[:, :4]
            scores = arr[:, 4]
            labels = arr[:, 5].astype(int)
        else:
            raise RuntimeError(f"Unknown EfficientDet output type: {type(preds)}")

    # Filter by confidence
    keep = scores >= score_thresh
    boxes = boxes[keep]
    scores = scores[keep]
    labels = labels[keep]

    if len(boxes) == 0:
        return boxes, scores, labels

    # Boxes are in xyxy for the resized image -> scale back to original
    scale_x = orig_w / W_in
    scale_y = orig_h / H_in

    boxes[:, 0] *= scale_x   # x1
    boxes[:, 2] *= scale_x   # x2
    boxes[:, 1] *= scale_y   # y1
    boxes[:, 3] *= scale_y   # y2

    # Clip to original bounds
    boxes[:, 0] = np.clip(boxes[:, 0], 0, orig_w)
    boxes[:, 2] = np.clip(boxes[:, 2], 0, orig_w)
    boxes[:, 1] = np.clip(boxes[:, 1], 0, orig_h)
    boxes[:, 3] = np.clip(boxes[:, 3], 0, orig_h)

    return boxes, scores, labels


## Loop through ALL validation images and save results to JSON file

In [7]:
import tqdm
import json
import numpy as np
from pycocotools.coco import COCO

# Load GT to build mappings
coco_gt = COCO(VAL_ANN)

img_id_map = {
    img["file_name"]: img["id"]
    for img in coco_gt.loadImgs(coco_gt.getImgIds())
}
cat_ids_sorted = sorted(coco_gt.getCatIds())

results = []

for img_name in tqdm.tqdm(image_files):
    img_path = os.path.join(VAL_IMAGES, img_name)
    img = cv2.imread(img_path)
    if img is None:
        continue

    boxes, scores, labels = run_efficientdet_on_image(img, score_thresh=0.001)

    image_id = img_id_map[img_name]

    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box.astype(float)
        w = x2 - x1
        h = y2 - y1

        label_idx = int(label)

        # Skip labels that don't map to any COCO category in this annotation file
        if label_idx < 0 or label_idx >= len(cat_ids_sorted):
            continue

        category_id = int(cat_ids_sorted[label_idx])

        results.append({
            "image_id": int(image_id),
            "category_id": category_id,
            "bbox": [float(x1), float(y1), float(w), float(h)],
            "score": float(score),
        })

output_path = "efficientdet_results.json"
with open(output_path, "w") as f:
    json.dump(results, f)

print("Saved result file to:", output_path)
print("Total predictions:", len(results))



loading annotations into memory...
Done (t=0.27s)
creating index...
index created!


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3125/3125 [09:49<00:00,  5.30it/s]


Saved result file to: efficientdet_results.json
Total predictions: 256135


## Evaluation

In [8]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np

# Load ground truth and EfficientDet detections
coco_gt = COCO(VAL_ANN)
coco_dt_eff = coco_gt.loadRes("efficientdet_results.json")

img_ids = sorted(coco_gt.getImgIds())

coco_eval_eff = COCOeval(coco_gt, coco_dt_eff, iouType="bbox")
coco_eval_eff.params.imgIds = img_ids

coco_eval_eff.evaluate()
coco_eval_eff.accumulate()
coco_eval_eff.summarize()   # prints standard COCO table

# ---- Extra metrics in the style of your screenshot ----
precision = coco_eval_eff.eval["precision"]  # [TxRxKxAxM]
recall    = coco_eval_eff.eval["recall"]     # [TxKxAxM]

# IoU = 0.50, area = all, maxDets = 100
prec_valid = precision[0, :, :, 0, 2]
prec_valid = prec_valid[prec_valid > -1]
prec_mean  = np.mean(prec_valid)

rec_valid = recall[0, :, 0, 2]
rec_valid = rec_valid[rec_valid > -1]
rec_mean  = np.mean(rec_valid)

f1 = 0 if (prec_mean + rec_mean) == 0 else 2 * (prec_mean * rec_mean) / (prec_mean + rec_mean)

print("\n### EfficientDet Model Accuracy Metrics\n")
print(f"mAP@50:95: {coco_eval_eff.stats[0]:.3f}")
print(f"mAP@50:   {coco_eval_eff.stats[1]:.3f}")
print(f"Precision (COCO-based): {prec_mean:.3f}")
print(f"Recall    (COCO-based): {rec_mean:.3f}")
print(f"F1-Score  (COCO-based): {f1:.3f}")


loading annotations into memory...
Done (t=0.22s)
creating index...
index created!
Loading and preparing results...
DONE (t=1.06s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=12.30s).
Accumulating evaluation results...
DONE (t=4.09s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.008
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.014
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet