# EfficientDet COCO Benchmark

### Setup Dependencies

In [41]:
# Setup Dependencies (EfficientDet)

import os
import time
import torch
import numpy as np
import psutil
import cv2
import matplotlib.pyplot as plt

from effdet import create_model, DetBenchPredict
from effdet.data import resolve_input_config

print("-" * 50)
print(f"PyTorch version: {torch.__version__}")
print(f"GPU Available: {torch.cuda.is_available()}")
print(f"CPU Cores: {psutil.cpu_count(logical=True)}")
print("-" * 50)


--------------------------------------------------
PyTorch version: 2.9.1
GPU Available: False
CPU Cores: 8
--------------------------------------------------


### Configure Local COCO Paths

In [42]:
COCO_ROOT = os.path.abspath(
    os.path.join(os.getcwd(), "..", "coco-dataset", "coco-2017")
)

VAL_IMAGES = os.path.join(COCO_ROOT, "validation", "data")
VAL_ANN    = os.path.join(COCO_ROOT, "validation", "labels.json")

print("Images path:     ", VAL_IMAGES)
print("Annotation file: ", VAL_ANN)
print("Images found:    ", len(os.listdir(VAL_IMAGES)))

image_files = os.listdir(VAL_IMAGES)
print("Number of validation images:", len(image_files))

Images path:      /Users/g/Documents/CPEN491/object-identification-uav-camera/phase-1/coco-dataset/coco-2017/validation/data
Annotation file:  /Users/g/Documents/CPEN491/object-identification-uav-camera/phase-1/coco-dataset/coco-2017/validation/labels.json
Images found:     3125
Number of validation images: 3125


## Load EfficientDet Model

In [43]:
# Create EfficientDet model (fixed for current effdet version)

from effdet import create_model  # DetBenchPredict not needed here

MODEL_NAME = "efficientdet_d0"   # small & fast variant
NUM_CLASSES = 91                 # COCO-style config

model = create_model(
    MODEL_NAME,
    bench_task="predict",        # returns a prediction bench model
    pretrained=True,             # load pretrained COCO weights
    num_classes=NUM_CLASSES,
)

model.eval()

num_params = sum(p.numel() for p in model.parameters())
print(f"Created model: {MODEL_NAME}")
print(f"Number of parameters: {num_params:,}")


Created model: efficientdet_d0
Number of parameters: 3,878,348


In [44]:
INPUT_H = 512
INPUT_W = 512
print("Using EfficientDet-D0 input size:", INPUT_H, INPUT_W)

Using EfficientDet-D0 input size: 512 512


## Put model on device & define inference helper

In [45]:
import torchvision.transforms as T
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

transform = T.Compose([
    T.ToTensor(),   # HWC uint8 RGB -> CHW float32 [0,1]
])

def run_efficientdet_on_image(img_bgr, score_thresh=0.05):
    """
    Runs EfficientDet on a single BGR image.

    - Resizes input to EfficientDet's expected size (INPUT_W, INPUT_H)
    - Runs model
    - Rescales boxes back to the original image size
    """
    if img_bgr is None:
        raise ValueError("img_bgr is None (cv2.imread failed)")

    # Original size
    orig_h, orig_w = img_bgr.shape[:2]

    # BGR -> RGB
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # Resize to model input size
    img_resized = cv2.resize(img_rgb, (INPUT_W, INPUT_H))

    # To tensor
    tensor = transform(img_resized)          # [3, H_in, W_in]
    _, H_in, W_in = tensor.shape

    with torch.no_grad():
        batch = tensor.unsqueeze(0).to(device)   # [1, 3, H_in, W_in]
        preds = model(batch)

        # Most effdet versions: list[dict] with 'boxes', 'scores', 'labels'
        if isinstance(preds, (list, tuple)):
            p = preds[0]
            if isinstance(p, dict):
                boxes = p["boxes"].cpu().numpy()
                scores = p["scores"].cpu().numpy()
                labels = p["labels"].cpu().numpy()
            else:
                arr = p.cpu().numpy()
                boxes = arr[:, :4]
                scores = arr[:, 4]
                labels = arr[:, 5].astype(int)
        elif isinstance(preds, torch.Tensor):
            arr = preds[0].cpu().numpy()
            boxes = arr[:, :4]
            scores = arr[:, 4]
            labels = arr[:, 5].astype(int)
        else:
            raise RuntimeError(f"Unknown EfficientDet output type: {type(preds)}")

    # Filter by confidence
    keep = scores >= score_thresh
    boxes = boxes[keep]
    scores = scores[keep]
    labels = labels[keep]

    if len(boxes) == 0:
        return boxes, scores, labels

    # Boxes are in xyxy for the resized image -> scale back to original
    scale_x = orig_w / W_in
    scale_y = orig_h / H_in

    boxes[:, 0] *= scale_x   # x1
    boxes[:, 2] *= scale_x   # x2
    boxes[:, 1] *= scale_y   # y1
    boxes[:, 3] *= scale_y   # y2

    # Clip to original bounds
    boxes[:, 0] = np.clip(boxes[:, 0], 0, orig_w)
    boxes[:, 2] = np.clip(boxes[:, 2], 0, orig_w)
    boxes[:, 1] = np.clip(boxes[:, 1], 0, orig_h)
    boxes[:, 3] = np.clip(boxes[:, 3], 0, orig_h)

    return boxes, scores, labels


## Loop through ALL validation images

In [46]:
import tqdm
import json

results = []

for img_name in tqdm.tqdm(image_files):
    img_path = os.path.join(VAL_IMAGES, img_name)
    img = cv2.imread(img_path)

    if img is None:
        continue

    boxes, scores, labels = run_efficientdet_on_image(img, score_thresh=0.05)

    # Convert to COCO format
    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box.astype(float)
        w  = x2 - x1
        h  = y2 - y1

        results.append({
            "image_id": int(img_name.rstrip(".jpg")),
            "category_id": int(label),
            "bbox": [float(x1), float(y1), float(w), float(h)],
            "score": float(score)
        })


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [08:15<00:00,  6.30it/s]


## Save results to JSON file

In [47]:
output_path = "efficientdet_results.json"

with open(output_path, "w") as f:
    json.dump(results, f)

print("Saved result file to:", output_path)
print("Total predictions:", len(results))


Saved result file to: efficientdet_results.json
Total predictions: 4861
