<a href="https://colab.research.google.com/github/PavanDaniele/drone-person-detection/blob/main/other_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set up: mount drive + import libraries

**Important Information:** We need to activate the GPU on Colab (_Runtime --> Change runtime type_). \
Every time you start a new session (or reopen the notebook after a few hours) check that the GPU is still active. If we are not using the GPU it can take up to tens of hours to train the models. \
_GPU T4 is the best choice._

In [None]:
# Run this every time you start a new Colab session: the mount does not
# survive a runtime restart.
from google.colab import drive
drive.mount('/content/drive') # mount Google Drive under /content/drive so the notebook can read/write it

In [None]:
import shutil
import os
from PIL import Image

# Import base for EfficientDet:
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torchvision import transforms

import albumentations as A
from albumentations.pytorch import ToTensorV2

from effdet.efficientdet import HeadNet
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchPredict
from effdet.bench import unwrap_bench
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import json
from torchvision import transforms
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.patches as patches

### EfficientDet D0

We now start training the EfficientDet model. We will use Ross Wightman's `effdet` library, which simplifies the whole process:

In [None]:
!pip install git+https://github.com/rwightman/efficientdet-pytorch


Copy the entire dataset folder from Google Drive to Colab's local storage to increase speed during training:

In [None]:
# Dataset location on Google Drive and the working copy on the Colab VM disk.
src = '/content/drive/MyDrive/projectUPV/datasets/AERALIS_EfficientDet_D0'
dst = '/content/AERALIS_EfficientDet_D0_local'

# copytree refuses to write into an existing directory, so wipe any previous
# local copy first and then recopy the dataset from scratch.
if os.path.exists(dst):
    shutil.rmtree(dst)

shutil.copytree(src, dst)
print("Copy completed")

In [None]:
!df -h / # It shows the total, used and free space on the root (/) of the Colab VM.

# Avail column: space still available for your files.

In [None]:
# Show space used by your local folder
!du -sh /content/AERALIS_EfficientDet_D0_local

In [None]:
# Show space occupied by various folders in /content/.
!du -h --max-depth=1 /content/

In [None]:
# Main parameters shared by every cell below.
BASE_DIR = dst  # local copy of the dataset created by the copy cell
VARIANT = 'tf_efficientdet_d0'  # effdet config name; presumably 'tf_efficientdet_d1' / 'tf_efficientdet_d2' for larger variants - confirm against effdet
IMAGE_SIZE = 512                # square input side in pixels (D0=512, D1=640, D2=768)
NUM_CLASSES = 1                 # only one class (e.g. 'person')
BATCH_SIZE = 16                 # images per training/validation batch
EPOCHS = 100                    # upper bound on training epochs
PATIENCE = 20                   # early stopping: epochs allowed without validation improvement
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use the GPU when one is visible

In [None]:
# Check whether a GPU is available to PyTorch.
print(torch.cuda.is_available()) # True = a CUDA GPU is usable; False = training will run on the CPU
print(torch.cuda.device_count()) # number of visible CUDA devices (not the GPU name)

# If the first line is True and the count is at least 1, the GPU can be used.
# Without a GPU the code falls back to the CPU (much slower).
# Locally (not Colab): check with nvidia-smi from a terminal.

We keep only those images that contain at least one bounding box larger than 1 px in both width and height:

In [None]:
class CocoDetectionTransformed(CocoDetection):
    """COCO detection dataset limited to images with at least one usable box.

    An image is kept when at least one of its annotations has a bounding box
    wider and taller than 1 px. An optional Albumentations pipeline is applied
    jointly to the image and its boxes.

    Args:
        img_folder: directory containing the images.
        ann_file: path to the COCO-format annotation JSON.
        transform: optional Albumentations ``Compose`` configured with
            ``BboxParams(format='coco', label_fields=['category_id'])``.

    Each item is ``(img, target, img_id)``: the (possibly transformed) image,
    a list of annotation dicts whose ``'bbox'`` is ``[x, y, w, h]`` in the
    transformed image, and the COCO id of the image.
    """

    def __init__(self, img_folder, ann_file, transform=None):
        super().__init__(img_folder, ann_file)
        # Albumentations pipeline, applied by hand in __getitem__.
        self.transform = transform

        # 1) Ids of all images that have at least one bbox > 1 px per side.
        valid_img_ids = {
            ann['image_id']
            for ann in self.coco.dataset['annotations']
            if ann['bbox'][2] > 1 and ann['bbox'][3] > 1
        }

        # 2) Positions (inside coco.dataset['images']) of the valid images.
        self.valid_indices = [
            idx for idx, img_info in enumerate(self.coco.dataset['images'])
            if img_info['id'] in valid_img_ids
        ]

        # The parent class indexes images through `self.ids`, whose order may
        # differ from coco.dataset['images']; translate via the image id so
        # the image, its annotations and the returned id always agree.
        self._id_to_position = {
            img_id: pos for pos, img_id in enumerate(self.ids)
        }

    def __len__(self):
        return len(self.valid_indices)

    def __getitem__(self, idx):
        real_idx = self.valid_indices[idx]
        img_id = self.coco.dataset['images'][real_idx]['id']
        img, target = super().__getitem__(self._id_to_position[img_id])

        # Work on copies: the parent returns the annotation dicts stored in
        # the COCO index, and rewriting their 'bbox' in place would rescale
        # the ground truth again every time the sample is read.
        target = [dict(obj) for obj in target]

        if self.transform:
            bboxes = [obj['bbox'] for obj in target]
            # Use each box's position as its label: the transform returns the
            # labels of the boxes it kept, which tells us which annotations
            # survived and keeps boxes and annotations aligned.
            positions = list(range(len(bboxes)))
            augmented = self.transform(
                image=np.array(img),
                bboxes=bboxes,
                category_id=positions
            )
            img = augmented['image']
            kept = []
            for pos, new_bbox in zip(augmented['category_id'], augmented['bboxes']):
                obj = target[int(pos)]
                obj['bbox'] = list(new_bbox)
                kept.append(obj)
            target = kept

        return img, target, img_id

Creates a DataLoader to load images and annotations in COCO format:

In [None]:
def get_loader(img_dir, ann_path, shuffle):
    """Build the train/validation DataLoader for a COCO-format split.

    Args:
        img_dir: directory containing the images of the split.
        ann_path: path to the COCO annotation JSON of the split.
        shuffle: whether the loader shuffles the samples.

    Returns:
        A DataLoader yielding ``(images, targets, img_ids)`` tuples of
        length BATCH_SIZE (the last batch may be shorter).
    """
    # Boxes are COCO [x, y, w, h]; both filtering thresholds are set to 0.
    bbox_settings = A.BboxParams(
        format='coco',
        label_fields=['category_id'],
        min_area=0.0,
        min_visibility=0.0,
    )

    # Joint image + bbox pipeline: resize to a square, normalize with the
    # ImageNet statistics, convert to a torch.Tensor.
    pipeline = A.Compose(
        [
            A.Resize(IMAGE_SIZE, IMAGE_SIZE),
            A.Normalize(
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
        bbox_params=bbox_settings,
    )

    dataset = CocoDetectionTransformed(img_dir, ann_path, transform=pipeline)

    def transpose_batch(samples):
        # [(img, target, id), ...] -> ((img, ...), (target, ...), (id, ...))
        return tuple(zip(*samples))

    return DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        collate_fn=transpose_batch,
        num_workers=2,
    )

In [None]:
def get_loader_test(img_dir, ann_path):
    """Build the test DataLoader (batch size 1, fixed order).

    Args:
        img_dir: directory containing the test images.
        ann_path: path to the COCO annotation JSON of the test split.

    Returns:
        A DataLoader yielding one ``(images, targets, img_ids)`` tuple per
        image, in dataset order.
    """
    # Boxes are COCO [x, y, w, h]; min_area is 1.0 here, unlike get_loader.
    bbox_settings = A.BboxParams(
        format='coco',
        label_fields=['category_id'],
        min_area=1.0,
        min_visibility=0.0,
    )

    # Same preprocessing as training: square resize, ImageNet normalization,
    # conversion to a torch.Tensor.
    pipeline = A.Compose(
        [
            A.Resize(IMAGE_SIZE, IMAGE_SIZE),
            A.Normalize(
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
        bbox_params=bbox_settings,
    )

    dataset = CocoDetectionTransformed(img_dir, ann_path, transform=pipeline)

    def transpose_batch(samples):
        # [(img, target, id), ...] -> ((img, ...), (target, ...), (id, ...))
        return tuple(zip(*samples))

    return DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        collate_fn=transpose_batch,
        num_workers=2,
    )

In [None]:
# Build one loader per split. Only the training loader shuffles; the test
# loader always uses batch size 1.
train_loader = get_loader(
    img_dir=f'{BASE_DIR}/train/images',
    ann_path=f'{BASE_DIR}/annotations_train.json',
    shuffle=True,
)
val_loader = get_loader(
    img_dir=f'{BASE_DIR}/val/images',
    ann_path=f'{BASE_DIR}/annotations_val.json',
    shuffle=False,
)
test_loader = get_loader_test(
    img_dir=f'{BASE_DIR}/test/images',
    ann_path=f'{BASE_DIR}/annotations_test.json',
)

Adam (short for Adaptive Moment Estimation) is one of the most widely used optimizers in deep learning.

The optimizer is an algorithm that updates model weights during training to reduce loss.
- _AdamW_ is a variant of Adam, which also includes a weight penalty (weight decay) in a more correct way than classical Adam.
- The _W_ stands for “Weight Decay fix”: it improves regularization and reduces overfitting.

In this way:
- Converges quickly (like Adam)
- Reduces overfitting better than Adam
- Is the standard in many modern models (such as BERT, EfficientDet, etc.).

In [None]:
# Model initialization
config = get_efficientdet_config(VARIANT)      # configuration for the chosen variant
config.num_classes = NUM_CLASSES               # number of object classes
config.image_size = (IMAGE_SIZE, IMAGE_SIZE)   # fixed square input size

# Network with a pre-trained backbone and a freshly created classification
# head sized for our classes, wrapped in the training bench.
net = EfficientDet(config, pretrained_backbone=True)
net.class_net = HeadNet(config, num_outputs=NUM_CLASSES)
model = DetBenchTrain(net, config).to(DEVICE)

# Backbone freeze: switch off its gradients so that only the remaining
# (still trainable) parameters are updated during the first epochs.
net.backbone.requires_grad_(False)

# Optimizer over the trainable parameters only, with the initial learning
# rate used while the backbone is frozen.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=1e-4)

HeadNet is the final block of the EfficientDet model that does box classification and regression.

EfficientDet has two “heads”:
- Classification head: says “what” is in the image (object class)
- Regression head: says “where” it is (bounding box)

When we use: _net.class_net = HeadNet(config, num_outputs=NUM_CLASSES)_ we are customizing the classification head of the model to use our number of classes, for example 1 for ‘person’.

Without this change, the model would remain pre-configured for COCO (80 classes), so it would miss everything in the custom dataset.

Now initialize values to track the best validation and apply early stopping:

In [None]:
def _effdet_targets(images, targets):
    """Build the target dict passed to DetBenchTrain for one batch.

    Args:
        images: stacked image tensor of the batch (indexed as img.shape[1],
            img.shape[2] per image).
        targets: one list of annotation dicts per image; each 'bbox' is
            COCO-style [x, y, w, h] in pixels of the resized image.

    Returns:
        dict with the 'bbox', 'cls', 'img_scale' and 'img_size' entries.
    """
    boxes, labels = [], []
    for annotations in targets:
        # effdet's anchor labeler documents ground-truth boxes as
        # [y1, x1, y2, x2], so convert from xywh to yxyx. Boxes that are not
        # wider and taller than 1 px are dropped as invalid.
        yxyx = [
            [y, x, y + h, x + w]
            for x, y, w, h in (obj['bbox'] for obj in annotations)
            if w > 1 and h > 1
        ]
        # reshape keeps a well-formed (0, 4) tensor when no box survives.
        boxes.append(
            torch.tensor(yxyx, dtype=torch.float32, device=DEVICE).reshape(-1, 4)
        )
        # Class ids are 1-based for the labeler (it subtracts 1 and treats
        # the result -1 as background), so the single class is labelled 1.
        labels.append(torch.ones(len(yxyx), dtype=torch.int64, device=DEVICE))

    return {
        'bbox': boxes,
        'cls': labels,
        'img_scale': torch.ones(len(images), device=DEVICE),
        'img_size': torch.tensor(
            [[img.shape[1], img.shape[2]] for img in images], device=DEVICE
        ),
    }


# Number of epochs after which the backbone is unfrozen
freeze_epochs = 10

# Early stopping setup:
epoch = 0
epochs_no_improve = 0  # epochs elapsed without a validation improvement
best_loss = float('inf')  # best (lowest) validation loss so far
best_model_path = f'effdet_{VARIANT}_best.pth'  # file receiving the best weights

# Training loop
while epoch < EPOCHS and epochs_no_improve < PATIENCE:
    if epoch == freeze_epochs:
        for param in net.backbone.parameters():
            param.requires_grad = True   # unfreeze backbone layers
        # Recreate the optimizer so that it now covers every parameter.
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
        print(f"[Info] Backbone scongelato all'epoca {epoch}")

    # --- Training phase
    model.train()
    total_loss = 0.0  # sum of the batch losses of this epoch

    for images, targets, _ in train_loader:  # the image ids are not needed here
        images = torch.stack([img.to(DEVICE) for img in images])

        # Forward pass and loss calculation
        loss = model(images, _effdet_targets(images, targets))['loss']

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        print(f"[Epoch {epoch}] Batch loss: {loss.item()}")

    avg_train_loss = total_loss / max(len(train_loader), 1)
    print(f"[Epoch {epoch+1}/{EPOCHS}] Train loss: {avg_train_loss:.4f}")

    # --- Validation phase
    model.eval()
    val_loss = 0.0
    val_batches = 0  # batches that produced a finite loss

    with torch.no_grad():
        for images, targets, _ in val_loader:
            images = torch.stack([img.to(DEVICE) for img in images])
            loss = model(images, _effdet_targets(images, targets))['loss']

            if not torch.isnan(loss):
                val_loss += loss.item()
                val_batches += 1
            else:
                print("NaN detected in loss, batch skipped.")

    # Average over the batches actually counted. If every batch was NaN the
    # epoch is scored as infinitely bad instead of 0.0, so it can never be
    # saved as the best model.
    avg_val_loss = val_loss / val_batches if val_batches else float('inf')
    print(f"[Epoch {epoch+1}/{EPOCHS}] Val loss: {avg_val_loss:.4f}")

    # Early stopping check
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        torch.save(model.state_dict(), best_model_path)
        epochs_no_improve = 0
        print("Best saved model.")
    else:
        epochs_no_improve += 1
        print(f"Epochs without improvement: {epochs_no_improve}/{PATIENCE}")

    epoch += 1

# Training end reason
if epochs_no_improve >= PATIENCE:
    print("Early stopping enabled.")
else:
    print("End of training: reached maximum number of epochs.")

Copy the weights of the best saved model (.pth) from your local file system to your Google Drive folder:

In [None]:
from torch.utils.data import SubsetRandomSampler

# Samplers over the first N examples of each split (5 training batches and
# 2 validation batches). N is clamped to the dataset size so that a small
# dataset cannot produce out-of-range indices.
small_train_count = min(BATCH_SIZE * 5, len(train_loader.dataset))
small_val_count = min(BATCH_SIZE * 2, len(val_loader.dataset))
small_train_sampler = SubsetRandomSampler(range(0, small_train_count))
small_val_sampler   = SubsetRandomSampler(range(0, small_val_count))

# Reduced DataLoaders reusing the datasets and collate functions of the
# full loaders.
small_train_loader = DataLoader(
  train_loader.dataset,
  batch_size=BATCH_SIZE,
  sampler=small_train_sampler,
  collate_fn=train_loader.collate_fn,
  num_workers=2
)
small_val_loader = DataLoader(
  val_loader.dataset,
  batch_size=BATCH_SIZE,
  sampler=small_val_sampler,
  collate_fn=val_loader.collate_fn,
  num_workers=2
)

In [None]:
EPOCHS = 11  # overrides the main setting for the quick runs below
PATIENCE = 4  # shorter early-stopping window for the quick runs
freeze_epochs = 0  # the loops below unfreeze the backbone when epoch == freeze_epochs, i.e. at epoch 0

In [None]:
# --- Overfit test on a single image ---
# Sanity check of the data/target pipeline: the loss on one fixed image
# should drop quickly. NOTE: this updates the weights of the shared `model`
# with a large learning rate and leaves the backbone unfrozen.
single_images, single_targets, _ = next(iter(small_train_loader))
img = single_images[0].unsqueeze(0).to(DEVICE)   # add the batch dimension
tgt = single_targets[0]

# Build the boxes in absolute pixels. effdet's anchor labeler documents
# ground-truth boxes as [y1, x1, y2, x2] and uses 1-based class ids
# (0 would be treated as background), so convert from COCO xywh to yxyx and
# label the single class 1.
b = [
    [y, x, y + h, x + w]
    for x, y, w, h in (obj['bbox'] for obj in tgt)
    if w > 1 and h > 1
]
l = [1] * len(b)
# reshape keeps a well-formed (0, 4) tensor if no box survives the filter.
boxes  = [torch.tensor(b, dtype=torch.float32, device=DEVICE).reshape(-1, 4)]
labels = [torch.tensor(l, dtype=torch.int64,   device=DEVICE)]

# Unfreeze the whole backbone and create a throw-away optimizer.
for p in net.backbone.parameters():
    p.requires_grad = True
tmp_opt = torch.optim.AdamW(model.parameters(), lr=1e-2)  # deliberately high: 0.01


print("=== Overfit test single image ===")
for i in range(20):
    model.train()
    loss = model(
        img,
        {
          "bbox":      boxes,
          "cls":       labels,
          "img_scale": torch.ones(1, device=DEVICE),
          "img_size":  torch.tensor([[IMAGE_SIZE, IMAGE_SIZE]], device=DEVICE),
        }
    )["loss"]
    tmp_opt.zero_grad()
    loss.backward()
    tmp_opt.step()
    print(f"Iter {i:02d}, loss = {loss.item():.4f}")

Ottimo, questo è il comportamento giusto per un overfit-test: in 20 iterazioni passi da ~1500 a ~0.15 di loss. Vuol dire che la pipeline (resize, transform, format delle bboxes, img_size/img_scale) è finalmente allineata alle aspettative di DetBenchTrain.

In [None]:
def _smoke_targets(targets, batch_size):
    """Build the DetBenchTrain target dict for one smoke-test batch.

    Args:
        targets: one list of annotation dicts per image; each 'bbox' is
            COCO-style [x, y, w, h] in pixels of the resized image.
        batch_size: number of images in the batch.

    Returns:
        dict with the 'bbox', 'cls', 'img_scale' and 'img_size' entries.
    """
    boxes_batch, labels_batch = [], []
    for annotations in targets:
        # effdet's anchor labeler documents ground-truth boxes as
        # [y1, x1, y2, x2]; boxes not larger than 1 px per side are dropped.
        yxyx = [
            [y, x, y + h, x + w]
            for x, y, w, h in (obj['bbox'] for obj in annotations)
            if w > 1 and h > 1
        ]
        # reshape keeps a well-formed (0, 4) tensor when no box survives.
        boxes_batch.append(
            torch.tensor(yxyx, dtype=torch.float32, device=DEVICE).reshape(-1, 4)
        )
        # 1-based class id: the labeler treats 0 as background.
        labels_batch.append(torch.ones(len(yxyx), dtype=torch.int64, device=DEVICE))

    return {
        'bbox':      boxes_batch,
        'cls':       labels_batch,
        'img_scale': torch.ones(batch_size, device=DEVICE),
        'img_size':  torch.tensor([[IMAGE_SIZE, IMAGE_SIZE]] * batch_size, device=DEVICE),
    }


# Quick end-to-end run on the reduced loaders.
# (make sure `optimizer` is the lr=1e-4 one before running this cell)
epoch = 0
epochs_no_improve = 0
best_loss = float('inf')
best_model_path = 'smoke_test_best.pth'

while epoch < EPOCHS and epochs_no_improve < PATIENCE:
    # --- TRAINING
    model.train()
    total_loss = 0.0

    for images, targets, _ in small_train_loader:
        imgs_tensor = torch.stack([img.to(DEVICE) for img in images])

        # Forward + loss
        loss = model(imgs_tensor, _smoke_targets(targets, len(imgs_tensor)))["loss"]

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"[Smoke Test] Epoch {epoch} train loss: {total_loss:.4f}")

    # --- Quick unfreeze: from the next epoch on the whole network trains.
    if epoch == freeze_epochs:
        for p in net.backbone.parameters():
            p.requires_grad = True
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

    # --- VALIDATION
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets, _ in small_val_loader:
            imgs_tensor = torch.stack([img.to(DEVICE) for img in images])
            loss = model(imgs_tensor, _smoke_targets(targets, len(imgs_tensor)))["loss"]
            val_loss += loss.item()

    avg_val_loss = val_loss / len(small_val_loader)
    print(f"[Smoke Test] Epoch {epoch} val loss: {avg_val_loss:.4f}")

    # Early stopping
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        torch.save(model.state_dict(), best_model_path)
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    epoch += 1

print("Smoke‑test completed!")

In [None]:
from effdet import DetBenchPredict

# Rebuild exactly the configuration that was used for training.
config = get_efficientdet_config(VARIANT)
config.num_classes = NUM_CLASSES
config.image_size = (IMAGE_SIZE, IMAGE_SIZE)

# Same architecture as in training (no pre-trained backbone download needed,
# the weights come from the checkpoint), wrapped in the prediction bench.
net_pred = EfficientDet(config, pretrained_backbone=False)
net_pred.class_net = HeadNet(config, num_outputs=NUM_CLASSES)
model_pred = DetBenchPredict(net_pred).to(DEVICE)

# Restore the weights saved at `best_model_path` (smoke test or full
# training, whichever ran last) and switch to inference mode.
checkpoint_state = torch.load(best_model_path, map_location=DEVICE)
model_pred.load_state_dict(checkpoint_state)
model_pred.eval()

In [None]:
# Run the predictor on the validation set and collect COCO-style results.
predictions = []
THRESHOLD = 0.3  # minimum score to keep a detection (try 0.0 when debugging)

coco_val = val_loader.dataset.coco
# The predictor reports a 1-based class index (0 is reserved for background
# and for the padding rows). Map it onto the category ids actually used by
# the annotation file so that COCOeval can match predictions to ground truth.
cat_ids = sorted(coco_val.getCatIds())

with torch.no_grad():
    for images, _, img_ids in val_loader:
        # One forward pass per batch; each row of the output is
        # [x1, y1, x2, y2, score, class] in IMAGE_SIZE x IMAGE_SIZE pixels.
        batch = torch.stack([img.to(DEVICE) for img in images])
        detections = model_pred(batch).cpu().numpy()

        for image_dets, img_id in zip(detections, img_ids):
            # Scale factors back to the original image resolution.
            orig = coco_val.imgs[img_id]
            sx, sy = orig['width']/IMAGE_SIZE, orig['height']/IMAGE_SIZE

            for x1, y1, x2, y2, score, label in image_dets:
                label = int(label)
                # Skip low scores and rows without a valid class (padding).
                if score < THRESHOLD or not 1 <= label <= len(cat_ids):
                    continue

                # Back to COCO [x, y, w, h] at the original image size.
                x1o, y1o = x1*sx, y1*sy
                w, h = (x2-x1)*sx, (y2-y1)*sy

                predictions.append({
                    "image_id":    int(img_id),
                    "category_id": int(cat_ids[label - 1]),
                    "bbox":        [float(x1o), float(y1o), float(w), float(h)],
                    "score":       float(score)
                })

In [None]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import json

# 1) Save the predictions
with open("results_coco.json", "w") as f:
    json.dump(predictions, f)

# 2) Load the ground-truth annotations and make sure 'info' exists
#    (loadRes copies it from the ground truth).
coco_gt = COCO(f"{BASE_DIR}/annotations_val.json")
if "info" not in coco_gt.dataset:
    coco_gt.dataset["info"] = {
        "description": "val annotations",
        "version": "1.0",
        "year": 2025,
        "contributor": "Daniele",
        "date_created": "2025/07/24"
    }

# 3) Load the results and run the evaluation. loadRes cannot handle an empty
#    result list, so report that case explicitly instead of crashing.
if not predictions:
    print("No predictions to evaluate: lower THRESHOLD or check the model.")
else:
    coco_dt = coco_gt.loadRes("results_coco.json")
    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")

    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

In [None]:
# Print the first 5 raw predictions
for i, p in enumerate(predictions[:5]):
    print(f"Pred {i}: image_id={p['image_id']}, bbox={p['bbox']}, score={p['score']:.3f}, cat={p['category_id']}")

# Show the image of the first prediction with its box drawn on top.
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

if not predictions:
    # Nothing to draw: avoid an IndexError on predictions[0].
    print("No predictions available to display.")
else:
    # Take the first prediction
    p = predictions[0]
    img_id = p['image_id']
    bbox   = p['bbox']  # [x, y, w, h]

    # Resolve the image path from img_id through the validation annotations
    # (the predictions were produced from val_loader).
    file_name = val_loader.dataset.coco.imgs[img_id]['file_name']
    img_path  = os.path.join(BASE_DIR, 'val', 'images', file_name)

    # Load and display
    img = Image.open(img_path).convert('RGB')
    fig,ax = plt.subplots(1)
    ax.imshow(img)
    rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3],
                             linewidth=2, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    plt.title(f"ID {img_id}, score {p['score']:.2f}")
    plt.axis('off')
    plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools.coco import COCO
from PIL import Image
import os

# 1) First valid image: real_idx is its position in coco.dataset['images'].
real_idx = val_loader.dataset.valid_indices[0]

# 2) Image metadata and file path.
img_info = val_loader.dataset.coco.dataset['images'][real_idx]
img_id = img_info['id']
file_name = img_info['file_name']
img_path = os.path.join(BASE_DIR, 'val', 'images', file_name)

# 3) Load and show the image.
img = Image.open(img_path).convert('RGB')
fig, ax = plt.subplots(1, figsize=(10,6))
ax.imshow(img)

# 4) Gather the ground-truth boxes and the predicted boxes of this image.
coco_gt = COCO(f"{BASE_DIR}/annotations_val.json")
gt_ids = coco_gt.getAnnIds(imgIds=[img_id])
gt_xywh = [ann['bbox'] for ann in coco_gt.loadAnns(gt_ids)]
pred_xywh = [p['bbox'] for p in predictions if p['image_id'] == img_id]

# 5) Draw ground truth in green first, then predictions in red.
for edge_color, xywh_boxes in (('g', gt_xywh), ('r', pred_xywh)):
    for x, y, w, h in xywh_boxes:
        ax.add_patch(
            patches.Rectangle((x, y), w, h,
                              linewidth=2, edgecolor=edge_color, facecolor='none')
        )

plt.title(f"GT (verde) vs PRED (rosso) su image_id={img_id}")
plt.axis('off')
plt.show()


In [None]:
def iou_xywh(boxA, boxB):
    """Return the intersection-over-union of two [x, y, w, h] boxes.

    Args:
        boxA: first box as (x, y, width, height).
        boxB: second box as (x, y, width, height).

    Returns:
        Intersection area divided by union area, or 0.0 when the union
        area is not positive.
    """
    left_a, top_a, width_a, height_a = boxA
    left_b, top_b, width_b, height_b = boxB

    # Overlap extent along each axis, clamped at 0 for disjoint boxes.
    overlap_w = max(0, min(left_a + width_a, left_b + width_b) - max(left_a, left_b))
    overlap_h = max(0, min(top_a + height_a, top_b + height_b) - max(top_a, top_b))
    intersection = overlap_w * overlap_h

    union = width_a * height_a + width_b * height_b - intersection
    if union > 0:
        return intersection / union
    return 0.0

In [None]:
# Ground-truth boxes and predictions of the image selected above (img_id).
gt_boxes = [ann['bbox'] for ann in coco_gt.loadAnns(gt_ids)]
img_predictions = [p for p in predictions if p['image_id'] == img_id]
pred_boxes = [p['bbox'] for p in img_predictions]

# Report the best IoU of the first 10 predictions of this image. The score
# is read from the same per-image prediction (not from the global list, whose
# i-th entry may belong to another image); default=0.0 covers an image
# without ground-truth boxes.
for i, pred in enumerate(img_predictions[:10]):
    best_iou = max((iou_xywh(pred['bbox'], gb) for gb in gt_boxes), default=0.0)
    print(f"Pred #{i}  score={pred['score']:.2f}  →  max IoU vs GT = {best_iou:.3f}")

In [None]:
# Total number of predictions kept across all validation images.
print(len(predictions))

In [None]:
# Experiment: repeat the single-image overfit test, this time feeding the
# boxes as normalized COCO-style [x, y, w, h] instead of absolute pixels.
# NOTE(review): effdet's anchor labeler appears to expect absolute-pixel
# [y1, x1, y2, x2] boxes and 1-based class ids - confirm whether this cell
# is meant as a negative/diagnostic test.

# 1) Take one image
single_images, single_targets, _ = next(iter(small_train_loader))
img  = single_images[0].unsqueeze(0).to(DEVICE)    # add the batch dimension
tgt  = single_targets[0]                           # list of annotation dicts

# 2) Build the boxes in COCO format [x, y, w, h], normalized by IMAGE_SIZE
b, l = [], []
for obj in tgt:
    x,y,w,h = obj['bbox']
    if w>1 and h>1:
        b.append([x/IMAGE_SIZE,      # x_min
                  y/IMAGE_SIZE,      # y_min
                  w/IMAGE_SIZE,      # width
                  h/IMAGE_SIZE])     # height
        l.append(0)
boxes  = [torch.tensor(b, dtype=torch.float32, device=DEVICE)]
labels = [torch.tensor(l, dtype=torch.int64,   device=DEVICE)]

# 3) Unfreeze everything and optimize with a throw-away optimizer
#    (this updates the weights of the shared `model`).
for p in net.backbone.parameters(): p.requires_grad = True
tmp_opt = torch.optim.AdamW(model.parameters(), lr=1e-4)

print("=== Overfit test single image COCO‐format normalized xywh ===")
for i in range(20):
    model.train()
    # Only 'bbox' and 'cls' are passed here (no 'img_scale' / 'img_size').
    loss = model(
        img,
        {
            "bbox": boxes,
            "cls":  labels,
        }
    )["loss"]
    tmp_opt.zero_grad()
    loss.backward()
    tmp_opt.step()
    print(f"Iter {i:02d}, loss = {loss.item():.4f}")

In [None]:
def _debug_targets(targets, batch_size):
    """Build the DetBenchTrain target dict for one batch of the debug run.

    Args:
        targets: one list of annotation dicts per image; each 'bbox' is
            COCO-style [x, y, w, h] in pixels of the resized image.
        batch_size: number of images in the batch.

    Returns:
        dict with the 'bbox', 'cls', 'img_scale' and 'img_size' entries.
    """
    boxes_batch, labels_batch = [], []
    for annotations in targets:
        # Absolute pixels (not normalized), in the [y1, x1, y2, x2] order
        # documented by effdet's anchor labeler; boxes not larger than 1 px
        # per side are dropped.
        yxyx = [
            [y, x, y + h, x + w]
            for x, y, w, h in (obj['bbox'] for obj in annotations)
            if w > 1 and h > 1
        ]
        # reshape keeps a well-formed (0, 4) tensor when no box survives.
        boxes_batch.append(
            torch.tensor(yxyx, dtype=torch.float32, device=DEVICE).reshape(-1, 4)
        )
        # 1-based class id: the labeler treats 0 as background.
        labels_batch.append(torch.ones(len(yxyx), dtype=torch.int64, device=DEVICE))

    return {
        'bbox':      boxes_batch,
        'cls':       labels_batch,
        'img_scale': torch.ones(batch_size, device=DEVICE),
        'img_size':  torch.tensor([[IMAGE_SIZE, IMAGE_SIZE]] * batch_size, device=DEVICE),
    }


# Smoke test with verbose per-batch diagnostics (tensor ranges, sample
# boxes and labels).
epoch = 0
epochs_no_improve = 0
best_loss = float('inf')
best_model_path = 'smoke_test_best.pth'

while epoch < EPOCHS and epochs_no_improve < PATIENCE:
    # --- TRAINING
    model.train()
    total_loss = 0.0

    for images, targets, _ in small_train_loader:
        # 1) Stack the images
        imgs_tensor = torch.stack([img.to(DEVICE) for img in images])
        print("▷ imgs_tensor", imgs_tensor.shape, imgs_tensor.min(), imgs_tensor.max())

        # 2) Build boxes, labels and image metadata
        batch_targets = _debug_targets(targets, len(imgs_tensor))
        print("▷ sample boxes px:", batch_targets['bbox'][0][:5])
        print("▷ sample labels:", batch_targets['cls'][0][:5])

        # 3) Forward + loss
        loss = model(imgs_tensor, batch_targets)['loss']

        # 4) Backward + step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"[Smoke Test] Epoch {epoch} train loss: {total_loss:.4f}")

    # --- Quick unfreeze: from the next epoch on the whole network trains.
    if epoch == freeze_epochs:
        for p in net.backbone.parameters():
            p.requires_grad = True
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
        print("[Smoke Test] Backbone unfrozen")

    # --- VALIDATION
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets, _ in small_val_loader:
            imgs_tensor = torch.stack([img.to(DEVICE) for img in images])
            print("▷ imgs_tensor.shape, min/max:", imgs_tensor.shape, imgs_tensor.min().item(), imgs_tensor.max().item())

            batch_targets = _debug_targets(targets, len(imgs_tensor))
            print("▷ sample boxes px:", batch_targets['bbox'][0][:5])
            print("▷ sample labels:", batch_targets['cls'][0][:5])

            val_loss += model(imgs_tensor, batch_targets)['loss'].item()

    avg_val_loss = val_loss / len(small_val_loader)
    print(f"[Smoke Test] Epoch {epoch} val loss: {avg_val_loss:.4f}")

    # --- Early stopping
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        torch.save(model.state_dict(), best_model_path)
        epochs_no_improve = 0
        print("[Smoke Test] Best model saved")
    else:
        epochs_no_improve += 1
        print(f"[Smoke Test] No improvement: {epochs_no_improve}/{PATIENCE}")

    epoch += 1

print("Smoke‑test completed!")

In [None]:
# Smoke test: a short train/validate run with early stopping on the small loaders.
epoch = 0
epochs_no_improve = 0
best_loss = float('inf')
best_model_path = 'smoke_test_best.pth'  # state dict of the bench wrapper (`model`)
best_net_path = 'best_net.pth'           # state dict of the bare EfficientDet (`net`)


def _build_effdet_targets(targets, device):
    """Turn per-image COCO annotation lists into the tensors passed to the bench.

    Args:
        targets: one list of annotation dicts per image; each dict carries a
            COCO-style ``'bbox'`` as ``[x, y, w, h]``.
        device: device on which the tensors are created.

    Returns:
        ``(boxes_batch, labels_batch)``: two lists with one tensor per image,
        boxes shaped ``(N, 4)`` and labels shaped ``(N,)``.
    """
    boxes_batch, labels_batch = [], []
    for anns in targets:
        boxes = []
        for obj in anns:
            x, y, w, h = obj['bbox']
            if w > 1 and h > 1:  # drop degenerate boxes
                # effdet's anchor labeler takes ground-truth boxes as
                # [y_min, x_min, y_max, x_max], not xyxy.
                boxes.append([y, x, y + h, x + w])
        # reshape keeps an (0, 4) tensor for images without any valid box;
        # torch.tensor([]) alone would be 1-D.
        boxes_batch.append(
            torch.tensor(boxes, dtype=torch.float32, device=device).reshape(-1, 4)
        )
        # Class ids are 1-indexed for effdet: the labeler subtracts 1 and treats
        # the result -1 as background, so a label of 0 would erase every positive.
        labels_batch.append(torch.ones(len(boxes), dtype=torch.int64, device=device))
    return boxes_batch, labels_batch


while epoch < EPOCHS and epochs_no_improve < PATIENCE:
    # — TRAINING
    model.train()
    total_loss = 0.0

    for batch_idx, (images, targets, _) in enumerate(small_train_loader):
        first_batch = epoch == 0 and batch_idx == 0

        # 1) Stack the images into a single batch tensor
        imgs_tensor = torch.stack([img.to(DEVICE) for img in images])

        # --- DEBUG: show raw targets and boxes for the very first batch
        if first_batch:
            print("Sample targets:", targets)
            print("Sample bboxes:", [obj['bbox'] for t in targets for obj in t][:5])

        # 2) Build the per-image box / label tensors
        boxes_batch, labels_batch = _build_effdet_targets(targets, DEVICE)

        # 3) Every image is fed at IMAGE_SIZE x IMAGE_SIZE with unit scale
        img_sizes  = torch.tensor([[IMAGE_SIZE, IMAGE_SIZE]] * len(imgs_tensor), device=DEVICE)
        img_scales = torch.ones(len(imgs_tensor), device=DEVICE)

        # --- DEBUG: check the pixel value range after normalisation
        if first_batch:
            print("▶ Image min/max:", imgs_tensor.min().item(), imgs_tensor.max().item())

        # 4) Forward pass + loss
        loss_dict = model(imgs_tensor, {
            'bbox':      boxes_batch,
            'cls':       labels_batch,
            'img_scale': img_scales,
            'img_size':  img_sizes
        })
        loss = loss_dict['loss']

        # --- DEBUG: print the first loss value
        if first_batch:
            print(f"▶ First batch loss: {loss.item():.4f}")

        # 5) Backward pass + optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the per-batch mean so the number is comparable with the val loss below
    avg_train_loss = total_loss / max(1, len(small_train_loader))
    print(f"[Smoke Test] Epoch {epoch} train loss: {avg_train_loss:.4f}")

    # — Quick UNFREEZE: once `freeze_epochs` is reached, train the backbone too,
    #   with a fresh optimizer at a low learning rate
    if epoch == freeze_epochs:
        for p in net.backbone.parameters():
            p.requires_grad = True
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
        print("[Smoke Test] Backbone unfrozen")

    # — VALIDATION
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets, _ in small_val_loader:
            imgs_tensor = torch.stack([img.to(DEVICE) for img in images])
            boxes_batch, labels_batch = _build_effdet_targets(targets, DEVICE)

            img_sizes  = torch.tensor([[IMAGE_SIZE, IMAGE_SIZE]] * len(imgs_tensor), device=DEVICE)
            img_scales = torch.ones(len(imgs_tensor), device=DEVICE)

            loss_dict = model(imgs_tensor, {
                'bbox':      boxes_batch,
                'cls':       labels_batch,
                'img_scale': img_scales,
                'img_size':  img_sizes
            })
            val_loss += loss_dict['loss'].item()

    avg_val_loss = val_loss / len(small_val_loader)
    print(f"[Smoke Test] Epoch {epoch} val loss: {avg_val_loss:.4f}")

    # — Early stopping
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        # Write both checkpoint flavours: later cells load `best_model_path`
        # into a bench wrapper and 'best_net.pth' into a bare EfficientDet.
        torch.save(model.state_dict(), best_model_path)
        torch.save(net.state_dict(), best_net_path)
        epochs_no_improve = 0
        print("[Smoke Test] Best model saved")
    else:
        epochs_no_improve += 1
        print(f"[Smoke Test] No improvement: {epochs_no_improve}/{PATIENCE}")

    epoch += 1

print("Smoke‑test completed!")


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

# Visual sanity check: run one validation batch through the model and draw
# the detections on top of the images that were actually fed to the network.
# NOTE(review): `model(imgs_tensor)` is called without targets, so `model` is
# presumably a DetBenchPredict wrapper at this point — confirm the cell order.
model.eval()
imgs, targets, img_ids = next(iter(small_val_loader))
imgs_tensor = torch.stack([img.to(DEVICE) for img in imgs])
with torch.no_grad():
    outputs = model(imgs_tensor)

# Draw every image of the batch
for i, output in enumerate(outputs):
    img = imgs[i].permute(1, 2, 0).cpu().numpy()
    img = np.clip(img, 0, 1)
    img_h, img_w = img.shape[:2]

    # Each row is [x1, y1, x2, y2, score, class]
    detections = output.cpu().numpy()

    fig, ax = plt.subplots(1, figsize=(6, 6))
    ax.imshow(img)
    ax.axis('off')
    ax.set_title(f"Smoke‑test Image {img_ids[i]}")

    for x1, y1, x2, y2, s, _ in detections:
        # The prediction bench pads its output with all-zero rows up to the
        # per-image detection limit; those are not detections.
        if s <= 0:
            continue
        # The displayed image is the network input, so the boxes are drawn in
        # the same pixel space (no rescaling to the original resolution) and
        # only clamped to the image borders.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)
        w, h = max(1, x2 - x1), max(1, y2 - y1)
        rect = patches.Rectangle((x1, y1), w, h,
                                 linewidth=2, edgecolor='red', facecolor='none')
        ax.add_patch(rect)
        ax.text(x1, y1 - 3, f"{s:.2f}",
                color='black', backgroundcolor='yellow',
                fontsize=8, weight='bold')
    plt.show()

In [None]:
# Create the weights folder on the mounted Google Drive
# (-p: create missing parent folders, no error if it already exists).
!mkdir -p /content/drive/MyDrive/projectUPV/datasets/AERALIS_EfficientDet_D0/weights

Check that the best-weights file exists, then copy it to Google Drive:

In [None]:
# Back up the best checkpoint to Google Drive; stop with an error when the
# checkpoint file was never written.
if os.path.isfile(best_model_path):
    weights_dir = '/content/drive/MyDrive/projectUPV/datasets/AERALIS_EfficientDet_D0/weights'
    os.makedirs(weights_dir, exist_ok=True)  # no-op when the folder is already there
    shutil.copy(best_model_path, weights_dir)
    print("EfficientDet weights copied to Google Drive.")
else:
    raise FileNotFoundError(f"Il file dei migliori pesi non è stato trovato: {best_model_path}")

Load the model in evaluation mode (inference):

In [None]:
# Rebuild the network with the training-time configuration and wrap it in the
# prediction bench (which performs box decoding and NMS).
config = get_efficientdet_config(VARIANT)
config.num_classes = NUM_CLASSES
config.image_size = (IMAGE_SIZE, IMAGE_SIZE)

# The backbone weights come from the checkpoint, so no pretrained download is needed
net = EfficientDet(config, pretrained_backbone=False)
net.class_net = HeadNet(config, num_outputs=NUM_CLASSES)

model = DetBenchPredict(net).to(DEVICE)
# map_location lets a checkpoint saved on GPU load on whatever DEVICE is
# available now (e.g. a CPU-only Colab session).
model.load_state_dict(torch.load(best_model_path, map_location=DEVICE))
model.eval()

In [None]:
# Inference on the complete test set: collect COCO-style detection dicts.
predictions = []
raw_output_shown = False  # dump the raw tensor once instead of once per image

with torch.no_grad():
    for imgs, targets, img_ids in test_loader:
        for img, img_id in zip(imgs, img_ids):
            img_tensor = img.unsqueeze(0).to(DEVICE)  # add the batch dimension
            outputs = model(img_tensor)
            output = outputs[0]
            if not raw_output_shown:
                print("RAW output (first 5 rows):\n", output[:5])
                raw_output_shown = True
            # Expect one row per detection: [x1, y1, x2, y2, score, class]
            if output.ndim != 2 or output.shape[1] != 6:
                continue

            detections = output.cpu().numpy()

            # Original image size, used to map boxes back from network-input pixels
            orig_w = test_loader.dataset.coco.imgs[img_id]['width']
            orig_h = test_loader.dataset.coco.imgs[img_id]['height']
            scale_x = orig_w / IMAGE_SIZE
            scale_y = orig_h / IMAGE_SIZE

            for x1, y1, x2, y2, score, label in detections:
                # The prediction bench pads its output with all-zero rows;
                # skip them so they are not exported as 1x1 boxes.
                if score <= 0:
                    continue

                # 1) rescale to the original resolution and clamp to the image
                x1 = max(0,      x1 * scale_x)
                y1 = max(0,      y1 * scale_y)
                x2 = min(orig_w, x2 * scale_x)
                y2 = min(orig_h, y2 * scale_y)

                # 2) enforce a minimum width/height of one pixel
                w = max(1, x2 - x1)
                h = max(1, y2 - y1)

                # 3) store the cleaned prediction with JSON-friendly native types.
                #    effdet already returns 1-indexed class ids (0 is background),
                #    so the id is used as-is instead of adding 1 again.
                predictions.append({
                    "image_id":    int(img_id),
                    "category_id": int(label),
                    "bbox":        [float(x1), float(y1), float(w), float(h)],
                    "score":       float(score)
                })

print(f"\nInference completed. Total predictions collected: {len(predictions)}")

In [None]:
# 1) Recreate the configuration used for training
config = get_efficientdet_config(VARIANT)
config.num_classes = NUM_CLASSES
config.image_size  = (IMAGE_SIZE, IMAGE_SIZE)

# 2) Build EfficientDet without a pretrained backbone (the checkpoint provides
#    all weights) and load the bare-network checkpoint
net_pred = EfficientDet(config, pretrained_backbone=False)
net_pred.class_net = HeadNet(config, num_outputs=NUM_CLASSES)
net_pred.load_state_dict(torch.load('best_net.pth', map_location=DEVICE))

# 3) Wrap the freshly loaded network in the prediction bench. Without this the
#    loop below would run whatever `model_pred` another cell left behind and
#    never use the weights loaded above.
model_pred = DetBenchPredict(net_pred).to(DEVICE)
model_pred.eval()

THRESHOLD = 0.3  # minimum score for a detection to be kept
predictions = []

with torch.no_grad():
    for imgs, _, img_ids in small_val_loader:
        # Iterate over the whole batch so nothing is dropped if batch_size > 1
        for img, img_id in zip(imgs, img_ids):
            img_tensor = img.to(DEVICE).unsqueeze(0)
            output     = model_pred(img_tensor)[0]     # [num_detections, 6]

            # Rows are post-NMS detections: [x1, y1, x2, y2, score, class].
            # NOTE(review): boxes are left in network-input pixels here; if the
            # ground truth is in original-resolution pixels they need rescaling.
            for x1, y1, x2, y2, score, label in output.cpu().numpy():
                if score < THRESHOLD:
                    continue
                w, h = max(1, x2 - x1), max(1, y2 - y1)
                predictions.append({
                    "image_id":    img_id,
                    # effdet class ids are already 1-indexed (0 is background)
                    "category_id": int(label),
                    "bbox":        [float(x1), float(y1), float(w), float(h)],
                    "score":       float(score)
                })

print(f"Inference completed: {len(predictions)} boxes")
print(predictions[:5])

In [None]:
# 1) Recreate the configuration used for training. The image size must be a
#    concrete (height, width) pair: the feature-pyramid and anchor sizes are
#    derived from it when the network is built.
config = get_efficientdet_config(VARIANT)
config.num_classes = NUM_CLASSES
config.image_size  = (IMAGE_SIZE, IMAGE_SIZE)

# 2) Build EfficientDet without a pretrained backbone (not needed for prediction)
net_pred = EfficientDet(config, pretrained_backbone=False)
net_pred.class_net = HeadNet(config, num_outputs=NUM_CLASSES)

# 3) Wrap it in DetBenchPredict and load the bench-level checkpoint
model_pred = DetBenchPredict(net_pred).to(DEVICE)
model_pred.load_state_dict(torch.load('smoke_test_best.pth', map_location=DEVICE))
model_pred.eval()

THRESHOLD = 0.3  # minimum score for a detection to be kept

# 4) Inference on small_val_loader
predictions = []
with torch.no_grad():
    for imgs, targets, img_ids in small_val_loader:
        for img, img_id in zip(imgs, img_ids):
            # img is already a tensor; add the batch dimension
            img_tensor = img.unsqueeze(0).to(DEVICE)

            # use the prediction bench, not the training one
            outputs = model_pred(img_tensor)
            output  = outputs[0]
            # --- DEBUG: show the first 5 raw predictions of each batch's first image
            if img_id == img_ids[0]:
                print("RAW preds [0:5]:\n", output[:5])

            # Expect one row per detection: [x1, y1, x2, y2, score, class]
            if output.ndim != 2 or output.shape[1] != 6:
                continue

            # NOTE(review): boxes are kept in network-input pixels; if the
            # ground truth is in original-resolution pixels they need rescaling
            # by (orig_width / IMAGE_SIZE, orig_height / IMAGE_SIZE).
            for x1, y1, x2, y2, score, label in output.cpu().numpy():
                if score < THRESHOLD:
                    continue
                w, h = max(1, x2 - x1), max(1, y2 - y1)

                predictions.append({
                    "image_id":    img_id,
                    # effdet class ids are already 1-indexed (0 is background)
                    "category_id": int(label),
                    "bbox":        [float(x1), float(y1), float(w), float(h)],
                    "score":       float(score)
                })

print(f"Smoke‑test inference completed. Predictions: {len(predictions)}")
print(predictions[:5])

We save all predictions in a .json file in the format required by COCOeval:

In [None]:
# json cannot serialise NumPy scalars, so cast every field of every
# prediction to a built-in Python type (the dicts are updated in place).
for pred in predictions:
    pred.update(
        category_id=int(pred["category_id"]),
        image_id=int(pred["image_id"]),
        score=float(pred["score"]),
        bbox=[float(coord) for coord in pred["bbox"]],
    )

# Write the detections as a COCO-style results file
with open("results_coco.json", "w") as f:
    f.write(json.dumps(predictions))

In [None]:
# Run after `predictions` has been filled (bbox and score may be NumPy values).

def _as_coco_result(pred):
    """Return a copy of *pred* with native Python types and category_id fixed to 1."""
    x, y, w, h = pred["bbox"]
    return {
        "image_id":    int(pred["image_id"]),
        "category_id": 1,            # the dataset has a single class
        "bbox":        [float(x), float(y), float(w), float(h)],
        "score":       float(pred["score"]),
    }


# 1) Convert to native Python types and normalise category_id
cleaned = [_as_coco_result(p) for p in predictions]
predictions = cleaned

# 2) Save as JSON
with open("results_coco.json", "w") as f:
    f.write(json.dumps(predictions))

In [None]:
# Score the saved detections against the validation ground truth.
coco_gt = COCO(f"{BASE_DIR}/annotations_val.json")
# Guarantee an 'info' entry in the ground-truth dataset before calling loadRes
# (presumably loadRes reads it and the annotation export omits it — confirm).
coco_gt.dataset.setdefault("info", {})

coco_dt = coco_gt.loadRes("results_coco.json")
coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
# Standard pycocotools pipeline: per-image matching, aggregation, summary table
for stage in (coco_eval.evaluate, coco_eval.accumulate, coco_eval.summarize):
    stage()

In [None]:
# Score the test-set detections against the test ground truth.
coco_gt = COCO(f"{BASE_DIR}/annotations_test.json")
coco_gt.dataset.setdefault("info", {})  # make sure the 'info' entry exists

# loadRes reads the results file written during inference with test_loader.
# NOTE(review): no visible cell writes "results_coco2.json" — confirm where it is produced.
coco_dt = coco_gt.loadRes("results_coco2.json")
coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")

coco_eval.evaluate()    # per-image, per-category matching
coco_eval.accumulate()  # aggregate into precision/recall arrays
coco_eval.summarize()   # print the AP / AR table

Final evaluation with pycocotools:

In [None]:
# Ground-truth annotations of the test split
coco_gt = COCO(f"{BASE_DIR}/annotations_test.json")

# Make sure the 'info' entry exists in the ground-truth dataset
coco_gt.dataset.setdefault('info', {})

# Read the detections back from disk as a plain list of dicts.
# NOTE(review): "results_coco.json" holds whichever predictions were saved
# last — confirm they come from the test loader before trusting these numbers.
with open("results_coco.json", "r") as f:
    results = json.loads(f.read())

# Register the detections and run the bbox evaluation
coco_dt = coco_gt.loadRes(results)
coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
for stage in (coco_eval.evaluate, coco_eval.accumulate, coco_eval.summarize):
    stage()

AP and AR values close to zero indicate that the model is not producing correct or meaningful predictions: in practice, it is not detecting the objects. \
Let's run a small test on a few images and annotations to check whether the model recognizes anything at all.

At this point, it may be worth considering that EfficientDet-D0 (or our current EfficientDet configuration) might not be the ideal detector for this specific dataset.

In addition to changing the model, we could also:
- Review preprocessing and augmentations to make the training data more representative of the test set.
- Experiment with learning rate schedulers (e.g., ReduceLROnPlateau or CosineAnnealingLR) to improve convergence.
- Adjust inference filtering thresholds (score and minimum box size) so as not to discard potentially valid predictions.

### EfficientDet D1

### MobileNetV2 + SSD Lite

### MobileNetV3 + SSD Lite