In [None]:
from functools import partial
from typing import Optional, Tuple
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from torchvision.datasets import CocoDetection  # use torchvision, not effdet.data
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchPredict
from effdet.efficientdet import HeadNet

from ml_carbucks import DATA_CAR_DD_DIR
from copy import deepcopy
import torch.nn.functional as F
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# 1. Configuration: fetch the stock config and override it for this dataset.
model_name = "tf_efficientdet_d0"  # small model to start with
config = get_efficientdet_config(model_name)
config.num_classes = 3  # number of classes in this dataset
config.max_det_per_image = 100
config.image_size = (320, 320)  # use a single int instead if the config expects one
# Further config fields can be overridden here, e.g.:
# config.norm_kwargs = dict(eps=1e-3, momentum=0.01)

# 2. Network: pretrained backbone plus a freshly created classification head
#    sized for config.num_classes.
model = EfficientDet(config, pretrained_backbone=True)
model.class_net = HeadNet(config, num_outputs=config.num_classes)

# 3. Benches: both wrap the *same* model object, one for training and one
#    for prediction.
bench = DetBenchTrain(model).cuda()
predictor = DetBenchPredict(model).cuda()

# 4. Datasets (torchvision). Images are only converted to tensors here; a
#    transforms.Resize(config.image_size) step is deliberately left out
#    because resizing happens later, in the DataLoader collate function.
transform = transforms.Compose([transforms.ToTensor()])


def _load_coco_split(image_dir_name, ann_file_name):
    """Build a CocoDetection dataset for one split under DATA_CAR_DD_DIR."""
    return CocoDetection(
        root=DATA_CAR_DD_DIR / "images" / image_dir_name,
        annFile=str(DATA_CAR_DD_DIR / ann_file_name),
        # Each split gets its own copy of the transform pipeline.
        transform=deepcopy(transform),
    )


train_dataset = _load_coco_split("train", "instances_train.json")
val_dataset = _load_coco_split("val", "instances_val.json")


def coco_to_effdet_targets(
    coco_dataset, scales: list, pad_xs: list, pad_ys: list, *, yxyx: bool = False
):
    """
    Convert per-image COCO annotations into effdet-style batch targets.

    Each COCO box ``[x, y, w, h]`` is turned into corner form and mapped into
    the resized/padded image with ``coord * scale + pad``.

    Args:
        coco_dataset: iterable with one entry per image; each entry is an
            iterable of annotation dicts providing ``"bbox"`` (x, y, w, h)
            and ``"category_id"``.
        scales: per-image resize factor, indexed like ``coco_dataset``.
        pad_xs: per-image horizontal offset added to x coordinates.
        pad_ys: per-image vertical offset added to y coordinates.
        yxyx: when True each box row is ``[y1, x1, y2, x2]``; when False
            (default, the historical behaviour) it is ``[x1, y1, x2, y2]``.

    Returns:
        A single dict (not a list of dicts) with two parallel lists, one
        element per image:
          * ``"bbox"``: float32 tensor of shape (num_boxes, 4)
          * ``"cls"``:  int64 tensor of shape (num_boxes,)
        Images without annotations get empty (0, 4) / (0,) tensors.

    NOTE(review): effdet's training targets are conventionally in yxyx order
    while this function defaults to xyxy -- confirm against the installed
    effdet version and pass ``yxyx=True`` from the caller if required.
    ``category_id`` values are passed through unchanged.
    """
    targets = {
        "bbox": [],
        "cls": [],
    }

    for i, annotations in enumerate(coco_dataset):
        scale, pad_x, pad_y = scales[i], pad_xs[i], pad_ys[i]
        bboxes = []
        labels = []
        for ann in annotations:
            x, y, w, h = ann["bbox"]

            # COCO (x, y, w, h) -> corners in the resized + padded image.
            x1 = x * scale + pad_x
            y1 = y * scale + pad_y
            x2 = (x + w) * scale + pad_x
            y2 = (y + h) * scale + pad_y

            bboxes.append([y1, x1, y2, x2] if yxyx else [x1, y1, x2, y2])
            labels.append(ann["category_id"])

        if bboxes:
            targets["bbox"].append(torch.tensor(bboxes, dtype=torch.float32))
            targets["cls"].append(torch.tensor(labels, dtype=torch.int64))
        else:
            # Explicit shapes so downstream code still sees a (0, 4) box tensor.
            targets["bbox"].append(torch.zeros((0, 4), dtype=torch.float32))
            targets["cls"].append(torch.zeros((0,), dtype=torch.int64))
    return targets


def resize_with_padding_tensor(
    img_tensor: torch.Tensor, img_size: Optional[int] = None
) -> Tuple[torch.Tensor, float, int, int]:
    """
    Letterbox a [C, H, W] tensor into an img_size x img_size square.

    The image is resized bilinearly so its longer side becomes ``img_size``
    (aspect ratio preserved) and then zero-padded, centred, to a square.

    Args:
        img_tensor: 3-D image tensor laid out as [C, H, W].
        img_size: side length of the square output; ``None`` returns the
            input untouched.

    Returns:
        ``(new_tensor, scale, pad_x, pad_y)`` where ``scale`` is
        ``img_size / max(H, W)`` and ``pad_x`` / ``pad_y`` are the left / top
        padding in pixels. With ``img_size=None`` this is
        ``(img_tensor, 1.0, 0, 0)``.
    """
    _, height, width = img_tensor.shape
    if img_size is None:
        return img_tensor, 1.0, 0, 0

    scale = img_size / max(height, width)
    # Round instead of truncating so float error cannot shave a pixel off the
    # long side, and clamp to [1, img_size] so extremely elongated images do
    # not end up with a zero-sized dimension (which interpolate rejects).
    new_h = min(img_size, max(1, round(height * scale)))
    new_w = min(img_size, max(1, round(width * scale)))

    # Resize in a single step; interpolate needs a batch dimension.
    resized = F.interpolate(
        img_tensor[None], size=(new_h, new_w), mode="bilinear", align_corners=False
    )[0]

    pad_x = (img_size - new_w) // 2
    pad_y = (img_size - new_h) // 2

    # F.pad order for the last two dims: (left, right, top, bottom). Any odd
    # leftover pixel goes to the right / bottom.
    new_img = F.pad(
        resized, (pad_x, img_size - new_w - pad_x, pad_y, img_size - new_h - pad_y)
    )

    return new_img, scale, pad_x, pad_y


def collate_fn(batch, img_size):
    """
    Collate (image, annotations) samples into a batch for the detector.

    Every image is letterboxed to ``img_size`` via
    ``resize_with_padding_tensor`` and the results are stacked into one
    tensor. The scale and padding used for each image are then handed to
    ``coco_to_effdet_targets`` so the boxes follow the same transformation.

    Args:
        batch: sequence of samples; element 0 of each sample is the image
            tensor and element 1 is its list of COCO annotation dicts.
        img_size: target square side length (forwarded unchanged).

    Returns:
        ``(imgs, targets)`` -- the stacked image tensor and the targets dict.
    """
    letterboxed = [resize_with_padding_tensor(sample[0], img_size) for sample in batch]

    imgs = torch.stack([entry[0] for entry in letterboxed])
    scales = [entry[1] for entry in letterboxed]
    pad_xs = [entry[2] for entry in letterboxed]
    pad_ys = [entry[3] for entry in letterboxed]

    annotations = [sample[1] for sample in batch]
    targets = coco_to_effdet_targets(annotations, scales, pad_xs, pad_ys)

    return imgs, targets


# Settings shared by both loaders. The collate function letterboxes every
# image to the first entry of config.image_size.
_loader_kwargs = dict(
    batch_size=1,
    num_workers=2,
    collate_fn=partial(collate_fn, img_size=config.image_size[0]),
)

# Only the training split is shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)

# 5. Training loop
# The optimizer is built over the training bench's parameters.
optimizer = torch.optim.AdamW(bench.parameters(), lr=1e-3)


def move2cuda(imgs, targets, is_val: bool = False):
    """
    Return CUDA copies of a batch produced by the collate function.

    Args:
        imgs: object with a ``.cuda()`` method (the stacked image tensor).
        targets: dict whose ``"bbox"`` and ``"cls"`` entries are lists of
            tensors.
        is_val: when True, ``"img_size"`` and ``"img_scale"`` keys are added
            to the returned targets with value ``None``.

    Returns:
        ``(new_imgs, new_targets)``; the input dict itself is not modified.
    """
    new_imgs = imgs.cuda()
    new_targets = {
        key: [item.cuda() for item in targets[key]] for key in ("bbox", "cls")
    }

    if is_val:
        # Placeholder entries only; no real size/scale information is supplied.
        new_targets.update(img_size=None, img_scale=None)  # type: ignore

    return new_imgs, new_targets

def ppp(tensor, xyxy, labels=None):
    """
    Plot an image tensor with bounding boxes drawn on top.

    Args:
        tensor: image tensor laid out as [C, H, W]; it is permuted to
            [H, W, C] for ``plt.imshow``.
        xyxy: iterable of boxes, each a tensor of four values
            ``(x1, y1, x2, y2)``.
        labels: optional sequence of per-box labels, matched to boxes by
            index. Boxes without a matching label are annotated with ``-1``.
            Single-element tensor labels are shown as plain Python numbers.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # None instead of a mutable ``[]`` default, which would be shared across calls.
    if labels is None:
        labels = []

    # detach() so tensors that still carry gradients can be converted to numpy.
    img = tensor.detach().permute(1, 2, 0).cpu().numpy()
    plt.imshow(img)
    ax = plt.gca()

    for i, box in enumerate(xyxy):
        label = labels[i] if len(labels) > i else -1
        if isinstance(label, torch.Tensor) and label.numel() == 1:
            # Render "1" rather than "tensor(1, device=...)".
            label = label.item()

        x1, y1, x2, y2 = box.detach().cpu().numpy()

        # Rectangle takes the top-left corner plus width and height.
        rect = Rectangle(
            (x1, y1), x2 - x1, y2 - y1, fill=False, color="red", linewidth=2
        )
        plt.text(x1, y1, str(label), color="white", fontsize=12)
        ax.add_patch(rect)

    plt.axis("off")
    plt.show()



# Sanity check: repeatedly optimise on ONE batch and watch the loss drop.
bench.train()
loader_iter = iter(train_loader)
l_imgs, l_targets = next(loader_iter)
# NOTE(review): is_val=True only adds None-valued "img_size"/"img_scale"
# entries to the targets -- confirm that is intended while training.
imgs, targets = move2cuda(l_imgs, l_targets, is_val=True)

for i in range(200):
    # Clear gradients left over from the previous step before the new pass.
    optimizer.zero_grad()
    output = bench(imgs, targets)
    loss = output["loss"]
    loss.backward()
    optimizer.step()

    if not i % 10:
        print(f"Step {i} loss: {loss.item():<.4f}")

    # Stop early once the single batch is fitted well enough.
    if loss.item() < 0.5:
        print("Loss is low, stopping training.")
        break


# Show the training image with its target boxes. The trailing tuple element
# makes the notebook echo the box tensor as the cell output.
j = 0
ppp(imgs[j], targets["bbox"][j], targets["cls"][j]), targets["bbox"][j]

In [None]:
import numpy as np


# Index of the image in the batch whose detections are inspected.
ii = 0
# Minimum confidence a detection needs in order to be kept and plotted.
CONF_THRESHOLD = 0.23

# NOTE(review): the predictor's train/eval mode is left exactly as the
# previous cell set it; uncomment one of the lines below to force a mode.
# predictor.eval()
# predictor.train()
with torch.no_grad():
    output = predictor(imgs)

# Each detection row has 6 values; column 4 is the confidence and column 5 is
# used as the label below (columns 0-3 are passed to ppp as the box).
# A boolean mask keeps the rows in their original order and replaces the
# row-by-row torch.vstack loop, which re-allocated the result on every hit.
detections = output[ii]
fd = detections[detections[:, 4] > CONF_THRESHOLD]

print(fd)

ppp(imgs[ii], fd[:, :4], fd[:, 5])

In [None]:
from typing import Optional, Tuple
import torch

def resize_with_padding_tensor(
    img_tensor: torch.Tensor, img_size: Optional[int] = None
) -> Tuple[torch.Tensor, float, int, int]:
    """
    Letterbox a [C, H, W] tensor into an img_size x img_size square.

    The image is resized bilinearly so its longer side becomes ``img_size``
    (aspect ratio preserved) and then zero-padded, centred, to a square.

    Args:
        img_tensor: 3-D image tensor laid out as [C, H, W].
        img_size: side length of the square output; ``None`` returns the
            input untouched.

    Returns:
        ``(new_tensor, scale, pad_x, pad_y)`` where ``scale`` is
        ``img_size / max(H, W)`` and ``pad_x`` / ``pad_y`` are the left / top
        padding in pixels. With ``img_size=None`` this is
        ``(img_tensor, 1.0, 0, 0)``.
    """
    _, height, width = img_tensor.shape
    if img_size is None:
        return img_tensor, 1.0, 0, 0

    scale = img_size / max(height, width)
    # Round instead of truncating so float error cannot shave a pixel off the
    # long side, and clamp to [1, img_size] so extremely elongated images do
    # not end up with a zero-sized dimension (which interpolate rejects).
    new_h = min(img_size, max(1, round(height * scale)))
    new_w = min(img_size, max(1, round(width * scale)))

    # Resize in a single step; interpolate needs a batch dimension.
    resized = F.interpolate(
        img_tensor[None], size=(new_h, new_w), mode="bilinear", align_corners=False
    )[0]

    pad_x = (img_size - new_w) // 2
    pad_y = (img_size - new_h) // 2

    # F.pad order for the last two dims: (left, right, top, bottom). Any odd
    # leftover pixel goes to the right / bottom.
    new_img = F.pad(
        resized, (pad_x, img_size - new_w - pad_x, pad_y, img_size - new_h - pad_y)
    )

    return new_img, scale, pad_x, pad_y

from PIL import Image
from torchvision import transforms
from torch import tensor as tt

# Single training image used to eyeball the resize + box conversion.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
img_path = "/home/bachelor/ml-carbucks/data/car_dd/images/train/002080.jpg"

# Load as RGB and convert to a tensor with the same ToTensor-only pipeline.
transform = transforms.Compose([transforms.ToTensor()])
img = Image.open(img_path).convert("RGB")
img_tensor = transform(img)

# One hand-written box, interpreted below as COCO-style (x, y, w, h).
bbox_coco = torch.tensor([[200, 0, 400, 400]])

def convert_coco_to_xyxy_resized(singular_tensor, scale, pad_x, pad_y):
    """
    Map one (x, y, w, h) box into corner form after resize + padding.

    Args:
        singular_tensor: four values unpacked as ``x, y, w, h``.
        scale: factor applied to every coordinate.
        pad_x: offset added to the x coordinates after scaling.
        pad_y: offset added to the y coordinates after scaling.

    Returns:
        A Python list ``[x1, y1, x2, y2]``.
    """
    left, top, box_w, box_h = singular_tensor
    right = left + box_w
    bottom = top + box_h
    return [
        left * scale + pad_x,
        top * scale + pad_y,
        right * scale + pad_x,
        bottom * scale + pad_y,
    ]

# Letterbox the sample image to 320 px and push the hand-written box through
# the same scale/padding so it can be checked visually.
resized_tensor, scale, pad_x, pad_y = resize_with_padding_tensor(
    img_tensor, img_size=320
)

bbox_xyxy = convert_coco_to_xyxy_resized(
    bbox_coco[0], scale=scale, pad_x=pad_x, pad_y=pad_y
)
print(bbox_xyxy)

# Draw the converted box on the resized image.
ppp(resized_tensor, xyxy=tt([bbox_xyxy]), labels=[tt(1)])





# ppp(resized_tensor[0], torch.tensor([[0, 0, 100, 100]]), [torch.tensor(1)])


In [None]:
# Reference view: print the corner form of the hand-written box (x2 = x + w,
# y2 = y + h) and draw those same corners on the original, un-resized image.
print(f"x1:{bbox_coco[0,0]}, y1:{bbox_coco[0,1]}, x2:{bbox_coco[0,0]+bbox_coco[0,2]}, y2:{bbox_coco[0,1]+bbox_coco[0,3]}")
ppp(
    img_tensor,
    xyxy=torch.tensor([[200, 0, 600, 400]]),
    labels=[1],
)