In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import glob
import json
import time
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [None]:
class RadarDataset(Dataset):
    """Dataset of ``*.png`` images paired with same-named ``.txt`` label files.

    Each item is ``(image, label_matrix)``:

    * ``image`` is the result of the transform pipeline built in ``__init__``
      (grayscale, resize to 128x128, ``ToTensor``).
    * ``label_matrix`` is an ``(S, S, B * 5 + C)`` tensor. For a cell that
      contains an object, channels ``0:5`` hold ``[x_cell, y_cell, w, h, 1]``
      and channel ``B * 5 + cls`` is set to 1. The remaining box slots
      (channels ``5 .. B * 5 - 1``) stay zero.

    Label lines are parsed as five whitespace-separated floats
    ``cls x y w h``; ``x`` and ``y`` are multiplied by ``S`` to pick the grid
    cell, so they are presumably normalised to [0, 1] (verify against the
    labelling tool). Only the first object that lands in a cell is recorded.

    Args:
        image_dir: Directory that is globbed for ``*.png`` files.
        label_dir: Directory holding the ``.txt`` label files.
        S: Grid size per side.
        B: Number of box slots per cell.
        C: Number of classes.
    """

    def __init__(self, image_dir, label_dir, S=7, B=2, C=20):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.S = S
        self.B = B
        self.C = C
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize((128, 128)),
            transforms.ToTensor()
        ])

        # Sorted so that dataset indices follow file-name order.
        self.image_paths = sorted(glob.glob(os.path.join(image_dir, '*.png')))

    def __len__(self):
        """Return the number of images found in ``image_dir``."""
        return len(self.image_paths)

    def _cell_index(self, coord):
        """Map a coordinate to a grid index clamped to ``[0, S - 1]``."""
        # Clamping at 0 keeps a slightly negative coordinate from producing a
        # negative index, which would silently wrap to the far side of the grid.
        return min(max(int(self.S * coord), 0), self.S - 1)

    def _load_label_matrix(self, label_path):
        """Encode the label file at ``label_path`` as an S x S target tensor.

        A missing file yields an all-zero tensor (an image without objects).
        """
        label_matrix = torch.zeros((self.S, self.S, self.B * 5 + self.C))
        if not os.path.exists(label_path):
            return label_matrix

        with open(label_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank (e.g. trailing) lines carry no annotation.
                    continue
                cls, x, y, w, h = map(float, fields)
                i = self._cell_index(y)
                j = self._cell_index(x)
                x_cell, y_cell = self.S * x - j, self.S * y - i

                # First object wins; later objects in the same cell are dropped.
                if label_matrix[i, j, 4] == 0:
                    label_matrix[i, j, 0:5] = torch.tensor([x_cell, y_cell, w, h, 1])
                    # Class scores live after all B box slots, matching the
                    # tensor width (B * 5 + C) and the `B * 5:` slice used by
                    # decode_predictions. Writing at `5 + cls` would put the
                    # class bits inside the second box slot when B > 1.
                    label_matrix[i, j, self.B * 5 + int(cls)] = 1
        return label_matrix

    def __getitem__(self, idx):
        """Return ``(image, label_matrix)`` for the image at position ``idx``."""
        image_path = self.image_paths[idx]
        label_path = os.path.join(self.label_dir, os.path.basename(image_path).replace('.png', '.txt'))

        # The context manager releases the file handle once the transform has
        # produced the tensor.
        with Image.open(image_path) as raw_image:
            image = self.transform(raw_image)

        return image, self._load_label_matrix(label_path)


In [None]:
class YOLOv1(nn.Module):
    """Small YOLOv1-style network: a conv feature extractor plus a dense head.

    The first convolution takes a single input channel. The head's flatten
    size of ``512 * 8 * 8`` corresponds to 128x128 inputs (one stride-2
    convolution followed by three 2x max-pools gives a 16x spatial reduction).

    Args:
        S: Grid size per side of the output.
        B: Number of box slots per cell.
        C: Number of classes.
    """

    def __init__(self, S=7, B=2, C=20):
        super().__init__()
        self.S, self.B, self.C = S, B, C

        # Layer order is significant: it fixes both the Sequential indices
        # (state_dict keys) and the order in which weights are initialised.
        feature_layers = [
            nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(192, 128, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        head_layers = [
            nn.Flatten(),
            nn.Linear(512 * 8 * 8, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, S * S * (C + B * 5)),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the network and reshape the flat output to ``(-1, S, S, B * 5 + C)``."""
        flat_output = self.fc(self.conv(x))
        cell_depth = self.B * 5 + self.C
        return flat_output.view(-1, self.S, self.S, cell_depth)

In [None]:
class YoloLoss(nn.Module):
    """Sum-of-squared-errors detection loss over a grid of cell predictions.

    The last axis of both inputs is read as: channels ``0:4`` box values,
    channel ``4`` objectness, channels ``5:`` class targets. Cells are split
    by the target objectness (exactly 1 = object, exactly 0 = no object).

    Args:
        S, B, C: Grid size, box slots per cell, class count (stored only).
        lambda_coord: Weight of the box-coordinate term.
        lambda_noobj: Weight of the objectness term for empty cells.
    """

    def __init__(self, S=7, B=2, C=20, lambda_coord=5, lambda_noobj=0.5):
        super().__init__()
        self.S, self.B, self.C = S, B, C
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        # Summed (not averaged) squared error is used for every term.
        self.mse = nn.MSELoss(reduction='sum')

    def forward(self, predictions, target):
        """Return the scalar loss: coord + object conf + no-object conf + class."""
        target_conf = target[..., 4]
        pred_conf = predictions[..., 4]
        has_object = target_conf == 1
        is_empty = target_conf == 0

        # Rows of the cells that contain an object, gathered once.
        object_pred = predictions[has_object]
        object_true = target[has_object]

        # Box coordinates, object cells only.
        coord_loss = self.lambda_coord * self.mse(object_pred[..., 0:4], object_true[..., 0:4])

        # Objectness, split into object and no-object cells.
        obj_conf_loss = self.mse(pred_conf[has_object], target_conf[has_object])
        noobj_conf_loss = self.lambda_noobj * self.mse(pred_conf[is_empty], target_conf[is_empty])

        # Class scores, object cells only.
        class_loss = self.mse(object_pred[..., 5:], object_true[..., 5:])

        return coord_loss + obj_conf_loss + noobj_conf_loss + class_loss

In [None]:
def iou(box1, box2):
    """Intersection-over-union of two ``[x_min, y_min, w, h]`` boxes.

    Args:
        box1, box2: Indexable boxes whose first two values are the top-left
            corner and whose last two are width and height.

    Returns:
        The IoU ratio, or 0 when the union area is not positive.
    """
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[0] + box1[2], box2[0] + box2[2])
    bottom = min(box1[1] + box1[3], box2[1] + box2[3])

    inter_area = max(0, right - left) * max(0, bottom - top)
    union = box1[2] * box1[3] + box2[2] * box2[3] - inter_area
    # `> 0` also rejects a negative union (degenerate boxes with negative
    # width/height), which would otherwise yield a meaningless negative ratio.
    return inter_area / union if union > 0 else 0


def _center_to_corner(box):
    """Convert ``[cx, cy, w, h]`` to the ``[x_min, y_min, w, h]`` form iou() expects."""
    cx, cy, w, h = box
    return [cx - w / 2, cy - h / 2, w, h]


def nms(boxes, iou_thresh=0.5):
    """Greedy non-maximum suppression over ``[cls, conf, cx, cy, w, h]`` boxes.

    Boxes are visited in descending confidence order; each kept box removes
    every remaining box whose IoU with it is at or above ``iou_thresh``.
    Suppression ignores the class label.

    Args:
        boxes: Detections as ``[cls, conf, cx, cy, w, h]`` with centre-format
            coordinates (the form decode_predictions produces).
        iou_thresh: Overlap at or above which a lower-scored box is dropped.

    Returns:
        The kept boxes, highest confidence first. The input list is not modified.
    """
    remaining = sorted(boxes, key=lambda b: b[1], reverse=True)
    keep = []
    while remaining:
        best = remaining.pop(0)
        keep.append(best)
        # Detections store the box *centre*, while iou() reads the first two
        # values as the top-left corner; convert so that boxes of different
        # sizes are compared at their true positions.
        best_corner = _center_to_corner(best[2:])
        remaining = [
            b for b in remaining
            if iou(_center_to_corner(b[2:]), best_corner) < iou_thresh
        ]
    return keep

In [None]:
def decode_predictions(preds, S=7, B=2, C=20, conf_thresh=0.2):
    """Turn raw grid predictions into per-image lists of detections.

    For every cell and every box slot whose confidence exceeds
    ``conf_thresh``, the cell-relative centre is converted to image-relative
    coordinates and the cell's arg-max class is attached. Each image's
    candidates are then filtered with ``nms``.

    Args:
        preds: Tensor indexed as ``preds[batch, row, col, channel]``. Box slot
            ``k`` occupies channels ``k * 5 .. k * 5 + 4`` as
            ``x, y, w, h, conf``; class scores are every channel from
            ``B * 5`` onward.
        S: Grid size per side.
        B: Number of box slots per cell.
        C: Number of classes. Kept for interface compatibility; the class
            slice is taken as "everything after the box slots".
        conf_thresh: Minimum confidence (exclusive) for a box to be kept.

    Returns:
        One list per image of ``[cls, conf, x, y, w, h]`` entries made of
        plain Python numbers, so they can be serialised with ``json``.
    """
    decoded = []
    for pred in preds:
        boxes = []
        for i in range(S):
            for j in range(S):
                cell = pred[i, j]
                for b_idx in range(B):
                    offset = b_idx * 5
                    conf = cell[offset + 4].item()
                    if conf > conf_thresh:
                        # .item() converts the 0-dim tensors to floats; leaving
                        # them as tensors breaks json.dump further downstream.
                        x = ((cell[offset + 0] + j) / S).item()
                        y = ((cell[offset + 1] + i) / S).item()
                        w = cell[offset + 2].item()
                        h = cell[offset + 3].item()
                        cls = torch.argmax(cell[B * 5:]).item()
                        boxes.append([cls, conf, x, y, w, h])
        decoded.append(nms(boxes))
    return decoded

In [None]:
def _coco_ground_truth(image_dir, label_dir, class_names, img_size):
    """Build a COCO-style ground-truth dict from the ``.txt`` label files."""
    images, annotations = [], []
    ann_id = 1  # annotation ids are numbered from 1
    for img_id, img_file in enumerate(sorted(glob.glob(os.path.join(image_dir, '*.png')))):
        file_name = os.path.basename(img_file)
        images.append({"id": img_id, "file_name": file_name, "width": img_size, "height": img_size})

        label_path = os.path.join(label_dir, file_name.replace('.png', '.txt'))
        if not os.path.exists(label_path):
            continue
        with open(label_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank (e.g. trailing) lines carry no annotation.
                    continue
                cls, x, y, w, h = map(float, fields)
                # Centre-format, image-relative values -> pixel [x_min, y_min, w, h].
                x_min = (x - w / 2) * img_size
                y_min = (y - h / 2) * img_size
                annotations.append({
                    "id": ann_id,
                    "image_id": img_id,
                    "category_id": int(cls),
                    "bbox": [x_min, y_min, w * img_size, h * img_size],
                    "area": w * img_size * h * img_size,
                    "iscrowd": 0
                })
                ann_id += 1

    return {"images": images, "annotations": annotations,
            "categories": [{"id": i, "name": n} for i, n in enumerate(class_names)]}


def _coco_detections(all_preds, img_size):
    """Convert ``{image_id: [[cls, conf, x, y, w, h], ...]}`` to COCO result dicts."""
    detections = []
    for img_id, boxes in all_preds.items():
        for cls, conf, x, y, w, h in boxes:
            # float() accepts both Python numbers and 0-dim tensors; tensors
            # themselves are not JSON serialisable.
            x, y, w, h = float(x), float(y), float(w), float(h)
            detections.append({
                "image_id": img_id,
                "category_id": int(cls),
                "bbox": [(x - w / 2) * img_size, (y - h / 2) * img_size, w * img_size, h * img_size],
                "score": float(conf)
            })
    return detections


def evaluate(model, loader, device, class_names, image_dir, label_dir, img_size=128):
    """Run the model over ``loader`` and print throughput and COCO bbox metrics.

    Ground truth is rebuilt from ``label_dir`` with image ids assigned in
    sorted file-name order of ``image_dir``. Predictions receive ids in the
    order the loader yields them, so the loader must iterate the same images
    in that same order (no shuffling).

    Side effects: writes ``ground_truth.json`` and ``predictions.json`` to the
    current working directory and prints the results.

    Args:
        model: Network returning grid predictions; its ``S``/``B``/``C``
            attributes, when present, are forwarded to ``decode_predictions``.
        loader: DataLoader yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to.
        class_names: Category names; the list index is the category id.
        image_dir: Directory globbed for ``*.png`` files.
        label_dir: Directory holding the ``.txt`` label files.
        img_size: Pixel side length used to scale boxes and reported as image
            width/height. Defaults to 128.

    Returns:
        None.
    """
    model.eval()
    all_preds = {}
    # Decode with the model's own grid settings rather than the decoder defaults.
    grid_size = getattr(model, 'S', 7)
    boxes_per_cell = getattr(model, 'B', 2)
    num_classes = getattr(model, 'C', len(class_names))

    start = time.time()
    next_img_id = 0
    with torch.no_grad():
        for imgs, _ in tqdm(loader, desc='Evaluating'):
            imgs = imgs.to(device)
            preds = model(imgs).cpu()
            decoded = decode_predictions(preds, S=grid_size, B=boxes_per_cell, C=num_classes)
            # A running counter stays correct for any batching scheme, unlike
            # `batch_index * loader.batch_size`, which fails when batch_size is None.
            for boxes in decoded:
                all_preds[next_img_id] = boxes
                next_img_id += 1

    # Note: the timed region includes box decoding and NMS, not just the forward pass.
    elapsed = time.time() - start
    fps = len(loader.dataset) / elapsed if elapsed > 0 else float('inf')
    print(f"\n✅ Inference Speed: {fps:.2f} FPS")

    # Ground truth to COCO format
    gt_json = _coco_ground_truth(image_dir, label_dir, class_names, img_size)
    with open("ground_truth.json", 'w') as f:
        json.dump(gt_json, f)

    # Predictions to COCO format
    pred_json = _coco_detections(all_preds, img_size)
    with open("predictions.json", 'w') as f:
        json.dump(pred_json, f)

    if len(pred_json) == 0:
        print("No predictions above threshold. Skipping mAP evaluation.")
        return

    # COCO mAP evaluation
    coco_gt = COCO("ground_truth.json")
    coco_dt = coco_gt.loadRes("predictions.json")
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()


In [None]:
def train():
    """Train YOLOv1 on the radar dataset for 5 epochs, then evaluate it.

    Uses CUDA when available. ``image_dir`` and ``label_dir`` below are
    placeholders and must be pointed at real data before running. Evaluation
    runs on the same dataset that was used for training.

    Raises:
        FileNotFoundError: If no ``.png`` images are found in ``image_dir``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    class_names = [f"Class{i}" for i in range(20)]
    image_dir = "path/to/images"
    label_dir = "path/to/labels"

    dataset = RadarDataset(image_dir, label_dir, C=len(class_names))
    if len(dataset) == 0:
        # Fail early with a clear message; otherwise the epoch average below
        # divides by zero batches.
        raise FileNotFoundError(
            f"No .png images found in {image_dir!r}; set image_dir and label_dir in train()."
        )

    # Shuffle for optimisation, but keep a second, ordered loader for
    # evaluation: evaluate() matches predictions to images by iteration order.
    train_loader = DataLoader(dataset, batch_size=16, shuffle=True)
    eval_loader = DataLoader(dataset, batch_size=16, shuffle=False)

    model = YOLOv1(C=len(class_names)).to(device)
    criterion = YoloLoss(C=len(class_names))
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    for epoch in range(5):
        model.train()
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
        total_loss = 0

        for imgs, labels in loop:
            imgs, labels = imgs.to(device), labels.to(device)
            preds = model(imgs)
            loss = criterion(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            loop.set_postfix(loss=loss.item())

        print(f"Epoch {epoch+1} avg loss: {total_loss/len(train_loader):.4f}")

    evaluate(model, eval_loader, device, class_names, image_dir, label_dir)

In [None]:
# Entry point: run training (which ends with an evaluation pass) when this
# code is executed directly rather than imported.
if __name__ == "__main__":
    train()