In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import glob
import json
import time
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [None]:
class RadarDataset(Dataset):
    """PNG images paired with per-image text labels, encoded as a YOLOv1 target grid.

    Every ``*.png`` file in ``image_dir`` is one sample. Its label file is looked
    up in ``label_dir`` under the same base name with a ``.txt`` extension; a
    missing label file yields an all-zero target.

    Args:
        image_dir: Directory scanned for ``*.png`` images (sorted by path).
        label_dir: Directory holding the matching ``.txt`` label files.
        S: Grid size; the target has ``S x S`` cells.
        B: Number of box slots per cell (only the first slot is filled here).
        C: Number of classes (length of the one-hot block in each cell).
    """

    def __init__(self, image_dir, label_dir, S=7, B=2, C=11):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.S = S
        self.B = B
        self.C = C
        # Single-channel, fixed 128x128 input; the YOLOv1 head in this file is sized for it.
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize((128, 128)),
            transforms.ToTensor()
        ])
        self.image_paths = sorted(glob.glob(os.path.join(image_dir, '*.png')))

    def __len__(self):
        """Return the number of PNG images found in ``image_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label_matrix)`` for sample ``idx``.

        ``image`` is the transformed image tensor and ``label_matrix`` is a
        float32 tensor of shape ``(S, S, B * 5 + C)``.
        """
        image_path = self.image_paths[idx]
        # splitext swaps only the real extension; str.replace would also rewrite
        # any other ".png" occurring inside the file name.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        label_path = os.path.join(self.label_dir, stem + '.txt')

        # Context manager guarantees the underlying file handle is released.
        with Image.open(image_path) as raw_image:
            image = self.transform(raw_image)

        return image, self._encode_labels(label_path)

    def _encode_labels(self, label_path):
        """Parse one label file into an ``(S, S, B * 5 + C)`` float32 target grid.

        Each non-blank line must hold five numbers ``cls x y w h``.
        NOTE(review): x, y, w, h are presumably normalised to [0, 1] — the cell
        index is computed as ``int(S * coord)``; confirm against the dataset.
        """
        label_matrix = torch.zeros((self.S, self.S, self.B * 5 + self.C), dtype=torch.float32)

        if not os.path.exists(label_path):
            return label_matrix

        class_start_index = self.B * 5
        with open(label_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no annotation.
                    continue
                cls, x, y, w, h = map(float, fields)
                # Clamp so a coordinate of exactly 1.0 lands in the last cell.
                i = min(int(self.S * y), self.S - 1)
                j = min(int(self.S * x), self.S - 1)
                x_cell, y_cell = self.S * x - j, self.S * y - i

                # Only one object per cell: the first annotation wins.
                if label_matrix[i, j, 4] == 0:
                    label_matrix[i, j, 4] = 1
                    label_matrix[i, j, 0:4] = torch.tensor([x_cell, y_cell, w, h])
                    label_matrix[i, j, class_start_index + int(cls)] = 1

        return label_matrix


In [None]:
class YOLOv1(nn.Module):
    """Reduced YOLOv1-style detector for single-channel inputs.

    A convolutional backbone (one stride-2 convolution and three 2x2 max-pools)
    feeds a fully connected head whose first layer expects a 512 x 8 x 8 feature
    map. The output is reshaped to ``(batch, S, S, B * 5 + C)``.

    Args:
        S: Grid size of the output.
        B: Number of box slots per grid cell (5 values each).
        C: Number of class scores per grid cell.
    """

    def __init__(self, S=7, B=2, C=11):
        super().__init__()
        self.S, self.B, self.C = S, B, C

        def conv_relu(in_ch, out_ch, kernel, **extra):
            # One convolution followed by its ReLU activation.
            return [nn.Conv2d(in_ch, out_ch, kernel_size=kernel, **extra), nn.ReLU()]

        backbone = [
            *conv_relu(1, 64, 7, stride=2, padding=3),
            nn.MaxPool2d(2, 2),
            *conv_relu(64, 192, 3, padding=1),
            nn.MaxPool2d(2, 2),
            *conv_relu(192, 128, 1),
            *conv_relu(128, 256, 3, padding=1),
            *conv_relu(256, 256, 1),
            *conv_relu(256, 512, 3, padding=1),
            nn.MaxPool2d(2, 2),
        ]
        self.conv = nn.Sequential(*backbone)

        cell_depth = C + B * 5
        head = [
            nn.Flatten(),
            nn.Linear(512 * 8 * 8, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, S * S * cell_depth),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Run backbone and head, then reshape to ``(-1, S, S, B * 5 + C)``."""
        flat = self.fc(self.conv(x))
        return flat.view(-1, self.S, self.S, self.B * 5 + self.C)

In [None]:
class YoloLoss(nn.Module):
    """Sum-squared-error YOLOv1-style loss over an S x S grid.

    The forward pass reads box slot 0 at channels 0:5 and box slot 1 at
    channels 5:10, so it is written for exactly two predicted boxes per cell;
    class scores start at channel ``B * 5``.

    Args:
        S: Grid size.
        B: Number of box slots per cell (used to locate the class channels).
        C: Number of classes.
        lambda_coord: Weight applied to the box-coordinate term.
        lambda_noobj: Weight applied to confidence terms whose target is 0.
    """

    def __init__(self, S=7, B=2, C=11, lambda_coord=5, lambda_noobj=0.5):
        super(YoloLoss, self).__init__()
        self.mse = nn.MSELoss(reduction='sum')
        self.S = S
        self.B = B
        self.C = C
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj

    def forward(self, predictions, target):
        """Return the total loss divided by the batch size.

        Args:
            predictions: Tensor indexed as ``[batch, row, col, channel]``.
            target: Tensor with the same indexing; channel 4 is the object
                flag, channels 0:4 the box and channels ``B * 5:`` the classes.
        """
        N = predictions.shape[0]
        obj_mask = target[..., 4] > 0
        noobj_mask = target[..., 4] == 0

        coord_loss = 0
        obj_conf_loss = 0
        noobj_conf_loss = 0
        class_loss = 0

        # Visit only the cells that contain an object. nonzero() yields indices
        # in row-major order, i.e. the same order as nested batch/row/col loops.
        for b, i, j in torch.nonzero(obj_mask).tolist():
            cell_pred = predictions[b, i, j]
            cell_target = target[b, i, j]

            iou1 = iou(cell_pred[0:4].unsqueeze(0), cell_target[0:4].unsqueeze(0))
            iou2 = iou(cell_pred[5:9].unsqueeze(0), cell_target[0:4].unsqueeze(0))

            # The slot with the higher IoU is "responsible" for the object;
            # the other slot's confidence is pushed towards zero.
            if iou1 >= iou2:
                responsible_box_preds = cell_pred[0:5]
                other_box_conf_pred = cell_pred[9]
            else:
                responsible_box_preds = cell_pred[5:10]
                other_box_conf_pred = cell_pred[4]

            gt_box = cell_target[0:5]

            # Coordinate loss: width/height are compared in sqrt space; the
            # sign factor keeps the term defined for negative predictions.
            responsible_box_preds_wh_sqrt = torch.sign(responsible_box_preds[2:4]) * torch.sqrt(torch.abs(responsible_box_preds[2:4] + 1e-6))
            gt_box_wh_sqrt = torch.sqrt(gt_box[2:4])
            coord_loss += self.lambda_coord * (self.mse(responsible_box_preds[:2], gt_box[:2]) + self.mse(responsible_box_preds_wh_sqrt, gt_box_wh_sqrt))

            # Object confidence loss: responsible slot should predict 1.
            obj_conf_loss += self.mse(responsible_box_preds[4:5], torch.ones_like(responsible_box_preds[4:5]))

            # No-object confidence loss for the non-responsible slot.
            noobj_conf_loss += self.lambda_noobj * self.mse(other_box_conf_pred.unsqueeze(0), torch.zeros_like(other_box_conf_pred.unsqueeze(0)))

            # Classification loss
            class_loss += self.mse(cell_pred[self.B * 5:], cell_target[self.B * 5:])

        # Cells without any object: both slot confidences are pushed to zero.
        noobj_conf_preds = predictions[noobj_mask][..., [4, 9]]
        noobj_conf_targets = torch.zeros_like(noobj_conf_preds)
        noobj_conf_loss += self.lambda_noobj * self.mse(noobj_conf_preds, noobj_conf_targets)

        total_loss = (coord_loss + obj_conf_loss + noobj_conf_loss + class_loss) / N
        return total_loss

In [None]:
def iou(boxes1, boxes2):
    """Intersection-over-union of centre-format boxes.

    Both arguments hold ``(x_center, y_center, width, height)`` on their last
    axis. The result has the inputs' leading shape; ``1e-6`` is added to the
    union to avoid division by zero.
    """
    def corners(boxes):
        # (x, y, w, h) -> (x1, y1, x2, y2)
        cx, cy = boxes[..., 0], boxes[..., 1]
        half_w = boxes[..., 2] / 2
        half_h = boxes[..., 3] / 2
        return cx - half_w, cy - half_h, cx + half_w, cy + half_h

    left1, top1, right1, bottom1 = corners(boxes1)
    left2, top2, right2, bottom2 = corners(boxes2)

    # Overlap extents, clamped at zero for disjoint boxes.
    overlap_w = (torch.min(right1, right2) - torch.max(left1, left2)).clamp(0)
    overlap_h = (torch.min(bottom1, bottom2) - torch.max(top1, top2)).clamp(0)
    intersection = overlap_w * overlap_h

    area_first = (right1 - left1) * (bottom1 - top1)
    area_second = (right2 - left2) * (bottom2 - top2)

    return intersection / (area_first + area_second - intersection + 1e-6)


In [None]:
def nms(boxes, iou_thresh=0.5):
    """Greedy non-maximum suppression, ignoring class.

    Each entry is a sequence whose element 1 is the score and whose elements
    from index 2 onward are the box passed to ``iou``. Returns the kept
    entries, highest score first.
    """
    def overlap(candidate, reference):
        return iou(torch.tensor(candidate[2:]), torch.tensor(reference[2:]))

    remaining = list(boxes)
    remaining.sort(key=lambda entry: entry[1], reverse=True)

    kept = []
    while remaining:
        top, *rest = remaining
        kept.append(top)
        # Drop everything that overlaps the chosen box too much.
        remaining = [cand for cand in rest if overlap(cand, top) < iou_thresh]
    return kept


In [None]:
def decode_predictions(preds, S=7, B=2, C=11, conf_thresh=0.1):
    """Turn raw grid predictions into per-image lists of detections.

    For every grid cell and box slot whose confidence exceeds ``conf_thresh``,
    a ``[cls, conf, x, y, w, h]`` entry is produced, where ``x``/``y`` are the
    cell-relative centre shifted by the cell index and divided by ``S``, and
    ``cls`` is the argmax over the channels from ``B * 5`` onward. Each
    image's candidates are then filtered with ``nms``.

    Returns a list with one detection list per batch element. ``C`` is
    accepted for signature symmetry but not used.
    """
    class_offset = B * 5
    results = []

    for sample_idx in range(preds.shape[0]):
        grid = preds[sample_idx]
        candidates = []
        for row in range(S):
            for col in range(S):
                cell = grid[row, col]
                for slot in range(B):
                    base = slot * 5
                    score = cell[base + 4]
                    if not score > conf_thresh:
                        continue
                    center_x = (cell[base + 0] + col) / S
                    center_y = (cell[base + 1] + row) / S
                    width = cell[base + 2]
                    height = cell[base + 3]
                    label = torch.argmax(cell[class_offset:]).item()
                    candidates.append([
                        label,
                        score.item(),
                        center_x.item(),
                        center_y.item(),
                        width.item(),
                        height.item(),
                    ])
        results.append(nms(candidates))

    return results


In [None]:
def evaluate(model, loader, device, class_names, image_dir, label_dir):
    """Run inference over ``loader`` and score the detections with pycocotools.

    Ground truth is rebuilt from the label files in ``label_dir`` for every
    ``*.png`` in ``image_dir`` (sorted), and written to ``ground_truth.json``;
    detections are written to ``predictions.json``. Image ids are positional:
    the k-th sample yielded by ``loader`` is matched to the k-th sorted image,
    so the loader must iterate in that same order. Boxes are scaled to a
    128 x 128 pixel frame.

    Returns:
        Dict with keys ``"mAP"``, ``"mAP50"`` and ``"mAP75"``; all zero when
        no detection was produced.
    """
    model.eval()
    detections_by_image = {}
    t_start = time.time()

    with torch.no_grad():
        for batch_idx, (batch, _) in enumerate(tqdm(loader, desc='Evaluating')):
            batch = batch.to(device)
            outputs = model(batch).cpu()
            per_image = decode_predictions(outputs)

            # Position in the dataset = batch index * batch size + offset in batch.
            detections_by_image.update({
                batch_idx * loader.batch_size + offset: per_image[offset]
                for offset in range(batch.size(0))
            })

    t_end = time.time()
    print(f"\nInference Speed: {len(loader.dataset)/(t_end-t_start):.2f} FPS")

    # ---- Ground truth in COCO format -------------------------------------
    image_entries = []
    annotation_entries = []
    next_ann_id = 1

    sorted_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
    for image_id, path in enumerate(sorted_images):
        name = os.path.basename(path)
        image_entries.append({
            "id": image_id,
            "file_name": name,
            "width": 128,
            "height": 128
        })

        txt_path = os.path.join(label_dir, name.replace('.png', '.txt'))
        if not os.path.exists(txt_path):
            continue

        with open(txt_path, 'r') as fh:
            for raw in fh:
                cls, cx, cy, bw, bh = map(float, raw.strip().split())
                # Centre format -> top-left corner, in pixels.
                left = (cx - bw / 2) * 128
                top = (cy - bh / 2) * 128
                annotation_entries.append({
                    "id": next_ann_id,
                    "image_id": image_id,
                    "category_id": int(cls),
                    "bbox": [left, top, bw * 128, bh * 128],
                    "area": bw * 128 * bh * 128,
                    "iscrowd": 0
                })
                next_ann_id += 1

    category_entries = [
        {"id": cat_id, "name": cat_name, "supercategory": "none"}
        for cat_id, cat_name in enumerate(class_names)
    ]

    gt_json = {
        "info": {
            "description": "Radar Dataset",
            "version": "1.0",
            "year": 2025,
            "contributor": "YourName",
            "date_created": time.strftime("%Y-%m-%d %H:%M:%S")
        },
        "images": image_entries,
        "annotations": annotation_entries,
        "categories": category_entries
    }

    with open("ground_truth.json", 'w') as fh:
        json.dump(gt_json, fh, indent=2)

    # ---- Detections in COCO result format --------------------------------
    pred_json = [
        {
            "image_id": image_id,
            "category_id": int(cls),
            "bbox": [(x - w / 2) * 128, (y - h / 2) * 128, w * 128, h * 128],
            "score": float(conf)
        }
        for image_id, detections in detections_by_image.items()
        for cls, conf, x, y, w, h in detections
    ]

    with open("predictions.json", 'w') as fh:
        json.dump(pred_json, fh, indent=2)

    if not pred_json:
        print("No predictions were made. Skipping mAP evaluation.")
        return {"mAP": 0.0, "mAP50": 0.0, "mAP75": 0.0}

    coco_gt = COCO("ground_truth.json")
    coco_dt = coco_gt.loadRes("predictions.json")

    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    stats = coco_eval.stats
    return {
        "mAP": stats[0],    # mAP @ IoU=0.5:0.95
        "mAP50": stats[1],  # mAP @ IoU=0.50
        "mAP75": stats[2]   # mAP @ IoU=0.75
    }

In [None]:
def visualize_predictions(model, device, dataset, num_images=5):
    """Plot ground-truth (green) and predicted (red) boxes for the first samples.

    Args:
        model: Detector whose output is accepted by ``decode_predictions``.
        device: Device the model lives on; each image is moved there.
        dataset: Indexable dataset returning ``(image, label_grid)`` and
            exposing ``S`` (grid size) and ``__len__``.
        num_images: Maximum number of samples to show; capped at
            ``len(dataset)`` so short datasets do not raise ``IndexError``.
    """
    model.eval()

    num_to_show = min(num_images, len(dataset))
    for i in range(num_to_show):
        img, label = dataset[i]
        # Scale normalised boxes by the actual image size instead of a fixed 128.
        img_h, img_w = img.shape[-2], img.shape[-1]

        # Undo the grid encoding: cell-relative centre -> image-relative centre.
        gt_boxes = []
        for row in range(dataset.S):
            for col in range(dataset.S):
                if label[row, col, 4] > 0:
                    gx = ((label[row, col, 0] + col) / dataset.S).item()
                    gy = ((label[row, col, 1] + row) / dataset.S).item()
                    gw = label[row, col, 2].item()
                    gh = label[row, col, 3].item()
                    gt_boxes.append([gx, gy, gw, gh])

        with torch.no_grad():
            img_tensor = img.unsqueeze(0).to(device)
            preds = model(img_tensor).cpu()
            # Decode with the dataset's grid size so predictions and ground
            # truth are interpreted on the same grid.
            decoded_preds = decode_predictions(preds, S=dataset.S, conf_thresh=0.2)

        fig, ax = plt.subplots(1)
        ax.imshow(img.squeeze(0), cmap='gray')

        for x, y, w, h in gt_boxes:
            x_min = (x - w / 2) * img_w
            y_min = (y - h / 2) * img_h
            rect = patches.Rectangle((x_min, y_min), w * img_w, h * img_h, linewidth=2, edgecolor='g', facecolor='none')
            ax.add_patch(rect)

        for cls, conf, x, y, w, h in decoded_preds[0]:
            x_min = (x - w / 2) * img_w
            y_min = (y - h / 2) * img_h
            rect = patches.Rectangle((x_min, y_min), w * img_w, h * img_h, linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            # Draw on this figure's axes explicitly rather than pyplot's current axes.
            ax.text(x_min, y_min - 5, f'Cls {cls}, C: {conf:.2f}', color='red', fontsize=8)

        plt.show()

In [None]:
def train(epochs=100, batch_size=16, lr=1e-4):
    """Train YOLOv1 on the radar dataset, then evaluate and visualise it.

    Args:
        epochs: Number of passes over the dataset.
        batch_size: Batch size for both training and evaluation.
        lr: Learning rate for the Adam optimiser.

    The defaults reproduce the original hard-coded settings.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    class_names = [f"Class{i}" for i in range(11)]

    # NOTE(review): training and evaluation both read the "test" split — confirm
    # this is intended; metrics computed this way are not held-out metrics.
    image_dir = os.path.expanduser("~/myenv/data/RadDet40k128HW009Tv2/images/test")
    label_dir = os.path.expanduser("~/myenv/data/RadDet40k128HW009Tv2/labels/test")

    # Verify paths exist
    assert os.path.isdir(image_dir), f"Image directory not found: {image_dir}"
    assert os.path.isdir(label_dir), f"Label directory not found: {label_dir}"

    print(f"Found {len(os.listdir(image_dir))} images.")
    print(f"Found {len(os.listdir(label_dir))} labels.")

    dataset = RadarDataset(image_dir, label_dir, C=len(class_names))
    # Shuffle for training; keep a separate ordered loader for evaluation,
    # because evaluate() maps batch position to image id.
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    eval_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    model = YOLOv1(C=len(class_names)).to(device)
    criterion = YoloLoss(C=len(class_names))
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
        total_loss = 0

        for imgs, labels in loop:
            imgs, labels = imgs.to(device), labels.to(device)
            preds = model(imgs)
            loss = criterion(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            loop.set_postfix(loss=loss.item())

        print(f"Epoch {epoch+1} avg loss: {total_loss / len(train_loader):.4f}")

    metrics = evaluate(model, eval_loader, device, class_names, image_dir, label_dir)
    print(f"\nmAP: {metrics['mAP']:.4f}, mAP50: {metrics['mAP50']:.4f}, mAP75: {metrics['mAP75']:.4f}")
    print("Visualizing some predictions...")
    visualize_predictions(model, device, dataset)



In [None]:
# Entry point: run train() only when this code is executed directly, not when imported.
if __name__ == '__main__':
    train()