# R-CNN

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

# PART 1 — DATASET & DATALOADERS

import os, random
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import functional as TF
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
if device == "cuda":
    torch.cuda.manual_seed_all(SEED)


CLASS_NAMES = ["Glioma", "Meningioma", "No Tumor", "Pituitary"]
IMG_SIZE = 256  # as decided

class BrainTumorYOLODataset(Dataset):
    """
    Expected structure:
      DATA_ROOT/
        Train/
          Glioma/images/*.png|jpg
          Glioma/labels/*.txt
          ...
        Val/
          same structure...

    YOLO label format per line:
      class_id x_center y_center width height   (all normalized)
    """
    def __init__(self, root_split, img_size=256, augment=True):
        self.root_split = root_split
        self.img_size = img_size
        self.augment = augment
        self.samples = []

        for cname in CLASS_NAMES:
            cdir = os.path.join(root_split, cname)
            img_dir = os.path.join(cdir, "images")
            lbl_dir = os.path.join(cdir, "labels")
            if not (os.path.isdir(img_dir) and os.path.isdir(lbl_dir)):
                continue

            for lbl_file in sorted(os.listdir(lbl_dir)):
                if not lbl_file.endswith(".txt"):
                    continue
                stem = os.path.splitext(lbl_file)[0]
                img_png = os.path.join(img_dir, stem + ".png")
                img_jpg = os.path.join(img_dir, stem + ".jpg")
                if os.path.exists(img_png):
                    img_path = img_png
                elif os.path.exists(img_jpg):
                    img_path = img_jpg
                else:
                    print(f"⚠ No image for label: {lbl_file}")
                    continue
                lbl_path = os.path.join(lbl_dir, lbl_file)
                self.samples.append((img_path, lbl_path))

        print(f"[{root_split}] Loaded {len(self.samples)} image-label pairs")

    def __len__(self):
        return len(self.samples)

    def _load_yolo_labels(self, lbl_path, W, H):
        """
        YOLO: cls x_c y_c w h (normalized).
        Convert to xyxy (pixels); labels become 1..4 (background=0 for FasterRCNN).
        """
        boxes = []
        labels = []
        with open(lbl_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                cls  = int(float(parts[0]))
                xc   = float(parts[1]) * W
                yc   = float(parts[2]) * H
                w    = float(parts[3]) * W
                h    = float(parts[4]) * H
                xmin = xc - w / 2
                ymin = yc - h / 2
                xmax = xc + w / 2
                ymax = yc + h / 2
                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(cls + 1)  # 0 is background

        if len(boxes) == 0:
            boxes = [[0, 0, 1, 1]]
            labels = [1]

        return (
            torch.tensor(boxes, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.int64),
        )

    def __getitem__(self, idx):
        img_path, lbl_path = self.samples[idx]
        img = Image.open(img_path).convert("RGB")
        img = img.resize((self.img_size, self.img_size))
        W, H = self.img_size, self.img_size

        boxes, labels = self._load_yolo_labels(lbl_path, W, H)

        if self.augment:
            if random.random() < 0.5:
                img = TF.hflip(img)
                boxes[:, [0, 2]] = W - boxes[:, [2, 0]]
            if random.random() < 0.5:
                img = TF.vflip(img)
                boxes[:, [1, 3]] = H - boxes[:, [3, 1]]

        img = TF.to_tensor(img)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
        }

        return img, target

def collate_fn(batch):
    return tuple(zip(*batch))

Device: cuda


In [None]:
DATA_ROOT = "/content/drive/MyDrive/ObjectDetection_dataset"

train_root = os.path.join(DATA_ROOT, "Train")
val_root   = os.path.join(DATA_ROOT, "Val")

train_ds = BrainTumorYOLODataset(train_root, img_size=IMG_SIZE, augment=True)
val_ds   = BrainTumorYOLODataset(val_root,   img_size=IMG_SIZE, augment=False)

# FAST SETTINGS: num_workers=0, pin_memory=True
train_loader = DataLoader(
    train_ds, batch_size=2, shuffle=True,
    collate_fn=collate_fn, num_workers=0, pin_memory=True
)
val_loader = DataLoader(
    val_ds, batch_size=2, shuffle=False,
    collate_fn=collate_fn, num_workers=0, pin_memory=True
)

print(f"Train images: {len(train_ds)}, Val images: {len(val_ds)}")

[/content/drive/MyDrive/ObjectDetection_dataset/Train] Loaded 4737 image-label pairs
⚠ No image for label: image(53).txt
⚠ No image for label: image(60).txt
[/content/drive/MyDrive/ObjectDetection_dataset/Val] Loaded 510 image-label pairs
Train images: 4737, Val images: 510


# Baseline R-CNN

In [None]:
import time, numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator


# Light 2D backbone

class SimpleBackbone2D(nn.Module):
    def __init__(self, out_channels=256):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, 2, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),

            nn.Conv2d(64, 128, 3, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),

            nn.Conv2d(128, out_channels, 3, 2, 1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        )
        self.out_channels = out_channels

    def forward(self, x):
        return self.features(x)


num_classes = 1 + len(CLASS_NAMES)  # background + 4 tumors

backbone = SimpleBackbone2D(out_channels=256)
backbone.out_channels = 256

anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=["0"],
    output_size=7,
    sampling_ratio=2,
)

model = FasterRCNN(
    backbone=backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,

    # RPN speed tweaks:
    rpn_pre_nms_top_n_train=200,
    rpn_pre_nms_top_n_test=100,
    rpn_post_nms_top_n_train=100,
    rpn_post_nms_top_n_test=50,
)
model_name = "FasterRCNN_Baseline_2D_fast"
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): SimpleBackbone2D(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ReLU(inplace=True)
    )
  )
  (rpn): RegionProposalNetwork(
    (anchor_generator): AnchorGenerator()
    (head): RPNHead(
      (conv): Sequential(
        (0): Conv2dNo

In [None]:

# MADs (FLOPs-like)

def count_mads(model, input_size=(1, 3, IMG_SIZE, IMG_SIZE), device="cuda"):
    hooks, mads = [], []

    def conv_hook(m, inp, out):
        if not isinstance(m, nn.Conv2d):
            return
        x = inp[0]
        Cin  = x.shape[1]
        Cout = m.out_channels
        kH, kW = m.kernel_size
        Hout, Wout = out.shape[2], out.shape[3]
        groups = m.groups
        mads.append(Cout * (Cin // groups) * kH * kW * Hout * Wout)

    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            hooks.append(module.register_forward_hook(conv_hook))

    dummy = torch.randn(*input_size).to(device)
    model.eval()
    with torch.no_grad():
        _ = model(dummy)

    for h in hooks:
        h.remove()
    return sum(mads)

In [None]:
# IoU & simple detection metrics

def box_iou(boxes1, boxes2):
    area1 = (boxes1[:, 2] - boxes1[:, 0]).clamp(min=0) * \
            (boxes1[:, 3] - boxes1[:, 1]).clamp(min=0)
    area2 = (boxes2[:, 2] - boxes2[:, 0]).clamp(min=0) * \
            (boxes2[:, 3] - boxes2[:, 1]).clamp(min=0)

    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    union = area1[:, None] + area2 - inter
    return inter / union.clamp(min=1e-6)


def evaluate_detection_simple(model, loader, iou_thresh=0.5):
    model.eval()
    all_ious = []
    gt_total = 0
    matched_gt = 0

    with torch.no_grad():
        for images, targets in loader:
            imgs = [img.to(device) for img in images]
            preds = model(imgs)

            for pred, tgt in zip(preds, targets):
                gt_boxes = tgt["boxes"].to(device)
                gt_total += gt_boxes.shape[0]
                if gt_boxes.numel() == 0 or pred["boxes"].numel() == 0:
                    continue
                ious = box_iou(gt_boxes, pred["boxes"].to(device))
                max_ious, _ = ious.max(dim=1)
                all_ious.extend(max_ious.cpu().tolist())
                matched_gt += (max_ious >= iou_thresh).sum().item()

    mean_iou = float(np.mean(all_ious)) if len(all_ious) > 0 else 0.0
    recall = matched_gt / gt_total if gt_total > 0 else 0.0
    return mean_iou, recall

In [None]:
# Training loop

optimizer = optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 5
train_losses, val_losses = [], []
val_mean_ious, val_recalls = [], []

print("\n--- TRAINING FAST BASELINE FASTER R-CNN (2D) ---\n")
train_start = time.time()

for ep in range(1, num_epochs + 1):
    # ---- TRAIN ----
    model.train()
    ep_loss = 0.0
    ep_start = time.time()

    for images, targets in train_loader:
        imgs = [img.to(device) for img in images]
        tgts = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(imgs, tgts)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        ep_loss += loss.item()

    ep_loss /= max(len(train_loader), 1)
    train_losses.append(ep_loss)

    # ---- VALIDATION LOSS
    model.train()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            imgs = [img.to(device) for img in images]
            tgts = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(imgs, tgts)
            loss = sum(loss_dict.values())
            val_loss += loss.item()
    val_loss /= max(len(val_loader), 1)
    val_losses.append(val_loss)

    # VALIDATION IoU & Recall (eval mode)
    mean_iou, recall = evaluate_detection_simple(model, val_loader)
    val_mean_ious.append(mean_iou)
    val_recalls.append(recall)

    ep_time = time.time() - ep_start
    print(
        f"[{model_name}] Epoch {ep:>2}/{num_epochs} | "
        f"TrainLoss={ep_loss:7.4f} | ValLoss={val_loss:7.4f} | "
        f"ValMeanIoU={mean_iou:7.4f} | ValRecall={recall:7.4f} | "
        f"Time={ep_time:6.2f}s"
    )

train_end = time.time()
total_train_time = train_end - train_start

final_val_iou = val_mean_ious[-1] if val_mean_ious else 0.0
final_val_recall = val_recalls[-1] if val_recalls else 0.0


--- TRAINING FAST BASELINE FASTER R-CNN (2D) ---

[FasterRCNN_Baseline_2D_fast] Epoch  1/5 | TrainLoss= 0.6778 | ValLoss= 0.6717 | ValMeanIoU= 0.4332 | ValRecall= 0.3892 | Time=3163.78s
[FasterRCNN_Baseline_2D_fast] Epoch  2/5 | TrainLoss= 0.5930 | ValLoss= 0.7070 | ValMeanIoU= 0.4074 | ValRecall= 0.3892 | Time=217.52s
[FasterRCNN_Baseline_2D_fast] Epoch  3/5 | TrainLoss= 0.5713 | ValLoss= 0.6547 | ValMeanIoU= 0.5405 | ValRecall= 0.4144 | Time=218.99s
[FasterRCNN_Baseline_2D_fast] Epoch  4/5 | TrainLoss= 0.5602 | ValLoss= 0.6129 | ValMeanIoU= 0.5244 | ValRecall= 0.4649 | Time=216.83s
[FasterRCNN_Baseline_2D_fast] Epoch  5/5 | TrainLoss= 0.5484 | ValLoss= 0.6440 | ValMeanIoU= 0.5399 | ValRecall= 0.4847 | Time=218.09s


In [None]:
# Params, MADs, runtime, memory

params = sum(p.numel() for p in model.parameters())
mads   = count_mads(model, input_size=(1, 3, IMG_SIZE, IMG_SIZE), device=device)

print(f"\n[{model_name}] Params: {params}")
print(f"[{model_name}] MADs  : {mads:.3g}")

dummy = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(device)

# Inference runtime
model.eval()
if device == "cuda":
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.synchronize()

with torch.no_grad():
    for _ in range(5):
        _ = model([dummy.squeeze(0)])
if device == "cuda":
    torch.cuda.synchronize()
t0 = time.time()
with torch.no_grad():
    for _ in range(20):
        _ = model([dummy.squeeze(0)])
if device == "cuda":
    torch.cuda.synchronize()
t1 = time.time()

infer_ms = (t1 - t0) / 20 * 1000.0
mem_infer_mb = (
    torch.cuda.max_memory_allocated() / (1024 ** 2) if device == "cuda" else 0.0
)

# Train-step runtime
model.train()
dummy_target = [{
    "boxes": torch.tensor([[50.0, 50.0, 150.0, 150.0]], device=device),
    "labels": torch.tensor([1], dtype=torch.int64, device=device),
}]
opt2 = optim.SGD(model.parameters(), lr=1e-4)

if device == "cuda":
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.synchronize()

for _ in range(5):
    loss_dict = model([dummy.squeeze(0)], dummy_target)
    loss = sum(loss_dict.values())
    opt2.zero_grad()
    loss.backward()
    opt2.step()

if device == "cuda":
    torch.cuda.synchronize()
t0 = time.time()
for _ in range(20):
    loss_dict = model([dummy.squeeze(0)], dummy_target)
    loss = sum(loss_dict.values())
    opt2.zero_grad()
    loss.backward()
    opt2.step()
if device == "cuda":
    torch.cuda.synchronize()
t1 = time.time()

train_ms = (t1 - t0) / 20 * 1000.0
mem_train_mb = (
    torch.cuda.max_memory_allocated() / (1024 ** 2) if device == "cuda" else 0.0
)
mem_mb = max(mem_infer_mb, mem_train_mb)

print(f"\n[{model_name}] Inference / image: {infer_ms:.3f} ms")
print(f"[{model_name}] Train step / image: {train_ms:.3f} ms")
print(f"[{model_name}] Peak GPU memory: {mem_mb:.2f} MB")
print(f"[{model_name}] Total train time: {total_train_time/60:.2f} min")


[FasterRCNN_Baseline_2D_fast] Params: 14898517
[FasterRCNN_Baseline_2D_fast] MADs  : 1.22e+10

[FasterRCNN_Baseline_2D_fast] Inference / image: 11.405 ms
[FasterRCNN_Baseline_2D_fast] Train step / image: 29.891 ms
[FasterRCNN_Baseline_2D_fast] Peak GPU memory: 477.66 MB
[FasterRCNN_Baseline_2D_fast] Total train time: 67.25 min


In [None]:
# Save weights + metrics CSV

import pandas as pd

torch.save(model.state_dict(), f"/content/{model_name}.pth")

csv_path = f"/content/{model_name}_metrics.csv"
df = pd.DataFrame({
    "Model": [model_name],
    "Params": [params],
    "MADs": [mads],
    "Infer_ms": [infer_ms],
    "Train_ms": [train_ms],
    "Memory_MB": [mem_mb],
    "TotalTrainTimeSec": [total_train_time],
    "FinalValLoss": [val_losses[-1]],
    "FinalValMeanIoU": [final_val_iou],
    "FinalValRecall": [final_val_recall],
})
df.to_csv(csv_path, index=False)
print(f"\n[{model_name}] Metrics CSV saved at: {csv_path}")
print(f"[{model_name}] Done.")


[FasterRCNN_Baseline_2D_fast] Metrics CSV saved at: /content/FasterRCNN_Baseline_2D_fast_metrics.csv
[FasterRCNN_Baseline_2D_fast] Done.


# Oriented-1D R-CNN

In [None]:
import time, numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as TF


class OrientedConv1D(nn.Module):
    """
    Approx oriented 1D kernels:
    - Rotate feature map by D angles
    - Depthwise 1D conv along flattened spatial dim
    - Rotate back and average.
    """
    def __init__(self, channels, kernel_size=15, num_angles=4):
        super().__init__()
        self.channels = channels
        self.kernel_size = kernel_size
        self.num_angles = num_angles
        self.conv1d = nn.Conv1d(
            channels, channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            groups=channels,
            bias=False
        )

    def forward(self, x):
        B, C, H, W = x.shape
        out_sum = 0.0

        for a in range(self.num_angles):
            angle = a * 180.0 / self.num_angles
            x_rot = TF.rotate(x, angle, interpolation=TF.InterpolationMode.BILINEAR)
            flat = x_rot.view(B, C, -1)
            y = self.conv1d(flat)
            y = y.view(B, C, H, W)
            y_inv = TF.rotate(y, -angle, interpolation=TF.InterpolationMode.BILINEAR)
            out_sum = out_sum + y_inv

        return out_sum / self.num_angles


class OrientedBackbone(nn.Module):
    def __init__(self, out_channels=256, num_angles=4):
        super().__init__()
        self.block1 = nn.Sequential(
            OrientedConv1D(3, kernel_size=15, num_angles=num_angles),
            nn.BatchNorm2d(3),
            nn.ReLU(True),
            nn.Conv2d(3, 64, 3, 2, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
        )
        self.block2 = nn.Sequential(
            OrientedConv1D(64, kernel_size=15, num_angles=num_angles),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 128, 3, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )
        self.block3 = nn.Sequential(
            OrientedConv1D(128, kernel_size=15, num_angles=num_angles),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, out_channels, 3, 2, 1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        )
        self.out_channels = out_channels

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        return x


num_classes = 1 + len(CLASS_NAMES)

backbone = OrientedBackbone(out_channels=256, num_angles=4)
backbone.out_channels = 256

anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=["0"],
    output_size=7,
    sampling_ratio=2,
)

model = FasterRCNN(
    backbone=backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,

    # same RPN speed tweaks
    rpn_pre_nms_top_n_train=200,
    rpn_pre_nms_top_n_test=100,
    rpn_post_nms_top_n_train=100,
    rpn_post_nms_top_n_test=50,
)
model_name = "FasterRCNN_Oriented1D_fast"
model.to(device)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): OrientedBackbone(
    (block1): Sequential(
      (0): OrientedConv1D(
        (conv1d): Conv1d(3, 3, kernel_size=(15,), stride=(1,), padding=(7,), groups=3, bias=False)
      )
      (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
    (block2): Sequential(
      (0): OrientedConv1D(
        (conv1d): Conv1d(64, 64, kernel_size=(15,), stride=(1,), padding=(7,), groups=64, bias=False)
      )
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): 

In [None]:
# MADs with OrientedConv1D

def count_mads_oriented(model, input_size=(1, 3, IMG_SIZE, IMG_SIZE), device="cuda"):
    hooks, mads = [], []

    def hook_fn(m, inp, out):
        x = inp[0]
        if isinstance(m, nn.Conv2d):
            Cin  = x.shape[1]
            Cout = m.out_channels
            kH, kW = m.kernel_size
            Hout, Wout = out.shape[2], out.shape[3]
            groups = m.groups
            mads.append(Cout * (Cin // groups) * kH * kW * Hout * Wout)
        elif isinstance(m, OrientedConv1D):
            B, C, H, W = x.shape
            K = m.kernel_size
            D = m.num_angles
            mads.append(D * C * K * H * W)  # approximate ops

    for module in model.modules():
        if isinstance(module, nn.Conv2d) or isinstance(module, OrientedConv1D):
            hooks.append(module.register_forward_hook(hook_fn))

    dummy = torch.randn(*input_size).to(device)
    model.eval()
    with torch.no_grad():
        _ = model(dummy)

    for h in hooks:
        h.remove()
    return sum(mads)

In [None]:


def box_iou(boxes1, boxes2):
    area1 = (boxes1[:, 2] - boxes1[:, 0]).clamp(min=0) * \
            (boxes1[:, 3] - boxes1[:, 1]).clamp(min=0)
    area2 = (boxes2[:, 2] - boxes2[:, 0]).clamp(min=0) * \
            (boxes2[:, 3] - boxes2[:, 1]).clamp(min=0)

    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    union = area1[:, None] + area2 - inter
    return inter / union.clamp(min=1e-6)


def evaluate_detection_simple(model, loader, iou_thresh=0.5):
    model.eval()
    all_ious = []
    gt_total = 0
    matched_gt = 0

    with torch.no_grad():
        for images, targets in loader:
            imgs = [img.to(device) for img in images]
            preds = model(imgs)

            for pred, tgt in zip(preds, targets):
                gt_boxes = tgt["boxes"].to(device)
                gt_total += gt_boxes.shape[0]
                if gt_boxes.numel() == 0 or pred["boxes"].numel() == 0:
                    continue
                ious = box_iou(gt_boxes, pred["boxes"].to(device))
                max_ious, _ = ious.max(dim=1)
                all_ious.extend(max_ious.cpu().tolist())
                matched_gt += (max_ious >= iou_thresh).sum().item()

    mean_iou = float(np.mean(all_ious)) if len(all_ious) > 0 else 0.0
    recall = matched_gt / gt_total if gt_total > 0 else 0.0
    return mean_iou, recall

In [None]:
# Training loop

optimizer = optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 5  # same as baseline

train_losses, val_losses = [], []
val_mean_ious, val_recalls = [], []

print("\n--- TRAINING FAST ORIENTED-1D FASTER R-CNN (4 ANGLES) ---\n")
train_start = time.time()

for ep in range(1, num_epochs + 1):
    # ---- TRAIN ----
    model.train()
    ep_loss = 0.0
    ep_start = time.time()

    for images, targets in train_loader:
        imgs = [img.to(device) for img in images]
        tgts = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(imgs, tgts)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        ep_loss += loss.item()

    ep_loss /= max(len(train_loader), 1)
    train_losses.append(ep_loss)

    # VALIDATION LOSS
    model.train()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            imgs = [img.to(device) for img in images]
            tgts = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(imgs, tgts)
            loss = sum(loss_dict.values())
            val_loss += loss.item()
    val_loss /= max(len(val_loader), 1)
    val_losses.append(val_loss)

    #  VALIDATION IoU & Recall
    mean_iou, recall = evaluate_detection_simple(model, val_loader)
    val_mean_ious.append(mean_iou)
    val_recalls.append(recall)

    ep_time = time.time() - ep_start
    print(
        f"[{model_name}] Epoch {ep:>2}/{num_epochs} | "
        f"TrainLoss={ep_loss:7.4f} | ValLoss={val_loss:7.4f} | "
        f"ValMeanIoU={mean_iou:7.4f} | ValRecall={recall:7.4f} | "
        f"Time={ep_time:6.2f}s"
    )

train_end = time.time()
total_train_time = train_end - train_start

final_val_iou = val_mean_ious[-1] if val_mean_ious else 0.0
final_val_recall = val_recalls[-1] if val_recalls else 0.0


--- TRAINING FAST ORIENTED-1D FASTER R-CNN (4 ANGLES) ---

[FasterRCNN_Oriented1D_fast] Epoch  1/5 | TrainLoss= 0.7959 | ValLoss= 0.7978 | ValMeanIoU= 0.2271 | ValRecall= 0.2252 | Time=878.55s
[FasterRCNN_Oriented1D_fast] Epoch  2/5 | TrainLoss= 0.6355 | ValLoss= 0.8250 | ValMeanIoU= 0.3682 | ValRecall= 0.3171 | Time=881.83s
[FasterRCNN_Oriented1D_fast] Epoch  3/5 | TrainLoss= 0.6298 | ValLoss= 0.7383 | ValMeanIoU= 0.5113 | ValRecall= 0.4829 | Time=883.70s
[FasterRCNN_Oriented1D_fast] Epoch  4/5 | TrainLoss= 0.6387 | ValLoss= 0.7406 | ValMeanIoU= 0.6037 | ValRecall= 0.7027 | Time=883.56s
[FasterRCNN_Oriented1D_fast] Epoch  5/5 | TrainLoss= 0.6348 | ValLoss= 0.6720 | ValMeanIoU= 0.6463 | ValRecall= 0.7009 | Time=884.90s


In [None]:
# Params, MADs, runtime, memory

params = sum(p.numel() for p in model.parameters())
mads   = count_mads_oriented(model, input_size=(1, 3, IMG_SIZE, IMG_SIZE), device=device)

print(f"\n[{model_name}] Params: {params}")
print(f"[{model_name}] MADs  : {mads:.3g}")

dummy = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(device)

model.eval()
if device == "cuda":
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.synchronize()

with torch.no_grad():
    for _ in range(5):
        _ = model([dummy.squeeze(0)])
if device == "cuda":
    torch.cuda.synchronize()
t0 = time.time()
with torch.no_grad():
    for _ in range(20):
        _ = model([dummy.squeeze(0)])
if device == "cuda":
    torch.cuda.synchronize()
t1 = time.time()

infer_ms = (t1 - t0) / 20 * 1000.0
mem_infer_mb = (
    torch.cuda.max_memory_allocated() / (1024 ** 2) if device == "cuda" else 0.0
)

model.train()
dummy_target = [{
    "boxes": torch.tensor([[50.0, 50.0, 150.0, 150.0]], device=device),
    "labels": torch.tensor([1], dtype=torch.int64, device=device),
}]
opt2 = optim.SGD(model.parameters(), lr=1e-4)

if device == "cuda":
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.synchronize()

for _ in range(5):
    loss_dict = model([dummy.squeeze(0)], dummy_target)
    loss = sum(loss_dict.values())
    opt2.zero_grad()
    loss.backward()
    opt2.step()

if device == "cuda":
    torch.cuda.synchronize()
t0 = time.time()
for _ in range(20):
    loss_dict = model([dummy.squeeze(0)], dummy_target)
    loss = sum(loss_dict.values())
    opt2.zero_grad()
    loss.backward()
    opt2.step()
if device == "cuda":
    torch.cuda.synchronize()
t1 = time.time()

train_ms = (t1 - t0) / 20 * 1000.0
mem_train_mb = (
    torch.cuda.max_memory_allocated() / (1024 ** 2) if device == "cuda" else 0.0
)
mem_mb = max(mem_infer_mb, mem_train_mb)

print(f"\n[{model_name}] Inference / image: {infer_ms:.3f} ms")
print(f"[{model_name}] Train step / image: {train_ms:.3f} ms")
print(f"[{model_name}] Peak GPU memory: {mem_mb:.2f} MB")
print(f"[{model_name}] Total train time: {total_train_time/60:.2f} min")



[FasterRCNN_Oriented1D_fast] Params: 14901832
[FasterRCNN_Oriented1D_fast] MADs  : 1.33e+10

[FasterRCNN_Oriented1D_fast] Inference / image: 42.074 ms
[FasterRCNN_Oriented1D_fast] Train step / image: 160.282 ms
[FasterRCNN_Oriented1D_fast] Peak GPU memory: 1295.31 MB
[FasterRCNN_Oriented1D_fast] Total train time: 73.54 min


In [None]:
# Save weights + metrics CSV

import pandas as pd

torch.save(model.state_dict(), f"/content/{model_name}.pth")

csv_path = f"/content/{model_name}_metrics.csv"
df = pd.DataFrame({
    "Model": [model_name],
    "Params": [params],
    "MADs": [mads],
    "Infer_ms": [infer_ms],
    "Train_ms": [train_ms],
    "Memory_MB": [mem_mb],
    "TotalTrainTimeSec": [total_train_time],
    "FinalValLoss": [val_losses[-1]],
    "FinalValMeanIoU": [final_val_iou],
    "FinalValRecall": [final_val_recall],
})
df.to_csv(csv_path, index=False)
print(f"\n[{model_name}] Metrics CSV saved at: {csv_path}")
print(f"[{model_name}] Done.")


[FasterRCNN_Oriented1D_fast] Metrics CSV saved at: /content/FasterRCNN_Oriented1D_fast_metrics.csv
[FasterRCNN_Oriented1D_fast] Done.


# Comparision of metrics

In [None]:
# PART 4 — METRIC COMPARISON (FAST BASELINE vs FAST ORIENTED-1D)

import pandas as pd
import matplotlib.pyplot as plt
import os

baseline_csv = "/content/FasterRCNN_Baseline_2D_fast_metrics.csv"
oriented_csv = "/content/FasterRCNN_Oriented1D_fast_metrics.csv"

df_b = pd.read_csv(baseline_csv)
df_o = pd.read_csv(oriented_csv)

df_b["ModelType"] = "Baseline"
df_o["ModelType"] = "Oriented1D"

df = pd.concat([df_b, df_o], ignore_index=True)
display(df)

metrics = [
    "Params",
    "MADs",
    "Infer_ms",
    "Train_ms",
    "Memory_MB",
    "FinalValLoss",
    "FinalValMeanIoU",
    "FinalValRecall",
]

out_dir = "/content/detection_comparison_plots_fast"
os.makedirs(out_dir, exist_ok=True)
print("Saving plots to:", out_dir)

for m in metrics:
    plt.figure(figsize=(5,4))
    vals = [
        df[df.ModelType=="Baseline"][m].values[0],
        df[df.ModelType=="Oriented1D"][m].values[0],
    ]
    plt.bar(["Baseline","Oriented1D"], vals)
    plt.title(f"{m} Comparison")
    plt.ylabel(m)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(f"{out_dir}/{m}_comparison.png", dpi=200)
    plt.close()

print("Saved metric comparison plots.")

summary = pd.DataFrame({
    "Metric": metrics,
    "Baseline":  [df_b[m].values[0] for m in metrics],
    "Oriented1D":[df_o[m].values[0] for m in metrics],
})
summary["Diff (O1D - Base)"] = summary["Oriented1D"] - summary["Baseline"]
print("\n===== FAST DETECTION METRIC COMPARISON =====\n")
display(summary)

summary.to_csv(f"{out_dir}/detection_comparison_summary_fast.csv", index=False)
print("Summary CSV:", f"{out_dir}/detection_comparison_summary_fast.csv")

Unnamed: 0,Model,Params,MADs,Infer_ms,Train_ms,Memory_MB,TotalTrainTimeSec,FinalValLoss,FinalValMeanIoU,FinalValRecall,ModelType
0,FasterRCNN_Baseline_2D_fast,14898517,12226560000,11.404622,29.890966,477.663086,4035.218812,0.643953,0.53993,0.484685,Baseline
1,FasterRCNN_Oriented1D_fast,14901832,13263360000,65.307355,160.503936,1297.27832,4412.536252,0.671964,0.64625,0.700901,Oriented1D


Saving plots to: /content/detection_comparison_plots_fast
Saved metric comparison plots.

===== FAST DETECTION METRIC COMPARISON =====



Unnamed: 0,Metric,Baseline,Oriented1D,Diff (O1D - Base)
0,Params,14898520.0,14901830.0,3315.0
1,MADs,12226560000.0,13263360000.0,1036800000.0
2,Infer_ms,11.40462,65.30735,53.90273
3,Train_ms,29.89097,160.5039,130.613
4,Memory_MB,477.6631,1297.278,819.6152
5,FinalValLoss,0.6439534,0.6719637,0.02801027
6,FinalValMeanIoU,0.5399303,0.64625,0.1063197
7,FinalValRecall,0.4846847,0.7009009,0.2162162


Summary CSV: /content/detection_comparison_plots_fast/detection_comparison_summary_fast.csv
