## Create toy dataset

In [45]:
import torch
from pathlib import Path
import random

In [46]:
def generate_sphere_volume(
    shape=(64, 64, 64),
    radius_range=(4, 8),
):
    """
    Returns:
        volume: (1, D, H, W)
        box: (6,) -> (cx, cy, cz, dx, dy, dz)
    """
    D, H, W = shape
    volume = torch.zeros(1, D, H, W)

    r = random.randint(*radius_range)
    cx = random.randint(r, W - r - 1)
    cy = random.randint(r, H - r - 1)
    cz = random.randint(r, D - r - 1)

    z, y, x = torch.meshgrid(
        torch.arange(D),
        torch.arange(H),
        torch.arange(W),
        indexing="ij",
    )

    mask = (x - cx) ** 2 + (y - cy) ** 2 + (z - cz) ** 2 <= r ** 2
    volume[0][mask] = 1.0

    # Bounding box in (cx, cy, cz, dx, dy, dz)
    x1, x2 = cx - r, cx + r
    y1, y2 = cy - r, cy + r
    z1, z2 = cz - r, cz + r

    box = torch.tensor(
        [
            (x1 + x2) / 2,
            (y1 + y2) / 2,
            (z1 + z2) / 2,
            x2 - x1,
            y2 - y1,
            z2 - z1,
        ],
        dtype=torch.float32,
    )

    return volume, box

def create_toy_detection_dataset(
    root_dir,
    num_train=500,
    num_val=100,
    shape=(64, 64, 64),
):
    root = Path(root_dir)

    for split, n_samples in [("train", num_train), ("val", num_val)]:
        vol_dir = root / split / "volumes"
        tgt_dir = root / split / "targets"
        vol_dir.mkdir(parents=True, exist_ok=True)
        tgt_dir.mkdir(parents=True, exist_ok=True)

        for i in range(n_samples):
            volume, box = generate_sphere_volume(shape)

            torch.save(
                volume,
                vol_dir / f"sample_{i:04d}.pt",
            )

            torch.save(
                {
                    "boxes": box.unsqueeze(0),      # (1, 6)
                    "labels": torch.tensor([0]),    # single class
                },
                tgt_dir / f"sample_{i:04d}.pt",
            )

    print(f"Dataset written to: {root}")



In [47]:
create_toy_detection_dataset(
    root_dir="/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/toy_dataset",
    num_train=1000,
    num_val=200,
    shape=(128,)*3
)

Dataset written to: /home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/toy_dataset


In [33]:
import torch
import matplotlib.pyplot as plt
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display

def visualize_3d_bbox(
    root_dir,
    split="train",
    sample_name=None,
):
    """
    Interactive visualization for a 3D volume with 3D bounding box.
    
    Expected structure:
    root_dir/
        train/
            volumes/sample_xxxx.pt
            targets/sample_xxxx.pt
        val/
            volumes/sample_xxxx.pt
            targets/sample_xxxx.pt

    target format:
    {
        'boxes': tensor([[x, y, z, dx, dy, dz]]),
        'labels': tensor([class_id])
    }
    """

    root_dir = Path(root_dir)
    vol_dir = root_dir / split / "volumes"
    tgt_dir = root_dir / split / "targets"

    samples = sorted([p.stem for p in vol_dir.glob("*.pt")])
    if len(samples) == 0:
        raise RuntimeError("No samples found")

    if sample_name is None:
        sample_name = samples[0]

    vol = torch.load(vol_dir / f"{sample_name}.pt")
    tgt = torch.load(tgt_dir / f"{sample_name}.pt")

    vol = vol.squeeze().cpu()
    box = tgt["boxes"][0].cpu()  # [x, y, z, dx, dy, dz]

    x, y, z, dx, dy, dz = box
    x0, x1 = int(x - dx / 2), int(x + dx / 2)
    y0, y1 = int(y - dy / 2), int(y + dy / 2)
    z0, z1 = int(z - dz / 2), int(z + dz / 2)

    x0, y0, z0 = max(x0, 0), max(y0, 0), max(z0, 0)
    x1, y1, z1 = min(x1, vol.shape[2]-1), min(y1, vol.shape[1]-1), min(z1, vol.shape[0]-1)

    def plot_slice(axis, idx):
        plt.figure(figsize=(5, 5))

        if axis == "z":
            img = vol[idx]
            if z0 <= idx <= z1:
                plt.gca().add_patch(
                    plt.Rectangle(
                        (x0, y0),
                        x1 - x0,
                        y1 - y0,
                        fill=False,
                        edgecolor="red",
                        linewidth=2,
                    )
                )
            plt.imshow(img, cmap="gray")

        elif axis == "y":
            img = vol[:, idx, :]
            if y0 <= idx <= y1:
                plt.gca().add_patch(
                    plt.Rectangle(
                        (x0, z0),
                        x1 - x0,
                        z1 - z0,
                        fill=False,
                        edgecolor="red",
                        linewidth=2,
                    )
                )
            plt.imshow(img, cmap="gray")

        elif axis == "x":
            img = vol[:, :, idx]
            if x0 <= idx <= x1:
                plt.gca().add_patch(
                    plt.Rectangle(
                        (y0, z0),
                        y1 - y0,
                        z1 - z0,
                        fill=False,
                        edgecolor="red",
                        linewidth=2,
                    )
                )
            plt.imshow(img, cmap="gray")

        plt.title(f"{sample_name} | axis={axis} | slice={idx}")
        plt.axis("off")
        plt.show()

    axis_dd = widgets.Dropdown(
        options=["z", "y", "x"],
        value="z",
        description="Axis:",
    )

    slice_slider = widgets.IntSlider(
        min=0,
        max=vol.shape[0] - 1,
        step=1,
        value=int(z),
        description="Slice:",
        continuous_update=False,
    )

    def update_slider(*args):
        axis = axis_dd.value
        if axis == "z":
            slice_slider.max = vol.shape[0] - 1
            slice_slider.value = int(z)
        elif axis == "y":
            slice_slider.max = vol.shape[1] - 1
            slice_slider.value = int(y)
        elif axis == "x":
            slice_slider.max = vol.shape[2] - 1
            slice_slider.value = int(x)

    axis_dd.observe(update_slider, names="value")

    ui = widgets.VBox([axis_dd, slice_slider])
    out = widgets.interactive_output(
        plot_slice,
        {"axis": axis_dd, "idx": slice_slider},
    )

    display(ui, out)


# Example usage in notebook:
# visualize_3d_bbox(
#     root_dir="/path/to/dataset",
#     split="train",
#     sample_name="sample_0001"
# )


In [36]:
visualize_3d_bbox(root_dir="/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/toy_dataset", split="train", sample_name="sample_0011")

VBox(children=(Dropdown(description='Axis:', options=('z', 'y', 'x'), value='z'), IntSlider(value=16, continuo…

Output()

## Load the toy sphere dataset

In [1]:
from torch import nn
from qct_3d_nod_detect.structures import Instances3D, Boxes3D, ImagesList3D
import lightning.pytorch as pl
import torch
from typing import Optional, List, Dict
from qct_3d_nod_detect.base_lightning import BaseLightningModule

class GeneralizedRCNN3D(nn.Module):

    def __init__(
            self,
            backbone: nn.Module,
            rpn: nn.Module,
            roi_heads: nn.Module,
    ):

        super().__init__()
        self.backbone = backbone
        self.rpn = rpn
        self.roi_heads = roi_heads

    def forward_train(
            self,
            images: torch.Tensor,
            targets: Optional[List[Instances3D]] = None,
    ) -> Dict[str, torch.Tensor]:
        
        """
        Returns:
            training: dict of losses
            inference: List[Instances3D]
        """

        features: Dict[str, torch.Tensor] = self.backbone(images)

        image_list = ImagesList3D(
                tensor=images,
                image_sizes=[images.shape[-3:]] * images.shape[0],
                )
            
        proposals, rpn_losses = self.rpn(
            images=image_list,
            features=features,
            gt_instances=targets,
            training=True,
        )

        roi_losses = self.roi_heads(
            features=features,
            proposals=proposals,
            targets=targets,
            training=True
        )

        return {
            **rpn_losses,
            **roi_losses,
        }
    
    @torch.no_grad()
    def forward_inference(
        self,
        images: torch.Tensor,
    ) -> List[Instances3D]:
        
        features = self.backbone(images)

        image_list = ImagesList3D(
                tensor=images,
                image_sizes=[images.shape[-3:]] * images.shape[0],
        )

        proposals, _ = self.rpn(
            images=image_list,
            features=features,
            gt_instances=None,
            training=False,
        )

        for i, p in enumerate(proposals):
            print(f"Image {i}: #RPN proposals = {len(p)}")

        detections = self.roi_heads(
            features=features,
            proposals=proposals,
            targets=None,
            training=False
        )

        return detections

def build_targets(batch):
    targets = []

    B = len(batch["gt_boxes"])
    image_size = batch["image"].shape[-3:]

    for i in range(B):
        inst = Instances3D(image_size=image_size)
        inst.gt_boxes = Boxes3D(batch["gt_boxes"][i])
        inst.gt_classes = batch["gt_classes"][i].long()
        targets.append(inst)

    return targets

def build_image_list_3d(images: torch.Tensor) -> ImagesList3D:
    """
    Args:
        images: Tensor[B, C, D, H, W]
    """
    image_sizes = [tuple(images.shape[-3:]) for _ in range(images.shape[0])]
    return ImagesList3D(image_sizes)

def build_instances_3d(batch):
    instances = []

    for boxes, classes in zip(batch["gt_boxes"], batch["gt_classes"]):
        inst = Instances3D(image_size=batch["image"].shape[-3:])
        inst.gt_boxes = Boxes3D(boxes)
        inst.gt_classes = classes
        instances.append(inst)

    return instances

class FasterRCNN3DLightning(BaseLightningModule):

    def __init__(
            self,
            model: nn.Module,
            learning_rate: float = 1e-4,
            grad_clip_val: float = 0.0,
            grad_clip_algorithm: str = "norm",
            log_on: str = "step"
    ):

        super().__init__(learning_rate=learning_rate)

        self.model = model
        self.grad_clip_val = grad_clip_val
        self.grad_clip_algorithm = grad_clip_algorithm
        self.log_on = log_on
        
    def _build_targets(
        self,
        batch
    ):
        
        targets = []
        image_size = batch['image'].shape[-3:] # (D, H, W)

        for gt_boxes, gt_classes in zip(
            batch['gt_boxes'], batch['gt_classes']
        ):
            
            centers = gt_boxes[:, :3]      # (cx, cy, cz)
            sizes = gt_boxes[:, 3:]        # (dx, dy, dz)

            half_sizes = sizes * 0.5

            boxes_cc = torch.cat(
                [
                    centers - half_sizes,  # (x1, y1, z1)
                    centers + half_sizes,  # (x2, y2, z2)
                ],
                dim=1,
            )
            
            inst = Instances3D(image_size=image_size)
            inst.gt_boxes = Boxes3D(boxes_cc)
            inst.gt_classes = gt_classes.long()
            targets.append(inst)

        return targets
    
    def forward(
            self,
            images,
            targets=None,
    ):
        return self.model.forward_inference(images, targets)
    
    def training_step(
            self, 
            batch, 
            batch_idx
        ):

        images = batch["image"]
        targets = self._build_targets(batch)

        loss_dict = self.model.forward_train(images, targets)
        total_loss = sum(loss_dict.values())

        self.log_dict_helper(loss_dict, prefix="train/")
        self.log("train/loss_total", total_loss, prog_bar=True, sync_dist=True)

        return total_loss

    def validation_step(
            self, 
            batch, 
            batch_idx
        ):

        images = batch["image"]
        detections = self.model.forward_inference(images)

        num_boxes = sum(len(d) for d in detections)

        self.log(
            "val/num_boxes",
            num_boxes,
            prog_bar=True,
            sync_dist=True,
        )

        return detections

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=self.learning_rate,
            weight_decay=1e-4,
        )

        return optimizer

In [2]:
# Dataset
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
import torch

class ToySphereDetectionDataset(Dataset):
    def __init__(self, root_dir, split="train"):
        self.root = Path(root_dir) / split
        self.vol_dir = self.root / "volumes"
        self.tgt_dir = self.root / "targets"

        self.ids = sorted(p.stem for p in self.vol_dir.glob("*.pt"))

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        sid = self.ids[idx]

        volume = torch.load(self.vol_dir / f"{sid}.pt")   # (1, D, H, W)
        target = torch.load(self.tgt_dir / f"{sid}.pt")

        return {
            "image": volume,                  # Tensor[1, D, H, W]
            "gt_boxes": target["boxes"],       # Tensor[N, 6]
            "gt_classes": target["labels"],   # Tensor[N]
        }
    
class ToySphereDetectionDataModule(pl.LightningDataModule):
    def __init__(
        self,
        root_dir: str,
        batch_size: int = 2,
        num_workers: int = 4,
        pin_memory: bool = True,
    ):
        super().__init__()
        self.root_dir = root_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.pin_memory = pin_memory

    def setup(self, stage=None):
        # Called once per process
        self.train_dataset = ToySphereDetectionDataset(
            root_dir=self.root_dir,
            split="train",
        )

        self.val_dataset = ToySphereDetectionDataset(
            root_dir=self.root_dir,
            split="val",
        )

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory,
            collate_fn=detection_collate,
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory,
            collate_fn=detection_collate,
        )

def detection_collate(batch):
    return {
        "image": torch.stack([b["image"] for b in batch], dim=0),
        "gt_boxes": [b["gt_boxes"] for b in batch],
        "gt_classes": [b["gt_classes"] for b in batch],
    }

train_dataset = ToySphereDetectionDataset(
    root_dir="/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/toy_dataset",
    split="train",
)

val_dataset = ToySphereDetectionDataset(
    root_dir="/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/toy_dataset",
    split="val",
)

datamodule = ToySphereDetectionDataModule(root_dir='/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/toy_dataset',
                                          batch_size=10,
                                          num_workers=8,
                                          pin_memory=False)

In [3]:
train_dataloader = DataLoader(
    train_dataset,
    batch_size=2,          # start small
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    collate_fn=detection_collate,
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    collate_fn=detection_collate,
)


In [4]:
batch = next(iter(train_dataloader))

print(batch["image"].shape)       # (B, 1, D, H, W)
print(len(batch["gt_boxes"]))     # B
print(batch["gt_boxes"][0].shape) # (N, 6)

torch.Size([2, 1, 128, 128, 128])
2
torch.Size([1, 6])


In [5]:
from qct_3d_nod_detect.rpn import RPN3D, StandardRPNHead3d
from qct_3d_nod_detect.faster_rcnn import FasterRCNNOutputLayers3D
from qct_3d_nod_detect.poolers import ROIPooler3D
from qct_3d_nod_detect.anchor_generator_3d import DefaultAnchorGenerator3D
from qct_3d_nod_detect.box_regression import Box3DTransform
from qct_3d_nod_detect.matcher import Matcher
from qct_3d_nod_detect.roi_heads import ROIHeads3D
from qct_3d_nod_detect.backbones import build_vit_backbone_with_fpn
from qct_3d_nod_detect.box_heads import FastRCNNConvFCHead3D
from qct_3d_nod_detect.layers import ShapeSpec
import math

In [6]:
anchor_generator_3d = DefaultAnchorGenerator3D(
    sizes=[[8], [16], [32], [64]],
    aspect_ratios_3d=[[(1.0, 1.0)], [(1.0, 1.0)], [(1.0, 1.0)], [(1.0, 1.0)]],
    strides=[4, 8, 16, 32],
    offset=0.5,
)

backbone_fpn = build_vit_backbone_with_fpn(
    variant="L",
    ckpt_path=None,
    scales=[1, 2, 0.5, 0.25],
    out_channels=256
)

box3d2box3d_transform = Box3DTransform(
    weights=(1.0, 1.0, 1.0, 1.0, 1.0, 1.0),
    scale_clamp=math.log(1000.0),
)

rpn_head_3d = StandardRPNHead3d(
    in_channels=256,
    num_anchors=anchor_generator_3d.num_cell_anchors[0],
    box_dim=6
)

rpn_matcher = Matcher(
    thresholds=[0.1, 0.3],
    labels=[0, -1, 1],
    allow_low_quality_matches=True,
)

roi_matcher = Matcher(
    thresholds=[0.5],
    labels=[0, 1],
    allow_low_quality_matches=True,
)

roi_pooler = ROIPooler3D(
    output_size=(7, 7, 7),
    canonical_level=4,
    canonical_box_size=64,
    pooler_type="ROIALign3DV2",
    scales=[1, 2, 0.5, 0.25]
)

rpn = RPN3D(
    in_features=["p2", "p3", "p4", "p5"],
    head=rpn_head_3d,
    anchor_generator=anchor_generator_3d,
    anchor_matcher=rpn_matcher,
    box3d_transform=box3d2box3d_transform,
    batch_size_per_image=300,
    positive_fraction=0.3,
    pre_nms_topk=(1000, 500),
    post_nms_topk=(600, 600),
    nms_thresh=0.1,
    min_box_size=2.0,
    box_reg_loss_type="smooth_l1",
    smooth_l1_beta=0.0,
)

box_head = FastRCNNConvFCHead3D(
    input_shape=ShapeSpec(256, 7, 7, 7),
    conv_dims=[256,256],
    fc_dims=[512]
)

output_layers = FasterRCNNOutputLayers3D(
    input_dim=512,
    num_classes=1,
    box2box_transform=box3d2box3d_transform,
    cls_agnostic_bbox_reg=False,
    test_score_thresh=0.0,
    test_nms_thresh=0.1
)

roi_head = ROIHeads3D(
    num_classes=1,
    batch_size_per_image=300,
    positive_fraction=0.3,
    proposal_matcher=roi_matcher,
    roi_pooler=roi_pooler,
    proposal_append_gt=False,
    box_head=box_head,
    box_predictor=output_layers)

loading random weights


In [7]:
model = GeneralizedRCNN3D(backbone=backbone_fpn, rpn=rpn, roi_heads=roi_head)


In [8]:
# Ensure batch is on the same device as model
device = torch.device("cuda:0")

image = batch["image"].to(device)
gt_boxes = [box.to(device) for box in batch["gt_boxes"]]
gt_classes = [cls.to(device) for cls in batch["gt_classes"]]

def centersize_to_corners_3d(boxes):
    c = boxes[:, :3]
    s = boxes[:, 3:] / 2
    return torch.cat([c - s, c + s], dim=1)

gt_boxes = [centersize_to_corners_3d(box) for box in gt_boxes]

# Rebuild batch dict
batch_on_device = {
    "image": image,
    "gt_boxes": gt_boxes,
    "gt_classes": gt_classes,
}

# Now call forward
out = model.forward(image, build_instances_3d(batch_on_device))

NotImplementedError: Module [GeneralizedRCNN3D] is missing the required "forward" function

In [75]:
out['rpn_losses']

NameError: name 'out' is not defined

In [9]:
out['roi_outputs']

{'loss_cls': tensor(0.6933, device='cuda:0', grad_fn=<MulBackward0>),
 'loss_box_reg': tensor(1.1681e-05, device='cuda:0', grad_fn=<MulBackward0>)}

In [9]:
device = torch.device("cuda:2")

targets = []
image_size = batch['image'].shape[-3:]

def centersize_to_corners_3d(boxes):
    c = boxes[:, :3]
    s = boxes[:, 3:] / 2
    return torch.cat([c - s, c + s], dim=1)

for gt_boxes, gt_classes in zip(batch['gt_boxes'], batch['gt_classes']):

    inst = Instances3D(image_size=image_size)
    gt_boxes = centersize_to_corners_3d(gt_boxes)
    inst.gt_boxes = Boxes3D(gt_boxes.to(device))
    inst.gt_classes = gt_classes.to(device).long()
    targets.append(inst)

model = model.to(device)
image = batch['image'].to(device)
targets = targets

In [None]:
from clearml import Task
from pytorch_lightning.loggers import TensorBoardLogger

task_init_kwargs = dict(
    project_name="qct_nodule_detection_ishan",
    task_name="toy_run_fasterrcnn",
    reuse_last_task_id=True,
    auto_connect_frameworks={"pytorch": False, "tensorboard": True},
)

task = Task.init(**task_init_kwargs)

tb_logger = TensorBoardLogger(
        save_dir="/home/users/ishan.tiwari/Ishan_Nodseg/logs",
        name="toy_run_fasterrcnn",
        version="default",
    )

lit_model = FasterRCNN3DLightning(model=model, learning_rate=0.001)

trainer = pl.Trainer(
    max_epochs=5,
    accelerator="gpu",
    devices=[0],                    # GPU 3 or [4] for GPU 4
    precision="32",                 # Changed from int 32 to string "32"
    logger=tb_logger,
    log_every_n_steps=5,
    enable_checkpointing=False,
    enable_model_summary=True,
    enable_progress_bar=True,       # Explicitly enable progress bar
)

trainer.fit(
    lit_model,
    datamodule=datamodule
)


ClearML Task: overwriting (reusing) task id=465d0d4ecb314b849ceaaf386399061b
2026-01-23 14:33:15,522 - clearml.Task - INFO - No repository found, storing script code instead
ClearML results page: https://experiments.qure.ai/projects/9a57571fc53142b99d361bc2f603bf79/experiments/465d0d4ecb314b849ceaaf386399061b/output/log
CLEARML-SERVER new package available: UPGRADE to v2.3.0 is recommended!
Release Notes:
### New Features and Improvements

- New Project Workloads dashboard: View project resource utilization by resource, user, and subproject
- New Global Search advanced mode: Direct API filter specification for more specific queries
- Improve single-value scalar Compare view: Separate plot per variant when grouping by metric
- Enable customizing the default name format of cloned tasks 

### Bug Fixes

- Fix UI GCS credentials popup continues reappearing after broken upload #296
- Fix UI sometimes fails to load large task console logs #295
- Fix deleting parameter in UI task configuratio


Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name  | Type              | Params | Mode 
----------------------------------------------------
0 | model | GeneralizedRCNN3D | 85.5 M | train
----------------------------------------------------
85.3 M    Trainable params
196 K     Non-trainable params
85.5 M    Total params
342.179   Tot

Sanity Checking: |          | 0/? [00:00<?, ?it/s]


Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 10. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.



Training: |          | 0/? [00:00<?, ?it/s]

Labels after matching -  (tensor([0], device='cuda:0', dtype=torch.int8), tensor([4680], device='cuda:0'))
Labels after matching -  (tensor([0], device='cuda:0', dtype=torch.int8), tensor([4680], device='cuda:0'))
Labels after matching -  (tensor([0], device='cuda:0', dtype=torch.int8), tensor([4680], device='cuda:0'))
Labels after matching -  (tensor([-1,  0,  1], device='cuda:0', dtype=torch.int8), tensor([  13, 4663,    4], device='cuda:0'))
Labels after matching -  (tensor([-1,  0,  1], device='cuda:0', dtype=torch.int8), tensor([  34, 4645,    1], device='cuda:0'))
Labels after matching -  (tensor([0], device='cuda:0', dtype=torch.int8), tensor([4680], device='cuda:0'))
Labels after matching -  (tensor([0], device='cuda:0', dtype=torch.int8), tensor([4680], device='cuda:0'))
Labels after matching -  (tensor([0], device='cuda:0', dtype=torch.int8), tensor([4680], device='cuda:0'))
Labels after matching -  (tensor([-1,  0], device='cuda:0', dtype=torch.int8), tensor([   3, 4677], de


Detected KeyboardInterrupt, attempting graceful shutdown ...


SystemExit: 1


To exit: use 'exit', 'quit', or Ctrl-D.





: 

In [None]:
import gc

def clear_cache():
    """Clear GPU and Python memory cache without restarting notebook"""
    torch.cuda.empty_cache()
    gc.collect()
    print("✓ GPU cache cleared")
    print("✓ Python garbage collected")

# Example usage:
clear_cache()

## Inference sample

In [17]:
checkpoint = torch.load("/home/users/ishan.tiwari/Ishan_Nodseg/checkpoints/toy_fasterrcnn/last.ckpt", map_location="cpu", weights_only=False)

In [18]:
state_dict = checkpoint['state_dict']
device = torch.device("cuda:0")

In [53]:
cleaned_state_dict = {}

for k, v in state_dict.items():
    if k.startswith('model.'):
        new_k = k[len('model.'):]
        cleaned_state_dict[new_k] = v
    else:
        cleaned_state_dict[k] = v

In [54]:
model.load_state_dict(cleaned_state_dict)
model = model.to(device)

In [55]:
train_batch = next(iter(train_dataloader))
val_batch = next(iter(val_dataloader))

In [56]:
detections = model.forward_inference(val_batch["image"].to(device))

Image 0: #RPN proposals = 98
Image 1: #RPN proposals = 125


In [9]:
import glob

glob.glob("/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/*.ipynb")

['/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/17_baselightning.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/05_box_regression.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/09_memory.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/06_rpn.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/03_layers.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/08_sampling.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/index.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/dataset.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/sanity_check.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/15_roi_heads.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/07_matcher.ipynb',
 '/home/users/ishan.tiwari/Ishan_Nodseg/qct_3d_nod_detect/nbs/10_proposal_utis.ipynb',
 '/home/users/isha

In [58]:
detections[0][0]

{'pred_boxes': tensor([[  2.3879,   0.0000, 117.0062,   7.8417,   0.9843, 128.0000],
         [ 11.4391,   0.0000, 121.2354,  23.2090,   0.6746, 128.0000],
         [  8.2935,   0.0000, 121.9804,  13.2481,   0.0000, 128.0000],
         [ 21.1803,   0.0000, 122.1294,  32.8205,   0.6274, 128.0000],
         [ 12.2192,   0.0000, 121.8960,  17.1306,   0.0000, 128.0000],
         [ 16.1930,   0.0000, 122.0025,  21.1102,   0.0000, 128.0000],
         [  4.2159,   0.0000, 121.9089,   9.1072,   0.0000, 128.0000],
         [ 20.0368,   0.0000, 121.9747,  24.8671,   0.0000, 128.0000],
         [ 23.9289,   0.0000, 122.1089,  28.7197,   0.0000, 128.0000],
         [ 36.8980,   0.0000, 122.9260,  47.9540,   0.5861, 128.0000],
         [ 27.7259,   0.0000, 122.1156,  32.4054,   0.0000, 128.0000],
         [ 31.5801,   0.0000, 122.2620,  36.2014,   0.0000, 128.0000],
         [ 35.3781,   0.0000, 122.2702,  39.8871,   0.0000, 128.0000],
         [116.2006,   0.0000, 121.9775, 120.8130,   0.0000, 128

In [6]:
boxes = [
    # --- Base reference box ---
    [0.0, 0.0, 0.0, 10.0, 10.0, 10.0],      # A: base cube

    # --- High-overlap boxes ---
    [1.0, 1.0, 1.0, 9.0, 9.0, 9.0],         # B: fully inside A (IoU high)
    [2.0, 2.0, 2.0, 12.0, 12.0, 12.0],      # C: partial overlap with A
    [-2.0, -2.0, -2.0, 8.0, 8.0, 8.0],      # D: partial overlap from negative side

    # --- Identical box ---
    [0.0, 0.0, 0.0, 10.0, 10.0, 10.0],      # E: identical to A

    # --- Touching but zero IoU ---
    [10.0, 0.0, 0.0, 20.0, 10.0, 10.0],     # F: touches A on +X face
    [0.0, 10.0, 0.0, 10.0, 20.0, 10.0],     # G: touches A on +Y face
    [0.0, 0.0, 10.0, 10.0, 10.0, 20.0],     # H: touches A on +Z face
    [10.0, 10.0, 10.0, 20.0, 20.0, 20.0],   # I: touches A at one corner

    # --- Thin / near-degenerate boxes ---
    [5.0, 5.0, 5.0, 5.1, 9.0, 9.0],         # J: very thin in X
    [6.0, 6.0, 6.0, 9.0, 6.05, 9.0],        # K: very thin in Y
    [7.0, 7.0, 7.0, 9.0, 9.0, 7.02],        # L: very thin in Z

    # --- Large box swallowing others ---
    [-5.0, -5.0, -5.0, 25.0, 25.0, 25.0],  # M: contains almost everything

    # --- Partial overlap only in 2 axes ---
    [3.0, 3.0, 10.0, 8.0, 8.0, 15.0],       # N: overlaps A in X,Y but not Z
    [10.0, 3.0, 3.0, 15.0, 8.0, 8.0],       # O: overlaps A in Y,Z but not X
    [3.0, 10.0, 3.0, 8.0, 15.0, 8.0],       # P: overlaps A in X,Z but not Y

    # --- Small boxes inside B ---
    [2.5, 2.5, 2.5, 3.5, 3.5, 3.5],         # Q: tiny inside B
    [4.0, 4.0, 4.0, 6.0, 6.0, 6.0],         # R: medium inside B

    # --- Distant boxes (should never suppress) ---
    [100.0, 100.0, 100.0, 110.0, 110.0, 110.0], # S: far away
    [-100.0, -100.0, -100.0, -90.0, -90.0, -90.0], # T: far away negative

    # --- Edge-case numeric precision ---
    [0.0, 0.0, 0.0, 10.0000001, 10.0, 10.0],    # U: almost identical to A
    [9.9999999, 0.0, 0.0, 20.0, 10.0, 10.0],    # V: epsilon overlap in X
]


scores = [
    0.95,  # A: base cube (strong)
    0.90,  # B: fully inside A
    0.88,  # C: partial overlap
    0.87,  # D: partial overlap (negative side)

    0.93,  # E: identical to A (lower than A but still high)

    0.60,  # F: touching face (X)
    0.62,  # G: touching face (Y)
    0.61,  # H: touching face (Z)
    0.58,  # I: touching corner only

    0.92,  # J: thin box (high confidence)
    0.91,  # K: thin box
    0.89,  # L: thin box

    0.40,  # M: huge box swallowing others (low confidence)

    0.55,  # N: overlaps only in X,Y
    0.54,  # O: overlaps only in Y,Z
    0.53,  # P: overlaps only in X,Z

    0.96,  # Q: tiny box inside B (very confident)
    0.94,  # R: medium box inside B

    0.70,  # S: far away
    0.65,  # T: far away negative

    0.949, # U: almost identical to A (float precision edge)
    0.59,  # V: epsilon overlap in X
]


In [7]:
import torch
import torch.nn as nn
import math
import numpy as np
from typing import List

In [8]:
def iou_3d(box1, box2):

    """
    Args
        box1: Tensor[6] in corner corner format
        box2: Tensor[6] in corner corner format
    """

    x1 = max(box1[0], box2[0])
    y1 = min(box1[1], box2[1])
    z1 = max(box1[2], box2[2])

    x2 = min(box1[3], box2[3])
    y2 = max(box1[4], box2[4])
    z2 = min(box1[5], box2[5])

    def get_area_rectangle(box1, box2):

        """
        Args
            returns the area of the given boxes (Tensor[6] in corner corner format)
        """

        area1 = max(0, (box1[3]-box1[0])*(box1[4] - box1[1])*(box1[5] - box1[2]))
        area2 = max(0, (box2[3]-box2[0])*(box2[4] - box2[1])*(box2[5] - box2[2]))

        return area1, area2
    
    intersection = max(0, (x2-x1)*(y2-y1)*(z2-z1))
    area_box1, area_box2 = get_area_rectangle(box1, box2)
    union = area_box1 + area_box2 - intersection
    
    return intersection / (union + 1e-8)

In [30]:
def nms(predictions: np.array,
        scores: np.array,
        nms_thresh: float):

    """
    Args:

        predictions: List/array of length N containing bboxes of shape (,6) in corner corner format
        scores: Contains List of scores corresponding to each box
        nms_thresh: Threshold to perform nms on
    """

    if not isinstance(predictions, np.ndarray):
        predictions = np.array(predictions)

    if not isinstance(scores, np.ndarray):
        scores = np.array(scores)

    detections = []
    ind = np.argsort(scores, axis=0)[::-1]
    predictions_sorted = list(predictions[ind])

    while list(predictions_sorted):
        max_instance = predictions_sorted[0]

        for i, box in enumerate(predictions_sorted):
            if iou_3d(max_instance, box) >= nms_thresh:
                predictions_sorted.pop(i)

        detections.append(max_instance)

    return detections

In [31]:
detections = nms(predictions=boxes, scores=scores, nms_thresh=0.1)

In [32]:
detections

[array([2.5, 2.5, 2.5, 3.5, 3.5, 3.5]),
 array([ 0.       ,  0.       ,  0.       , 10.0000001, 10.       ,
        10.       ]),
 array([ 0.,  0.,  0., 10., 10., 10.]),
 array([5. , 5. , 5. , 5.1, 9. , 9. ]),
 array([6.  , 6.  , 6.  , 9.  , 6.05, 9.  ]),
 array([7.  , 7.  , 7.  , 9.  , 9.  , 7.02]),
 array([100., 100., 100., 110., 110., 110.]),
 array([-100., -100., -100.,  -90.,  -90.,  -90.]),
 array([ 0.,  0., 10., 10., 10., 20.]),
 array([10.,  0.,  0., 20., 10., 10.]),
 array([ 9.9999999,  0.       ,  0.       , 20.       , 10.       ,
        10.       ])]

In [21]:
iou_3d([1., 1., 1., 9., 9., 9.], [0, 0, 0, 10, 10, 10])

0.7339449541200236

[0.9,
 0.88,
 0.87,
 0.93,
 0.6,
 0.62,
 0.61,
 0.58,
 0.92,
 0.91,
 0.89,
 0.4,
 0.55,
 0.54,
 0.53,
 0.96,
 0.94,
 0.7,
 0.65,
 0.949,
 0.59]

In [22]:
boxes

[[1.0, 1.0, 1.0, 9.0, 9.0, 9.0],
 [2.0, 2.0, 2.0, 12.0, 12.0, 12.0],
 [-2.0, -2.0, -2.0, 8.0, 8.0, 8.0],
 [0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
 [10.0, 0.0, 0.0, 20.0, 10.0, 10.0],
 [0.0, 10.0, 0.0, 10.0, 20.0, 10.0],
 [0.0, 0.0, 10.0, 10.0, 10.0, 20.0],
 [10.0, 10.0, 10.0, 20.0, 20.0, 20.0],
 [5.0, 5.0, 5.0, 5.1, 9.0, 9.0],
 [6.0, 6.0, 6.0, 9.0, 6.05, 9.0],
 [7.0, 7.0, 7.0, 9.0, 9.0, 7.02],
 [-5.0, -5.0, -5.0, 25.0, 25.0, 25.0],
 [3.0, 3.0, 10.0, 8.0, 8.0, 15.0],
 [10.0, 3.0, 3.0, 15.0, 8.0, 8.0],
 [3.0, 10.0, 3.0, 8.0, 15.0, 8.0],
 [2.5, 2.5, 2.5, 3.5, 3.5, 3.5],
 [4.0, 4.0, 4.0, 6.0, 6.0, 6.0],
 [100.0, 100.0, 100.0, 110.0, 110.0, 110.0],
 [-100.0, -100.0, -100.0, -90.0, -90.0, -90.0],
 [0.0, 0.0, 0.0, 10.0000001, 10.0, 10.0],
 [9.9999999, 0.0, 0.0, 20.0, 10.0, 10.0]]

In [14]:
np.take_along_axis(np.array(boxes), boxes_score_sorted, axis=0)

array(['5', '1', '3', '2', '4'], dtype='<U1')

TypeError: '>' not supported between instances of 'list' and 'float'