# RetinaNet with a ResNet50 Backbone

In [1]:
!pip install SimpleITK pandas numpy torch torchvision



In [2]:
# Mount Google Drive under /content/drive/ so the dataset paths used later in
# this notebook (they all start with /content/drive/) can be read.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [3]:
import sys
sys.path.append('/content')

import SimpleITK as sitk
import numpy as np
import pandas as pd
import os
from glob import glob
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
from torch.optim import SGD
from torchvision.models.detection import RetinaNet, retinanet_resnet50_fpn
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone, _resnet_fpn_extractor
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as T
from sklearn.metrics import average_precision_score

In [None]:
class NoduleDataset(Dataset):
    """Annotated 2D slices taken from the ``.mhd`` volumes found in a directory.

    Each volume is read with SimpleITK, clipped at -1024 and min-max
    normalised to the [0, 1] range. Only slices that carry at least one
    annotation are kept in memory, together with their detection targets.

    Args:
        file_path: Directory that is globbed for ``*.mhd`` files.
        annotations_sub0_path: CSV file providing the columns ``seriesuid``,
            ``cartesian_coords(zyx)`` and ``diameter_mm``.
        transform: Optional callable applied to every ``(1, H, W)`` float32
            tensor returned by ``__getitem__``.
    """

    def __init__(self, file_path, annotations_sub0_path, transform=None):
        self.file_list = glob(os.path.join(file_path, "*.mhd"))
        self.annotations = pd.read_csv(annotations_sub0_path)
        self.transform = transform
        self.slices = []
        self.slice_annotations = []

        # Load and process the images
        for img_file in self.file_list:
            print(f"Processing file: {img_file}")

            series_uid = os.path.basename(img_file).split('.mhd')[0]
            series_annotations = self.annotations[self.annotations['seriesuid'] == series_uid]

            # Parse the annotation rows once per series instead of once per slice.
            boxes_by_slice = self._group_boxes_by_slice(series_annotations)
            if not boxes_by_slice:
                # No annotated slice can come out of this series, so the
                # (expensive) volume read is skipped altogether.
                continue

            itk_img = sitk.ReadImage(img_file)
            volume = self._normalize_volume(sitk.GetArrayFromImage(itk_img))

            # Ascending slice order matches a plain scan over the volume.
            for slice_idx in sorted(boxes_by_slice):
                if not 0 <= slice_idx < volume.shape[0]:
                    continue  # annotation points outside this volume
                # Copy the slice: a view would keep the entire volume alive
                # for as long as the dataset exists.
                self.slices.append(volume[slice_idx].copy())
                self.slice_annotations.append(self._build_target(boxes_by_slice[slice_idx]))

    def __len__(self):
        """Return the number of annotated slices held by the dataset."""
        return len(self.slices)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the annotated slice at ``idx``.

        ``image`` is a float32 tensor with a leading channel dimension
        ``(1, H, W)``; ``target`` is the dict with ``'boxes'`` and
        ``'labels'`` stored for that slice.
        """
        image = torch.tensor(self.slices[idx], dtype=torch.float32).unsqueeze(0)

        if self.transform is not None:
            image = self.transform(image)

        return image, self.slice_annotations[idx]

    def get_slice_annotations(self, series_annotations, slice_idx):
        """Build the detection target for one slice of a series.

        Args:
            series_annotations: DataFrame rows belonging to a single series.
            slice_idx: Index of the slice along the first volume axis.

        Returns:
            Dict with ``'boxes'`` (float32, shape ``(K, 4)``) and ``'labels'``
            (int64, shape ``(K,)``); ``K`` is 0 when the slice has no
            annotation.
        """
        boxes_by_slice = self._group_boxes_by_slice(series_annotations)
        return self._build_target(boxes_by_slice.get(slice_idx, []))

    def get_bbox_from_cartesian(self, x, y, diameter):
        """Return ``[xmin, ymin, xmax, ymax]`` of the square centred on ``(x, y)``.

        NOTE(review): callers pass ``diameter_mm`` here while ``x``/``y`` are
        used as image coordinates - confirm both are in the same unit.
        """
        half = diameter / 2
        return [x - half, y - half, x + half, y + half]

    def _group_boxes_by_slice(self, series_annotations):
        """Map ``int(z)`` of every annotation row to the list of its boxes."""
        grouped = {}
        rows = zip(series_annotations['cartesian_coords(zyx)'], series_annotations['diameter_mm'])
        for coords, diameter in rows:
            z, y, x = self._parse_zyx(coords)
            bbox = self.get_bbox_from_cartesian(x, y, float(diameter))
            # int() truncates z, so an annotation belongs to exactly one slice.
            grouped.setdefault(int(z), []).append(bbox)
        return grouped

    @staticmethod
    def _parse_zyx(coords):
        """Parse a ``"(z, y, x)"`` string; whitespace around commas is optional."""
        z, y, x = (float(part) for part in coords.strip().strip('()').split(','))
        return z, y, x

    @staticmethod
    def _build_target(bboxes):
        """Turn a list of boxes into the ``{'boxes', 'labels'}`` target dict."""
        if bboxes:
            boxes = torch.tensor(bboxes, dtype=torch.float32)
            # Label 1 is used for every nodule.
            labels = torch.ones((len(bboxes),), dtype=torch.int64)
        else:
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
        return {'boxes': boxes, 'labels': labels}

    @staticmethod
    def _normalize_volume(volume):
        """Clip ``volume`` at -1024 and rescale it to [0, 1] as float32.

        The work is done in floating point so that ``max - min`` cannot
        overflow an integer dtype, and a constant volume yields all zeros
        instead of a division by zero.
        """
        volume = np.array(volume, dtype=np.float32)  # private float copy
        np.clip(volume, -1024, None, out=volume)
        low = volume.min()
        span = volume.max() - low
        if span == 0:
            return np.zeros_like(volume)
        volume -= low
        volume /= span
        return volume

def collate_fn(batch):
    """Collate dataset samples into one image tensor and a list of targets.

    Args:
        batch: Sequence of samples; element 0 of each sample is the image
            tensor and element 1 is its target.

    Returns:
        Tuple ``(images, targets)`` where ``images`` is the sample tensors
        stacked along a new leading dimension and ``targets`` is the list of
        per-sample targets in the same order.
    """
    images = []
    annotations = []
    for sample in batch:
        images.append(sample[0])
        annotations.append(sample[1])

    # Targets stay a plain list because each one may hold a different number of boxes.
    return torch.stack(images, dim=0), annotations

# Example usage
# Directory holding the .mhd volumes and the CSV with their annotations.
file_path = '/content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0'
annotations_sub0_path = '/content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subset0_annotations_expanded.csv'

# Define transformation (if needed)
transform = None

# Building the dataset reads every volume up front (see NoduleDataset.__init__).
# NOTE(review): the training cell further down constructs the dataset again
# from the same paths, so all volumes are loaded twice.
dataset = NoduleDataset(file_path, annotations_sub0_path, transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0, collate_fn=collate_fn)
# The string literal below is a disabled debugging loop; it is never executed.
'''
# Testing the DataLoader with debug information
for batch_idx, (data, targets) in enumerate(dataloader):
    non_empty_annotations = [i for i in range(len(data)) if targets[i]['boxes'].size(0) > 0]
    if non_empty_annotations:
        print(f"Batch {batch_idx + 1}")
        print(f"Data: {data.shape}")
        print(f"Targets: {targets}")
        for idx in non_empty_annotations:
            print(f"Data sample {idx + 1}:")
            print(f"Boxes: {targets[idx]['boxes']}")
            print(f"Labels: {targets[idx]['labels']}")
        break  # Print only the first batch with annotations for brevity
'''

Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260.mhd
Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.108197895896446896160048741492.mhd
Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059.mhd
Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.111172165674661221381920536987.mhd
Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.122763913896761494371822656720.mhd
Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.124154461048929153767743874565.mhd
Processing file: /content/drive/Shareddrives/IA DL_project

## Define the Model (RetinaNet with a ResNet50 Backbone)



In [None]:
class CustomRetinaNet(nn.Module):
    """RetinaNet detector built on an FPN feature extractor made from ResNet-50.

    Args:
        num_classes: Forwarded unchanged to ``RetinaNet`` as ``num_classes``.
    """

    def __init__(self, num_classes):
        super().__init__()
        # ResNet-50 initialised from torchvision's default weights.
        resnet = resnet50(weights=ResNet50_Weights.DEFAULT)
        # NOTE(review): trainable_layers=5 presumably leaves every backbone
        # stage trainable - confirm against the torchvision documentation.
        fpn_extractor = _resnet_fpn_extractor(resnet, trainable_layers=5)
        self.backbone = fpn_extractor

        # The detector is assembled around that same extractor instance.
        self.model = RetinaNet(fpn_extractor, num_classes=num_classes)

    def forward(self, images, targets=None):
        """Run the wrapped RetinaNet on ``images`` (and ``targets`` when given)."""
        detector = self.model
        return detector(images, targets)

def create_model(num_classes):
    """Build and return a ``CustomRetinaNet`` configured for ``num_classes``."""
    return CustomRetinaNet(num_classes)

# Global parameters
NUM_CLASSES = 2  # 1 class (nodule) + background
BATCH_SIZE = 4  # Samples per batch for the training and validation DataLoaders
LEARNING_RATE = 0.001  # Initial learning rate for SGD
MOMENTUM = 0.9  # Momentum for SGD
WEIGHT_DECAY = 0.0001  # Weight decay for regularization
NUM_EPOCHS = 10  # Number of epochs passed to train_model
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create the model
# The model is moved to DEVICE once here; train_model moves each batch to the same device.
model = create_model(NUM_CLASSES).to(DEVICE)

### Define the mAP metric

In [None]:
# Evaluate predictions
def evaluate_predictions(pred_boxes, pred_scores, true_boxes, true_labels, iou_threshold=0.5):
    """Compute per-image average precision (AP) and its mean for one class.

    For every image the predictions are visited in descending score order.
    A prediction is a true positive when its best IoU with a not-yet-matched
    ground-truth box is strictly greater than ``iou_threshold``; each
    ground-truth box can be matched only once. AP is the sum of the precision
    values at the true-positive ranks divided by the number of ground-truth
    boxes, so missed ground truths lower the score.

    Args:
        pred_boxes: Per-image arrays of shape ``(N_i, 4)`` as
            ``[xmin, ymin, xmax, ymax]``.
        pred_scores: Per-image arrays of shape ``(N_i,)``.
        true_boxes: Per-image arrays of shape ``(M_i, 4)``.
        true_labels: Per-image label arrays. Accepted for interface
            compatibility; all boxes are treated as one class.
        iou_threshold: IoU a prediction must exceed to count as a match.

    Returns:
        Tuple ``(mean_ap, all_ap)``: the mean over ``all_ap`` (0.0 when it is
        empty) and the list of per-image AP values. Images without
        ground-truth boxes are skipped because AP is undefined for them;
        images with ground truth but no predictions contribute 0.0.

    Raises:
        ValueError: If an image has a different number of boxes and scores.
    """
    all_ap = []
    for boxes, scores, gt_boxes in zip(pred_boxes, pred_scores, true_boxes):
        boxes = np.asarray(boxes, dtype=np.float64).reshape(-1, 4)
        scores = np.asarray(scores, dtype=np.float64).reshape(-1)
        gt_boxes = np.asarray(gt_boxes, dtype=np.float64).reshape(-1, 4)

        if len(boxes) != len(scores):
            raise ValueError(
                f"Got {len(boxes)} predicted boxes but {len(scores)} scores for one image"
            )

        if len(gt_boxes) == 0:
            continue

        if len(boxes) == 0:
            # Every ground-truth box was missed.
            all_ap.append(0.0)
            continue

        # Stable sort keeps the original order among equal scores.
        order = np.argsort(-scores, kind="stable")
        iou = _pairwise_iou(boxes[order], gt_boxes)

        gt_matched = np.zeros(len(gt_boxes), dtype=bool)
        is_tp = np.zeros(len(order), dtype=bool)
        for rank, overlaps in enumerate(iou):
            # Hide ground truths that a higher-scored prediction already claimed.
            candidates = np.where(gt_matched, -1.0, overlaps)
            best = int(np.argmax(candidates))
            if candidates[best] > iou_threshold:
                gt_matched[best] = True
                is_tp[rank] = True

        precision = np.cumsum(is_tp) / np.arange(1, len(is_tp) + 1)
        all_ap.append(float(precision[is_tp].sum() / len(gt_boxes)))

    mean_ap = float(np.mean(all_ap)) if all_ap else 0.0
    return mean_ap, all_ap


def _pairwise_iou(boxes1, boxes2):
    """Return the ``(N, M)`` IoU matrix between two ``[x1, y1, x2, y2]`` box arrays."""
    top_left = np.maximum(boxes1[:, None, :2], boxes2[None, :, :2])
    bottom_right = np.minimum(boxes1[:, None, 2:], boxes2[None, :, 2:])
    extent = np.clip(bottom_right - top_left, 0, None)
    intersection = extent[..., 0] * extent[..., 1]

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    union = area1[:, None] + area2[None, :] - intersection

    # Pairs with an empty union keep an IoU of 0 instead of producing NaN.
    iou = np.zeros_like(intersection)
    np.divide(intersection, union, out=iou, where=union > 0)
    return iou

def calculate_iou(boxes1, boxes2):
    """Compute the element-wise IoU between two sets of boxes.

    Row ``i`` of ``boxes1`` is compared with row ``i`` of ``boxes2``; the two
    inputs must therefore have the same number of rows or be broadcastable
    (for example one of them holding a single box).

    Args:
        boxes1: Array-like of shape ``(N, 4)`` as ``[xmin, ymin, xmax, ymax]``.
        boxes2: Array-like of shape ``(N, 4)`` or ``(1, 4)`` in the same format.

    Returns:
        Float array with one IoU value per compared pair. Pairs whose union
        is not positive (e.g. two zero-area boxes) yield 0.0 rather than NaN.
    """
    boxes1 = np.asarray(boxes1, dtype=np.float64)
    boxes2 = np.asarray(boxes2, dtype=np.float64)

    # Corners of the overlapping rectangle.
    x1 = np.maximum(boxes1[:, 0], boxes2[:, 0])
    y1 = np.maximum(boxes1[:, 1], boxes2[:, 1])
    x2 = np.minimum(boxes1[:, 2], boxes2[:, 2])
    y2 = np.minimum(boxes1[:, 3], boxes2[:, 3])

    # Negative extents mean the boxes do not overlap; clamp them to zero.
    intersection = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    union = area1 + area2 - intersection

    # Divide only where the union is positive so degenerate boxes give 0.
    iou = np.zeros_like(union)
    np.divide(intersection, union, out=iou, where=union > 0)
    return iou

### Define the training loop

In [None]:
# Optimizer and learning rate scheduler
# SGD over all model parameters, configured by the global hyper-parameters above.
optimizer = SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
# StepLR multiplies the learning rate by gamma (0.1) every step_size (3) scheduler
# steps; train_model calls lr_scheduler.step() once per epoch.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Function to train the model
def train_model(model, train_loader, val_loader, optimizer, lr_scheduler, num_epochs):
    """Train ``model``, then validate and checkpoint it after every epoch.

    Args:
        model: Detection model that returns a dict of losses when called with
            ``(images, targets)`` in training mode and a list of per-image
            prediction dicts when called with ``images`` in eval mode.
        train_loader: Iterable of ``(images, targets)`` training batches.
        val_loader: Iterable of ``(images, targets)`` validation batches.
        optimizer: Optimizer stepped once per training batch.
        lr_scheduler: Scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.

    Side effects:
        Prints progress, and writes ``retinanet_epoch_<n>.pth`` (the model's
        ``state_dict``) to the current working directory after each epoch.
    """
    num_batches = len(train_loader)
    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch_idx, (images, targets) in enumerate(train_loader):
            images, targets = _batch_to_device(images, targets)

            optimizer.zero_grad()
            loss_dict = model(images, targets)
            # The total loss is the sum of every loss term the model reports.
            losses = sum(loss_dict.values())
            losses.backward()
            optimizer.step()

            batch_loss = losses.item()
            epoch_loss += batch_loss

            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{batch_idx}/{num_batches}], Loss: {batch_loss:.4f}")

        lr_scheduler.step()
        # An empty loader has no meaningful average; report NaN instead of
        # raising ZeroDivisionError.
        average_loss = epoch_loss / num_batches if num_batches else float("nan")
        print(f"Epoch [{epoch + 1}/{num_epochs}] completed with average loss: {average_loss:.4f}")

        # Validate the model
        mean_ap, aps = _validate(model, val_loader)
        print(f"Validation mAP: {mean_ap}, APs: {aps}")

        model.train()  # Switch back to training mode

        # Save checkpoint
        checkpoint_path = f"retinanet_epoch_{epoch + 1}.pth"
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Model checkpoint saved at {checkpoint_path}")


def _batch_to_device(images, targets):
    """Move one batch to DEVICE as a list of images and a list of target dicts."""
    images = [image.to(DEVICE) for image in images]
    targets = [{key: value.to(DEVICE) for key, value in target.items()} for target in targets]
    return images, targets


def _validate(model, val_loader):
    """Run ``model`` in eval mode over ``val_loader`` and return ``(mean_ap, aps)``."""
    pred_boxes = []
    pred_scores = []
    true_boxes = []
    true_labels = []

    model.eval()
    with torch.no_grad():
        for images, targets in val_loader:
            # Only the images are needed on the device; the targets are
            # consumed as NumPy arrays on the CPU.
            outputs = model([image.to(DEVICE) for image in images])

            for output, target in zip(outputs, targets):
                pred_boxes.append(output['boxes'].cpu().numpy())
                pred_scores.append(output['scores'].cpu().numpy())
                true_boxes.append(target['boxes'].cpu().numpy())
                true_labels.append(target['labels'].cpu().numpy())

    return evaluate_predictions(pred_boxes, pred_scores, true_boxes, true_labels)

# Create dataset and split into training and validation sets
# NOTE(review): this rebuilds the dataset from the same paths as the earlier
# example-usage cell, so every volume is read a second time.
dataset = NoduleDataset(file_path, annotations_sub0_path, transform=transform)
# 80 % of the slices go to training, the remainder to validation.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
# NOTE(review): random_split is called without a generator, so the split
# differs between runs; it also splits individual slices, so slices of the
# same series can land in both subsets - confirm this is intended.
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Only the training loader shuffles; both use the custom collate_fn.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, collate_fn=collate_fn)

# Start training
train_model(model, train_loader, val_loader, optimizer, lr_scheduler, NUM_EPOCHS)

Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0_test/1.3.6.1.4.1.14519.5.2.1.6279.6001.979083010707182900091062408058.mhd
Extracted series UID: 1.3.6.1.4.1.14519.5.2.1.6279.6001.979083010707182900091062408058
Annotations found: 1 for series UID: 1.3.6.1.4.1.14519.5.2.1.6279.6001.979083010707182900091062408058
Annotations found for Slice IDX: 56: 1
Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0_test/1.3.6.1.4.1.14519.5.2.1.6279.6001.898642529028521482602829374444.mhd
Extracted series UID: 1.3.6.1.4.1.14519.5.2.1.6279.6001.898642529028521482602829374444
Annotations found: 2 for series UID: 1.3.6.1.4.1.14519.5.2.1.6279.6001.898642529028521482602829374444
Annotations found for Slice IDX: 66: 1
Annotations found for Slice IDX: 97: 1
Processing file: /content/drive/Shareddrives/IA DL_project/ML IA/LUNA16/subsets/subset0_test/1.3.6.1.4.1.14519.5.2.1.6279.6001.832260670372728970918746541371.mhd
Extracted series UI