# Import Required Libraries & Setup Device

In [1]:
import os
import random
import math
import numpy as np
import pandas as pd
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T
from torchvision.transforms import ColorJitter, RandomAffine, RandomPerspective
from torchvision import models

# Select the compute device: run on the GPU when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Mixed-precision helpers: use the real torch.cuda.amp utilities when they can be
# imported, otherwise define no-op stand-ins exposing the same names.
try:
    from torch.cuda.amp import autocast, GradScaler
except ImportError:
    from contextlib import contextmanager

    class GradScaler:
        """No-op stand-in used when ``torch.cuda.amp`` cannot be imported."""

        def scale(self, loss):
            """Return ``loss`` untouched (no loss scaling is performed)."""
            return loss

        def unscale_(self, optimizer):
            """Do nothing; gradients were never scaled."""
            return None

        def step(self, optimizer):
            """Run a plain optimizer step."""
            optimizer.step()

        def update(self):
            """Do nothing; there is no scale factor to adjust."""
            return None

    @contextmanager
    def autocast(enabled=True):
        """Context manager that does nothing; ``enabled`` is accepted and ignored."""
        yield

# Derive the project root as the parent of the current working directory.
# This only reads the working directory; it does not change it.
# NOTE(review): presumably the notebook is launched from a sub-folder of the project — confirm.
from pathlib import Path
_notebook_dir = Path.cwd()
project_root = _notebook_dir.parent



# Define Data Augmentations and Helper Functions
Implement `random_horizontal_flip` and `random_rotation`, which apply the same geometric augmentation to an image and to its landmarks so that the two stay aligned.

In [2]:
def random_horizontal_flip(image, landmarks, p=0.5):
    """Flip the image and landmarks horizontally with probability p.

    Args:
        image: PIL image (any object exposing ``transpose`` and ``width``).
        landmarks: 2-D array whose column 0 holds x coordinates in pixels of
            ``image``. It is never modified.
        p: probability of applying the flip.

    Returns:
        ``(image, landmarks)``. When the flip is applied both are new objects;
        otherwise the inputs are returned unchanged.
    """
    if random.random() >= p:
        return image, landmarks

    flipped_image = image.transpose(method=Image.FLIP_LEFT_RIGHT)
    # Work on a copy so the caller's array is not silently overwritten.
    flipped_landmarks = np.array(landmarks, copy=True)
    flipped_landmarks[:, 0] = flipped_image.width - flipped_landmarks[:, 0]
    # NOTE(review): point order is kept as-is; if the landmark indices encode a
    # left/right ordering they may need to be swapped after mirroring — confirm.
    return flipped_image, flipped_landmarks

def random_rotation(image, landmarks, max_angle=15):
    """Rotate an image and its landmarks by one random angle.

    The angle (in degrees) is drawn uniformly from [-max_angle, max_angle] and
    passed to ``image.rotate`` with bilinear resampling. The landmarks are
    rotated about the image centre by the negated angle so that they follow
    the rotated image.

    Returns:
        ``(rotated_image, rotated_landmarks)``; ``landmarks`` itself is not modified.
    """
    theta_deg = random.uniform(-max_angle, max_angle)
    rotated = image.rotate(theta_deg, resample=Image.BILINEAR)

    # The landmark rotation uses the negated angle of the image rotation.
    theta = -math.radians(theta_deg)
    cos_t, sin_t = math.cos(theta), math.sin(theta)
    rotation = np.array([
        [cos_t, -sin_t],
        [sin_t, cos_t],
    ])

    # Rotate about the image centre: shift to the centre, rotate, shift back.
    centre = np.array([rotated.width / 2, rotated.height / 2], dtype=np.float32)
    centred = landmarks - centre
    turned = np.dot(centred, rotation.T)
    return rotated, turned + centre

# Implementing FetalLandmarkDataset and Data Loading
Create the FetalLandmarkDataset class to read image paths and landmarks from CSV, apply resizing, augmentations, and use DataLoader for training and validation splits.

In [3]:
class FetalLandmarkDataset(Dataset):
    """
    Reads ultrasound images and landmark ground-truth from a CSV.
    The CSV must have:
      image_name, ofd_1_x, ofd_1_y, ofd_2_x, ofd_2_y,
                  bpd_1_x, bpd_1_y, bpd_2_x, bpd_2_y
    Every sample is resized to ``image_size`` (width, height) and its landmarks
    are rescaled by the same factors. When ``augment`` is true the sample also
    goes through a random horizontal flip and a random rotation (both applied
    to image and landmarks together), followed by colour jitter (image only).
    Returns:
      - image: Tensor of shape [3, H, W]
      - landmarks: Tensor of shape [4, 2], (x, y) in pixels of the resized
        image, in the order ofd_1, ofd_2, bpd_1, bpd_2
    """
    def __init__(self, images_dir, csv_file, image_size=(256, 256), augment=True):
        """Load the ground-truth CSV and build the image transforms.

        Args:
            images_dir: directory containing the files named in ``image_name``.
            csv_file: path to the ground-truth CSV described on the class.
            image_size: target (width, height) every image is resized to.
            augment: whether ``__getitem__`` applies random augmentation.
        """
        self.images_dir = images_dir
        self.df = pd.read_csv(csv_file)
        self.image_size = image_size
        self.augment = augment

        # Transforms in this pipeline see the image only, so they must not move
        # pixels: a geometric transform here (affine / perspective warp) would
        # shift the anatomy while the landmark coordinates stay put, corrupting
        # the regression targets. Geometric augmentation is handled by
        # random_horizontal_flip / random_rotation, which update the landmarks too.
        self.extra_transforms = T.Compose([
            ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ])
        # Stateless conversion, built once instead of once per sample.
        self._to_tensor = T.ToTensor()

    def __len__(self):
        """Number of rows in the ground-truth CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image_tensor, landmarks_tensor)`` for row ``idx`` of the CSV."""
        row = self.df.iloc[idx]
        img_name = row['image_name']
        img_path = os.path.join(self.images_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        landmarks = np.array([
            [row['ofd_1_x'], row['ofd_1_y']],
            [row['ofd_2_x'], row['ofd_2_y']],
            [row['bpd_1_x'], row['bpd_1_y']],
            [row['bpd_2_x'], row['bpd_2_y']]
        ], dtype=np.float32)

        # Rescale the landmarks by the same per-axis factors as the image resize.
        orig_w, orig_h = image.size
        target_w, target_h = self.image_size
        scale_x, scale_y = target_w / orig_w, target_h / orig_h
        landmarks[:, 0] *= scale_x
        landmarks[:, 1] *= scale_y
        image = image.resize(self.image_size, resample=Image.BILINEAR)

        if self.augment:
            # Geometric augmentations: image and landmarks are transformed together.
            image, landmarks = random_horizontal_flip(image, landmarks, p=0.5)
            image, landmarks = random_rotation(image, landmarks, max_angle=15)
            # Photometric augmentation: changes pixel values only.
            image = self.extra_transforms(image)

        image = self._to_tensor(image)
        return image, torch.tensor(landmarks, dtype=torch.float)

# Paths and dataset split
images_dir = project_root / "Dataset" / "Images"
csv_file = project_root / "Dataset" / "role_challenge_dataset_ground_truth.csv"

# Two datasets over the same CSV. random_split returns Subset objects that all
# wrap the SAME underlying dataset, so switching `augment` off through
# `val_dataset.dataset` would also switch it off for the training subset.
# Validation / test samples are therefore drawn from a separate, non-augmenting dataset.
full_dataset = FetalLandmarkDataset(images_dir, csv_file, image_size=(256, 256), augment=True)
eval_dataset = FetalLandmarkDataset(images_dir, csv_file, image_size=(256, 256), augment=False)

total_samples = len(full_dataset)
train_size = int(0.8 * total_samples)
val_size = int(0.1 * total_samples)
test_size = total_samples - train_size - val_size

# Seed the split so the train / val / test membership is the same on every run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_split, test_split = random_split(
    full_dataset, [train_size, val_size, test_size], generator=split_generator
)
# Reuse the held-out indices on the non-augmenting dataset.
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, test_split.indices)
print(f"Total samples: {total_samples}")
print(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}")

# DataLoader
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

Total samples: 622
Train: 497, Val: 62


# Model Definition: ResNet34 Landmark Model
Define the ResNet34LandmarkModel by modifying the last layer of a pretrained ResNet34 to output 8 coordinates (4 (x,y) pairs).

In [4]:
class ResNet34LandmarkModel(nn.Module):
    """ResNet-34 whose final fully connected layer regresses 4 (x, y) landmarks."""

    def __init__(self, pretrained=True):
        """Build the backbone; ``pretrained`` is forwarded to ``models.resnet34``."""
        super().__init__()
        resnet = models.resnet34(pretrained=pretrained)
        # Replace the final layer with an 8-value head: 8 coords => 4 (x,y) pairs
        resnet.fc = nn.Linear(resnet.fc.in_features, 8)
        self.backbone = resnet

    def forward(self, x):
        """Run the backbone and reshape its 8 outputs per sample to [N, 4, 2]."""
        return self.backbone(x).view(-1, 4, 2)

# Build the landmark regressor with a pretrained backbone, then place it on the selected device
model = ResNet34LandmarkModel(pretrained=True)
model = model.to(device)
print(model)



ResNet34LandmarkModel(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

# Training Loop and Early Stopping Implementation
Implement the training loop that uses MSELoss, Adam optimizer, and CosineAnnealingLR scheduler, along with early stopping and model weight saving when validation loss improves.

In [None]:
# Hyperparameters
epochs = 60            # maximum number of training epochs
patience = 10          # epochs without validation improvement tolerated before early stopping
learning_rate = 3e-4   # learning rate given to the Adam optimizer

# Training Loop with Early Stopping and Model Weight Saving
def train_landmark_model(model, train_loader, val_loader, epochs, device, patience,
                         lr=None, checkpoint_path=None):
    """Train ``model`` with MSE loss, Adam and cosine-annealed learning rate.

    After every epoch the validation loss is computed. Whenever it improves,
    the model's ``state_dict`` is saved; training stops early once it has
    failed to improve for ``patience`` consecutive epochs.

    Args:
        model: module mapping an image batch to predictions shaped like the targets.
        train_loader: iterable of ``(images, landmarks)`` training batches.
        val_loader: iterable of ``(images, landmarks)`` validation batches.
        epochs: maximum number of epochs (also the scheduler's ``T_max``).
        device: device the batches are moved to.
        patience: number of non-improving epochs tolerated before stopping.
        lr: Adam learning rate; defaults to the notebook-level ``learning_rate``.
        checkpoint_path: where the best weights are written; defaults to
            ``project_root / "Model Weights" / "hypothesis_best_model_weights.pth"``.

    Raises:
        ValueError: if either loader yields no samples in an epoch.
    """
    if lr is None:
        lr = learning_rate
    if checkpoint_path is None:
        checkpoint_path = project_root / "Model Weights" / "hypothesis_best_model_weights.pth"
    checkpoint_path = Path(checkpoint_path)
    # torch.save does not create missing directories; make sure the target exists
    # up front rather than failing after a full epoch of training.
    checkpoint_path.parent.mkdir(parents=True, exist_ok=True)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    best_val_loss = float("inf")
    no_improve_count = 0

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        train_seen = 0
        for images, landmarks in train_loader:
            images = images.to(device)
            landmarks = landmarks.to(device)
            optimizer.zero_grad()
            preds = model(images)
            loss = criterion(preds, landmarks)
            loss.backward()
            optimizer.step()
            # Weight each batch by its size so a smaller final batch does not
            # count as much as a full one in the epoch average.
            batch_count = images.size(0)
            total_loss += loss.item() * batch_count
            train_seen += batch_count
        if train_seen == 0:
            raise ValueError("train_loader yielded no samples")
        avg_train_loss = total_loss / train_seen

        model.eval()
        val_loss = 0.0
        val_seen = 0
        with torch.no_grad():
            for images_val, landmarks_val in val_loader:
                images_val = images_val.to(device)
                landmarks_val = landmarks_val.to(device)
                preds_val = model(images_val)
                loss_val = criterion(preds_val, landmarks_val)
                batch_count = images_val.size(0)
                val_loss += loss_val.item() * batch_count
                val_seen += batch_count
        if val_seen == 0:
            raise ValueError("val_loader yielded no samples")
        avg_val_loss = val_loss / val_seen
        scheduler.step()

        print(f"Epoch {epoch+1:02d}: Train Loss {avg_train_loss:.4f}, Val Loss {avg_val_loss:.4f}")

        # Save model weights if validation loss improves
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            no_improve_count = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            no_improve_count += 1
            if no_improve_count >= patience:
                print("Early Stopping Triggered.")
                break

# Execute Training
Run training with the hyperparameters defined above. The training and validation loss are printed after every epoch.

In [None]:
# Launch the training run with the hyperparameters defined above
train_landmark_model(
    model,
    train_loader,
    val_loader,
    epochs=epochs,
    device=device,
    patience=patience,
)