In [76]:
import os

import albumentations as A
import cv2
import numpy as np
import pandas as pd
import torch
import torchvision
from skimage import io

# Record the versions of the core libraries so the run can be reproduced.
for _label, _module in (
    ("Torch:", torch),
    ("Torchvision:", torchvision),
    ("OpenCV:", cv2),
    ("Albumentations:", A),
):
    print(_label, _module.__version__)

Torch: 2.6.0+cu124
Torchvision: 0.21.0+cu124
OpenCV: 4.12.0
Albumentations: 2.0.8


In [77]:
IMG_SIZE = 256        # side length (px) that resize_with_landmarks resizes every image to
NUM_LANDMARKS = 4     # landmarks per image: ofd_1, ofd_2, bpd_1, bpd_2
HEATMAP_SIZE = 256    # side length (px) of each per-landmark target heatmap
GAUSSIAN_RADIUS = 4   # passed to generate_heatmap as both the disc radius and the blur sigma

In [78]:
import numpy as np
import cv2

def generate_heatmap(h, w, x, y, radius=4):
    """Render a single-landmark target heatmap of shape ``(h, w)``.

    A filled disc of ``radius`` pixels is drawn at the (truncated) integer
    landmark position, blurred with a Gaussian whose sigma equals ``radius``,
    and rescaled so that its maximum is 1.

    Args:
        h, w: height and width of the heatmap.
        x, y: landmark column / row; converted with ``int()``.
        radius: disc radius and Gaussian sigma, in pixels.

    Returns:
        ``float32`` array of shape ``(h, w)``. It is all zeros when the
        landmark falls outside the ``h`` x ``w`` canvas.
    """
    canvas = np.zeros((h, w), dtype=np.float32)
    col, row = int(x), int(y)

    inside_canvas = 0 <= col < w and 0 <= row < h
    if not inside_canvas:
        return canvas

    # cv2.circle draws in place; thickness -1 means "filled".
    cv2.circle(canvas, (col, row), radius, 1.0, -1)
    # A (0, 0) kernel size lets OpenCV derive the kernel from the sigmas.
    blurred = cv2.GaussianBlur(canvas, (0, 0), sigmaX=radius, sigmaY=radius)

    peak = blurred.max()
    if peak > 0:
        return blurred / peak
    return blurred

In [79]:
import pandas as pd

# Validation split table; FetalLandmarkDataset reads `image_name` and the
# ofd_*/bpd_* coordinate columns from each row.
val_df = pd.read_csv("/kaggle/input/trainertaska-1/validation_split.csv")
# Directory that the `image_name` values are joined onto when loading images.
VAL_IMG_DIR = "/kaggle/input/trainertaska-1/val_images_zip"

# Quick sanity check: number of validation rows and the image directory in use.
len(val_df), VAL_IMG_DIR 

(94, '/kaggle/input/trainertaska-1/val_images_zip')

In [80]:
def resize_with_landmarks(img, landmarks):
    """Resize ``img`` to ``IMG_SIZE`` x ``IMG_SIZE`` and rescale its landmarks.

    Args:
        img: image array whose first two axes are (height, width).
        landmarks: iterable of ``(x, y)`` pairs in the original pixel frame.

    Returns:
        ``(resized_image, scaled_landmarks)`` where ``scaled_landmarks`` is a
        list of ``(x, y)`` tuples expressed in the resized image's frame.
    """
    src_h, src_w = img.shape[:2]
    resized = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

    # Independent factors per axis: the aspect ratio is not preserved.
    fx, fy = IMG_SIZE / src_w, IMG_SIZE / src_h
    rescaled = [(px * fx, py * fy) for px, py in landmarks]

    return resized, rescaled

In [81]:
import torch
from torch.utils.data import Dataset

class FetalLandmarkDataset(Dataset):
    """Dataset yielding ``(image, heatmaps)`` tensor pairs for landmark regression.

    Every row of ``df`` must provide ``image_name`` plus the coordinate columns
    listed in ``LANDMARK_COLUMNS``.

    Each item is:
        * image    -- float tensor ``(3, IMG_SIZE, IMG_SIZE)`` in ``[0, 1]``
          (the grayscale image replicated on three channels);
        * heatmaps -- float tensor ``(NUM_LANDMARKS, HEATMAP_SIZE, HEATMAP_SIZE)``
          with one channel per landmark. A missing/NaN landmark gets an
          all-zero channel.

    A sample that cannot be loaded is reported with a ``BAD SAMPLE`` line and
    replaced by an all-zero pair so that batching keeps working.

    Args:
        df: table of samples; its index is reset so positional access is safe.
        img_dir: directory that ``image_name`` values are joined onto.
        augment: when true, ``train_augs`` is applied before resizing.
            NOTE(review): ``train_augs`` is not defined in this notebook and
            must exist before ``augment=True`` is used.
    """

    # (x column, y column) for each landmark, in heatmap-channel order.
    LANDMARK_COLUMNS = (
        ('ofd_1_x', 'ofd_1_y'),
        ('ofd_2_x', 'ofd_2_y'),
        ('bpd_1_x', 'bpd_1_y'),
        ('bpd_2_x', 'bpd_2_y'),
    )

    def __init__(self, df, img_dir, augment=False):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.augment = augment

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Resolved outside the ``try`` on purpose: an out-of-range index must
        # raise IndexError (plain ``for`` iteration relies on it to stop) and
        # a missing ``image_name`` column is a setup error, not a bad sample.
        row = self.df.iloc[idx]
        image_name = row['image_name']

        try:
            return self._load_sample(row, image_name)
        except Exception as e:
            # Deliberate best effort: report and substitute an empty sample.
            print("BAD SAMPLE:", idx, image_name, e)
            return self._blank_sample()

    def _load_sample(self, row, image_name):
        """Read, optionally augment and resize one image; build its heatmaps."""
        img_path = os.path.join(self.img_dir, image_name)

        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise ValueError("Image failed to load")

        landmarks = [(row[x_col], row[y_col]) for x_col, y_col in self.LANDMARK_COLUMNS]

        if self.augment:
            augmented = train_augs(image=img, keypoints=landmarks)
            img = augmented['image']
            landmarks = augmented['keypoints']

        img, landmarks = resize_with_landmarks(img, landmarks)
        img = np.ascontiguousarray(img).astype(np.float32) / 255.0
        img = np.stack([img, img, img], axis=0)

        # Augmentation may drop or add keypoints: force exactly NUM_LANDMARKS.
        landmarks = list(landmarks)[:NUM_LANDMARKS]
        landmarks += [(None, None)] * (NUM_LANDMARKS - len(landmarks))

        # Landmarks live in the IMG_SIZE frame; map them onto the heatmap grid
        # (a factor of exactly 1.0 when both sizes are equal).
        hm_scale = HEATMAP_SIZE / IMG_SIZE
        heatmaps = np.stack(
            [self._landmark_heatmap(x, y, hm_scale) for (x, y) in landmarks],
            axis=0,
        )
        heatmaps = np.ascontiguousarray(heatmaps, dtype=np.float32)

        return torch.from_numpy(img).float(), torch.from_numpy(heatmaps).float()

    @staticmethod
    def _landmark_heatmap(x, y, scale):
        """Return the heatmap for one landmark, all zeros when it is missing."""
        is_missing = x is None or y is None or np.isnan(x) or np.isnan(y)
        if is_missing:
            return np.zeros((HEATMAP_SIZE, HEATMAP_SIZE), dtype=np.float32)
        return generate_heatmap(
            HEATMAP_SIZE,
            HEATMAP_SIZE,
            int(x * scale),
            int(y * scale),
            GAUSSIAN_RADIUS,
        )

    @staticmethod
    def _blank_sample():
        """Return an all-zero (image, heatmaps) pair with the regular shapes."""
        blank_img = np.zeros((3, IMG_SIZE, IMG_SIZE), dtype=np.float32)
        blank_hm = np.zeros((NUM_LANDMARKS, HEATMAP_SIZE, HEATMAP_SIZE), dtype=np.float32)
        return torch.from_numpy(blank_img).float(), torch.from_numpy(blank_hm).float()


In [82]:
from torch.utils.data import DataLoader
# Validation pipeline: augmentation off and shuffling off, so batches follow
# the row order of val_df.
val_dataset = FetalLandmarkDataset(val_df, VAL_IMG_DIR, augment=False)
val_loader  = DataLoader(val_dataset, batch_size=8, shuffle=False)

In [83]:
import torch
import torch.nn as nn
class Hourglass(nn.Module):
    """Recursive hourglass module.

    The input goes through two branches whose outputs are summed:
      * a skip branch (``res``) at the current resolution;
      * a low branch that is ``down`` -> nested ``Hourglass(depth - 1)`` -> ``up``
        while ``depth > 1``, or a single ``inner_res`` block at the innermost
        level.

    Args:
        depth: number of nested levels; the recursion stops when it is <= 1.
        channels: channel count used by every block (input == output).
    """

    def __init__(self, depth, channels):
        super().__init__()
        self.depth = depth
        self.channels = channels

        self.res = ConvBlock(channels, channels)

        if not depth > 1:
            # Innermost level: no further down/up sampling.
            self.inner_res = ConvBlock(channels, channels)
            return

        self.down = DownSample(channels)
        self.inner = Hourglass(depth - 1, channels)
        self.up = UpSample(channels)

    def forward(self, x):
        skip = self.res(x)

        if not self.depth > 1:
            return skip + self.inner_res(x)

        low = self.up(self.inner(self.down(x)))
        return skip + low


In [84]:
import torch
import torch.nn as nn

class ConvBlock(nn.Module):
    """Residual convolution block.

    Main path: 3x3 conv -> batch norm -> ReLU -> 3x3 conv -> batch norm.
    Shortcut: a bias-free 1x1 conv that projects ``in_ch`` to ``out_ch``.
    The two paths are added and passed through a final ReLU. Spatial size is
    unchanged (padding 1 on the 3x3 convolutions).

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        main_path = [
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
        ]
        self.conv = nn.Sequential(*main_path)
        self.skip = nn.Conv2d(in_ch, out_ch, kernel_size=1, bias=False)

    def forward(self, x):
        residual = self.conv(x)
        shortcut = self.skip(x)
        return torch.relu(residual + shortcut)


In [85]:
class DownSample(nn.Module):
    """Halve the spatial resolution with 2x2 max pooling, then apply a ConvBlock.

    Args:
        ch: channel count, kept unchanged by the block.
    """

    def __init__(self, ch):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.conv = ConvBlock(ch, ch)

    def forward(self, x):
        pooled = self.pool(x)
        return self.conv(pooled)


class UpSample(nn.Module):
    """Double the spatial resolution bilinearly, then apply a ConvBlock.

    Interpolation uses ``align_corners=True``, so corner pixels keep their
    values.

    Args:
        ch: channel count, kept unchanged by the block.
    """

    def __init__(self, ch):
        super().__init__()
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.conv = ConvBlock(ch, ch)

    def forward(self, x):
        enlarged = self.up(x)
        return self.conv(enlarged)


In [86]:
class HeatmapHead(nn.Module):
    """Prediction head mapping feature maps to one heatmap per landmark.

    Layers: 3x3 conv (``ch`` -> ``ch``) -> ReLU -> 1x1 conv
    (``ch`` -> ``num_landmarks``). No activation is applied to the output.

    Args:
        ch: number of input feature channels.
        num_landmarks: number of output heatmap channels.
    """

    def __init__(self, ch, num_landmarks):
        super().__init__()
        layers = (
            nn.Conv2d(ch, ch, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(ch, num_landmarks, kernel_size=1),
        )
        self.head = nn.Sequential(*layers)

    def forward(self, x):
        heatmaps = self.head(x)
        return heatmaps


In [87]:
class HourglassNet(nn.Module):
    """Single-stack hourglass network that predicts landmark heatmaps.

    Pipeline: two ConvBlocks lifting the 3-channel input to ``channels``
    features (``pre``) -> one ``Hourglass`` -> ``HeatmapHead``.

    Args:
        num_landmarks: number of output heatmap channels.
        channels: feature width used throughout the network.
        depth: recursion depth of the hourglass.
    """

    def __init__(self, num_landmarks=4, channels=64, depth=4):
        super().__init__()

        stem = [
            ConvBlock(3, channels),
            ConvBlock(channels, channels),
        ]
        self.pre = nn.Sequential(*stem)

        self.hourglass = Hourglass(depth, channels)
        self.head = HeatmapHead(channels, num_landmarks)

    def forward(self, x):
        features = self.hourglass(self.pre(x))
        return self.head(features)

In [88]:
# Use the GPU when one is present; otherwise fall back to the CPU so the
# notebook can still be run for inspection on a machine without CUDA.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = HourglassNet(num_landmarks=NUM_LANDMARKS)
# map_location lets a checkpoint saved from a GPU be restored on any device.
state_dict = torch.load(
    "/kaggle/input/trainertaska-1/best_hourglass.pth",
    map_location=DEVICE,
)
model.load_state_dict(state_dict)
model = model.to(DEVICE)
# Inference mode: batch-norm layers use their running statistics.
model.eval() 

HourglassNet(
  (pre): Sequential(
    (0): ConvBlock(
      (conv): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (skip): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    )
    (1): ConvBlock(
      (conv): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runni

In [89]:
import torch
import numpy as np

def decode_heatmaps(heatmaps):
    """Convert heatmaps to landmark coordinates by taking each channel's argmax.

    Args:
        heatmaps: tensor of shape ``(B, C, H, W)`` on any device.

    Returns:
        CPU ``float32`` tensor of shape ``(B, C, 2)`` holding ``(x, y)`` =
        (column, row) of the maximum of every heatmap. When several pixels
        share the maximum, the first one in row-major order is returned, so
        an all-zero heatmap decodes to ``(0, 0)``.
    """
    B, C, H, W = heatmaps.shape

    # One argmax over the flattened spatial axis for the whole batch instead
    # of a Python loop with a host sync per landmark.
    flat_idx = heatmaps.reshape(B, C, H * W).argmax(dim=-1)

    xs = flat_idx % W
    ys = torch.div(flat_idx, W, rounding_mode="floor")

    coords = torch.stack((xs, ys), dim=-1)
    return coords.to(device="cpu", dtype=torch.float32)


In [90]:
import numpy as np
import torch

@torch.no_grad()
def evaluate_on_validation(model, loader):
    """Compute per-landmark pixel errors of ``model`` over ``loader``.

    Predicted and ground-truth heatmaps are both decoded with
    ``decode_heatmaps`` and compared with the Euclidean distance.

    Landmarks whose ground-truth heatmap is all zeros are skipped: the dataset
    emits such channels for missing landmarks and unreadable samples, and
    decoding them would yield a meaningless (0, 0) target.

    Args:
        model: network mapping an image batch to heatmaps ``(B, C, H, W)``.
        loader: iterable of ``(images, gt_heatmaps)`` batches.

    Returns:
        1-D ``float32`` array with one error per evaluated landmark, ordered
        by sample and then by landmark. The mean and median are printed
        (``nan`` when nothing could be evaluated).
    """
    model.eval()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    batch_errors = []
    for imgs, gt_hm in loader:
        pred_hm = model(imgs.to(device))

        pred_xy = decode_heatmaps(pred_hm).cpu().numpy()
        gt_xy = decode_heatmaps(gt_hm).cpu().numpy()

        # (B, C) mask: True where the ground truth actually contains a peak.
        annotated = (gt_hm.flatten(start_dim=2).amax(dim=-1) > 0).cpu().numpy()

        dists = np.linalg.norm(pred_xy - gt_xy, axis=-1)  # shape (B, C)
        batch_errors.append(dists[annotated].astype(np.float32))

    if batch_errors:
        all_errors = np.concatenate(batch_errors)
    else:
        all_errors = np.empty(0, dtype=np.float32)

    # Avoid numpy's "mean of empty slice" warning when nothing was evaluated.
    has_errors = all_errors.size > 0
    mean_err = np.mean(all_errors) if has_errors else float("nan")
    median_err = np.median(all_errors) if has_errors else float("nan")

    print("Validation Mean Pixel Error :", mean_err)
    print("Validation Median Pixel Error :", median_err)
    return all_errors

# Evaluate the loaded model on the validation loader; the returned array of
# per-landmark pixel errors is kept for further inspection.
val_errors = evaluate_on_validation(model, val_loader)

Validation Mean Pixel Error : 24.68644
Validation Median Pixel Error : 4.736068
