# Import libraries

In [14]:
import torch
import os
from torch.utils.data import DataLoader
from segmentation_models_pytorch.utils.metrics import IoU
from tqdm import tqdm

from src.utils import save_to_hdf5, load_hdf5, load_model, multiclass_iou
from src.datasets import ExcavatorDataset
from src.config import IMAGE_SIZE, TRANSFORMER, DEVICE, ROOT
from models.Segmentation import DeepLabV3Model, DeepLabV3PlusModel, PyramidAttentionNetworkModel, UNetModel

# Root

In [15]:
root = ROOT
batch_size = 1

# Initialize models

**Note**: UNEt performs quite badly (only achieves `val IoU` of 0.78).

In [16]:
# DeepLabV3
dlv3 =DeepLabV3Model().model
dlv3.load_state_dict(torch.load(f'{root}/models/torch_model_files/DeepLabV3_HybridFocalDiceLoss.pt'))
dlv3.to(DEVICE)
dlv3.eval()

# DeepLabV3Plus
dlv3p = DeepLabV3PlusModel().model
dlv3p.load_state_dict(torch.load(f'{root}/models/torch_model_files/DeepLabV3Plus_HybridFocalDiceLoss.pt'))
dlv3p.to(DEVICE)
dlv3p.eval()

# UNet
unet = UNetModel().model
unet.load_state_dict(torch.load(f'{root}/models/torch_model_files/UNet_HybridFocalDiceLoss.pt'))
unet.to(DEVICE)
unet.eval()

# Pyramid Attention Network
pan = PyramidAttentionNetworkModel().model
pan.load_state_dict(torch.load(f'{root}/models/torch_model_files/PyramidAttentionNetwork_HybridFocalDiceLoss.pt'))
pan.to(DEVICE)
pan.eval()

2024-12-08 12:16:40,534 - DeepLabV3 - INFO - New <class 'segmentation_models_pytorch.decoders.deeplabv3.model.DeepLabV3'> model created with the following info:
                            - Encoder name: resnet18
                            - Activation: softmax
                            - Classes: 12
2024-12-08 12:16:40,534 - DeepLabV3 - INFO - Device used for model: cuda


  dlv3.load_state_dict(torch.load(f'{root}/models/torch_model_files/DeepLabV3_HybridFocalDiceLoss.pt'))


2024-12-08 12:16:40,823 - DeepLabV3Plus - INFO - New <class 'segmentation_models_pytorch.decoders.deeplabv3.model.DeepLabV3Plus'> model created with the following info:
                            - Encoder name: resnet18
                            - Activation: softmax
                            - Classes: 12
2024-12-08 12:16:40,823 - DeepLabV3Plus - INFO - Device used for model: cuda


  dlv3p.load_state_dict(torch.load(f'{root}/models/torch_model_files/DeepLabV3Plus_HybridFocalDiceLoss.pt'))


2024-12-08 12:16:41,055 - UNet - INFO - New <class 'segmentation_models_pytorch.decoders.unet.model.Unet'> model created with the following info:
                            - Encoder name: resnet18
                            - Activation: softmax
                            - Classes: 12
2024-12-08 12:16:41,055 - UNet - INFO - Device used for model: cuda


  unet.load_state_dict(torch.load(f'{root}/models/torch_model_files/UNet_HybridFocalDiceLoss.pt'))


2024-12-08 12:16:41,274 - PyramidAttentionNetwork - INFO - New <class 'segmentation_models_pytorch.decoders.pan.model.PAN'> model created with the following info:
                            - Encoder name: resnet18
                            - Activation: softmax
                            - Classes: 12
2024-12-08 12:16:41,274 - PyramidAttentionNetwork - INFO - Device used for model: cuda


  pan.load_state_dict(torch.load(f'{root}/models/torch_model_files/PyramidAttentionNetwork_HybridFocalDiceLoss.pt'))


PAN(
  (encoder): ResNetEncoder(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

# Load dataset

In [18]:
train_dataset = ExcavatorDataset(return_type='image+mask+path', purpose='train', transform=TRANSFORMER,one_hot_encode_mask=True)
print("Number of training samples:", num_train_imgs:=len(train_dataset))
val_dataset = ExcavatorDataset(return_type='image+mask+path', purpose='validation', transform=TRANSFORMER, one_hot_encode_mask=True)
print("Number of test samples:", num_val_imgs:=len(val_dataset))

Number of training samples: 1782
Number of test samples: 187


# Compute and save predicted masks

In [25]:
train_iou_dlv3 = torch.zeros(num_train_imgs, dtype=torch.float32, device=DEVICE)
train_iou_dlv3p = torch.zeros(num_train_imgs, dtype=torch.float32, device=DEVICE)
train_iou_unet = torch.zeros(num_train_imgs, dtype=torch.float32, device=DEVICE)
train_iou_pan = torch.zeros(num_train_imgs, dtype=torch.float32, device=DEVICE)
train_paths = []

val_iou_dlv3 = torch.zeros(num_val_imgs, dtype=torch.float32, device=DEVICE)
val_iou_dlv3p = torch.zeros(num_val_imgs, dtype=torch.float32, device=DEVICE)
val_iou_unet = torch.zeros(num_val_imgs, dtype=torch.float32, device=DEVICE)
val_iou_pan = torch.zeros(num_val_imgs, dtype=torch.float32, device=DEVICE)
val_paths = []

In [29]:

# Compute predicted masks for training set
for i, (imgs, masks, paths) in tqdm(enumerate(train_dataset)):
    imgs = imgs.to(DEVICE).unsqueeze(0)
    masks = masks.to(DEVICE)
    output_dlv3 = dlv3(imgs).squeeze(0)
    output_dlv3p = dlv3p(imgs).squeeze(0)
    output_unet = unet(imgs).squeeze(0)
    output_pan = pan(imgs).squeeze(0)
    train_iou_dlv3[i] = multiclass_iou(output_dlv3, masks)
    train_iou_dlv3p[i] = multiclass_iou(output_dlv3p, masks)
    train_iou_unet[i] = multiclass_iou(output_unet, masks)
    train_iou_pan[i] = multiclass_iou(output_pan, masks)
    train_paths.append(paths)

train_paths = [path.replace('|', '/') for path in train_paths]
save_to_hdf5(f'{root}/res/model_performance/train_iou_dlv3.h5', {os.path.basename(pths): iou.cpu().numpy() for pths, iou in zip(train_paths, train_iou_dlv3)})
save_to_hdf5(f'{root}/res/model_performance/train_iou_dlv3p.h5', {os.path.basename(pths): iou.cpu().numpy() for pths, iou in zip(train_paths, train_iou_dlv3p)})
save_to_hdf5(f'{root}/res/model_performance/train_iou_unet.h5', {os.path.basename(pths): iou.cpu().numpy() for pths, iou in zip(train_paths, train_iou_unet)})
save_to_hdf5(f'{root}/res/model_performance/train_iou_pan.h5', {os.path.basename(pths): iou.cpu().numpy() for pths, iou in zip(train_paths, train_iou_pan)})

# Compute predicted masks for validation set
for i, (imgs, masks, paths) in tqdm(enumerate(val_dataset)):
    imgs = imgs.to(DEVICE).unsqueeze(0)
    masks = masks.to(DEVICE)
    output_dlv3 = dlv3(imgs).squeeze(0)
    output_dlv3p = dlv3p(imgs).squeeze(0)
    output_unet = unet(imgs).squeeze(0)
    output_pan = pan(imgs).squeeze(0)
    val_iou_dlv3[i] = multiclass_iou(output_dlv3, masks)
    val_iou_dlv3p[i] = multiclass_iou(output_dlv3p, masks)
    val_iou_unet[i] = multiclass_iou(output_unet, masks)
    val_iou_pan[i] = multiclass_iou(output_pan, masks)
    val_paths.append(paths)

val_paths = [path.replace('|', '/') for path in val_paths]
save_to_hdf5(f'{root}/res/model_performance/val_iou_dlv3.h5', {os.path.basename(pths): iou.cpu().numpy() for pths, iou in zip(val_paths, val_iou_dlv3)})
save_to_hdf5(f'{root}/res/model_performance/val_iou_dlv3p.h5', {os.path.basename(pths): iou.cpu().numpy() for pths, iou in zip(val_paths, val_iou_dlv3p)})
save_to_hdf5(f'{root}/res/model_performance/val_iou_unet.h5', {os.path.basename(pths): iou.cpu().numpy() for pths, iou in zip(val_paths, val_iou_unet)})
save_to_hdf5(f'{root}/res/model_performance/val_iou_pan.h5', {os.path.basename(pths): iou.cpu().numpy() for pths, iou in zip(val_paths, val_iou_pan)})


1782it [01:35, 18.60it/s]
187it [00:09, 18.98it/s]


# Compute pairwise IoU differences between training and validation set

In [30]:
import h5py
import numpy as np

def load_hdf5(file_path: str) -> dict[str, np.ndarray]:
    """
    Load data from an HDF5 file.

    :param file_path: Path to the HDF5 fileuse

    :return: Dictionary containing data from the HDF5 file
    """
    with h5py.File(file_path, 'r') as file:
        data = {key: val[()] for key, val in file.items()}
    return data

train_data= load_hdf5(f'{root}/res/model_performance/train_iou_dlv3.h5')


val_data = load_hdf5(f'{root}/res/model_performance/val_iou_dlv3.h5')
val_data

{'002583_jpg.rf.d9a73a4c7d0f131abe4afd1cf6cb7643.jpg': 0.8646055,
 '002584_jpg.rf.df7de4720f9fe5ab3d1ab2c8beae10aa.jpg': 0.90118337,
 '002588_jpg.rf.c15adcfef31164e442dac07f4446f4c5.jpg': 0.87565553,
 '002595_jpg.rf.9f2c0ffbe46253b2ac3456eb24edf23a.jpg': 0.76098657,
 '002596_jpg.rf.923de0d457c9ad49c78fe0c936d1a7c3.jpg': 0.60054594,
 '002599_jpg.rf.01c95c8b0b6ccdd565e68c2bd538e895.jpg': 0.8110784,
 '002600_jpg.rf.e1ae5a6e3ebf4268d25207a95a9ab49b.jpg': 0.79282916,
 '002605_jpg.rf.b33cad48b2068888dab6857f1e069607.jpg': 0.4241023,
 '002619_jpg.rf.c443b7e27337b81202ace674159f0268.jpg': 0.6540473,
 '002625_jpg.rf.03479900b5e6c512ce312646551a18df.jpg': 0.54075384,
 '002625_jpg.rf.18b3b6bb6e3cffdc1d852e614a3fa65d.jpg': 0.5229757,
 '002625_jpg.rf.b9602442e461429fcbabe4eaa2f265a6.jpg': 0.51529276,
 '002629_jpg.rf.95f4c38c9eff1e94bdb97cbb9a5ab7fa.jpg': 0.7778227,
 '002630_jpg.rf.da3c3f72e863204de7ace115999e04d7.jpg': 0.8463952,
 '002631_jpg.rf.ee4f7ee0ee7061108a7b41d44d363e69.jpg': 0.6679689,
 '0

## Check that data is loaded correctly

In [23]:
print("Shape of train_iou:", len(train_data))
print("Shape of val_iou:", len(val_data))

Shape of train_iou: 1782
Shape of val_iou: 187


# Compute pairwise IoU differences between training and validation set

In [None]:
for model in ['dlv3', 'dlv3p', 'unet', 'pan']:
    train_iou = load_hdf5(f'{root}res/model_performance/train_iou_{model}.h5')['train_iou']
    val_iou = load_hdf5(f'{root}res/model_performance/val_iou_{model}.h5')['val_iou']
    idx_pair = []
    iou_diff = []

    for i, t_iou in enumerate(train_iou):
        for j, v_iou in enumerate(val_iou):
            iou_diff.append(t_iou - v_iou)
            idx_pair.append((i, j))

    save_to_hdf5(f'{root}res/model_performance/iou_diff_{model}.h5', {'iou_diff': iou_diff, 'idx_pair': idx_pair})


# Check that data is saved correctly

In [None]:
comp_data = load_hdf5(f'{root}res/model_performance/iou_diff_dlv3.h5')
print("Shape of iou_diff:", comp_data['iou_diff'].shape)
print("Number of pairs:", len(comp_data['idx_pair']))
for i in range(100):
    print(f"Pair {i}: {comp_data['iou_diff'][i+1000]}")
    print(f"Index pair {i}: {comp_data['idx_pair'][i+1000]}")