# Import libraries

In [1]:
import torch
import os
from torch.utils.data import DataLoader
from segmentation_models_pytorch.utils.metrics import IoU
from tqdm import tqdm

from src.utils import save_to_hdf5, load_hdf5, load_model
from src.datasets import ExcavatorDataset
from src.config import IMAGE_SIZE, TRANSFORMER, DEVICE
from models.Segmentation import DeepLabV3Model, DeepLabV3PlusModel, PyramidAttentionNetworkModel, UNetModel

  from .autonotebook import tqdm as notebook_tqdm


# Root

In [2]:
root = f'/home/ais/Bachelorarbeit/similarity_metrics_of_images/'
batch_size = 1

# Initialize models

**Note**: UNEt performs quite badly (only achieves `val IoU` of 0.78).

In [3]:
# DeepLabV3
dlv3 =DeepLabV3Model().model
dlv3.load_state_dict(torch.load(f'{root}models/torch_model_files/DeepLabV3_HybridFocalDiceLoss.pt'))
dlv3.to(DEVICE)
dlv3.eval()

# DeepLabV3Plus
dlv3p = DeepLabV3PlusModel().model
dlv3p.load_state_dict(torch.load(f'{root}models/torch_model_files/DeepLabV3Plus_HybridFocalDiceLoss.pt'))
dlv3p.to(DEVICE)
dlv3p.eval()

# UNet
unet = UNetModel().model
unet.load_state_dict(torch.load(f'{root}models/torch_model_files/UNet_HybridFocalDiceLoss.pt'))
unet.to(DEVICE)
unet.eval()

# Pyramid Attention Network
pan = PyramidAttentionNetworkModel().model
pan.load_state_dict(torch.load(f'{root}models/torch_model_files/PyramidAttentionNetwork_HybridFocalDiceLoss.pt'))
pan.to(DEVICE)
pan.eval()

2024-12-02 22:06:03,602 - DeepLabV3 - INFO - New model created with the following info:
                            - Encoder name: resnet18
                            - Activation: None
                            - Classes: 12
2024-12-02 22:06:03,606 - DeepLabV3 - INFO - Device used for model: cuda


  dlv3.load_state_dict(torch.load(f'{root}models/torch_model_files/DeepLabV3_HybridFocalDiceLoss.pt'))


2024-12-02 22:06:04,356 - DeepLabV3Plus - INFO - New model created with the following info:
                            - Encoder name: resnet18
                            - Activation: None
                            - Classes: 12
2024-12-02 22:06:04,359 - DeepLabV3Plus - INFO - Device used for model: cuda


  dlv3p.load_state_dict(torch.load(f'{root}models/torch_model_files/DeepLabV3Plus_HybridFocalDiceLoss.pt'))


2024-12-02 22:06:04,717 - UNet - INFO - New model created with the following info:
                            - Encoder name: resnet18
                            - Activation: None
                            - Classes: 12
2024-12-02 22:06:04,720 - UNet - INFO - Device used for model: cuda


  unet.load_state_dict(torch.load(f'{root}models/torch_model_files/UNet_HybridFocalDiceLoss.pt'))


2024-12-02 22:06:05,114 - PyramidAttentionNetwork - INFO - New model created with the following info:
                            - Encoder name: resnet18
                            - Activation: None
                            - Classes: 12
2024-12-02 22:06:05,116 - PyramidAttentionNetwork - INFO - Device used for model: cuda


  pan.load_state_dict(torch.load(f'{root}models/torch_model_files/PyramidAttentionNetwork_HybridFocalDiceLoss.pt'))


PAN(
  (encoder): ResNetEncoder(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

# Load dataset

In [4]:
train_dataset = ExcavatorDataset(return_type='image+mask+path', purpose='train', transform=TRANSFORMER,one_hot_encode_mask=True)
print("Number of training samples:", num_train_imgs:=len(train_dataset))
val_dataset = ExcavatorDataset(return_type='image+mask+path', purpose='test', transform=TRANSFORMER, one_hot_encode_mask=True)
print("Number of test samples:", num_val_imgs:=len(val_dataset))

Number of training samples: 1782
Number of test samples: 187


  key: torch.tensor(value / 255.0, dtype=torch.float32)


# Compute and save predicted masks

In [5]:
train_iou_dlv3 = torch.zeros(num_train_imgs, dtype=torch.float32, device=DEVICE)
train_iou_dlv3p = torch.zeros(num_train_imgs, dtype=torch.float32, device=DEVICE)
train_iou_unet = torch.zeros(num_train_imgs, dtype=torch.float32, device=DEVICE)
train_iou_pan = torch.zeros(num_train_imgs, dtype=torch.float32, device=DEVICE)
train_paths = []

val_iou_dlv3 = torch.zeros(num_val_imgs, dtype=torch.float32, device=DEVICE)
val_iou_dlv3p = torch.zeros(num_val_imgs, dtype=torch.float32, device=DEVICE)
val_iou_unet = torch.zeros(num_val_imgs, dtype=torch.float32, device=DEVICE)
val_iou_pan = torch.zeros(num_val_imgs, dtype=torch.float32, device=DEVICE)
val_paths = []

In [7]:

# Compute predicted masks for training set
for i, (imgs, masks, paths) in tqdm(enumerate(train_dataset)):
    imgs = imgs.to(DEVICE).unsqueeze(0)
    masks = masks.to(DEVICE)
    output_dlv3 = dlv3(imgs)
    output_dlv3p = dlv3p(imgs)
    output_unet = unet(imgs)
    output_pan = pan(imgs)
    train_iou_dlv3[i] = IoU()(output_dlv3, masks)
    train_iou_dlv3p[i] = IoU()(output_dlv3p, masks)
    train_iou_unet[i] = IoU()(output_unet, masks)
    train_iou_pan[i] = IoU()(output_pan, masks)
    train_paths.extend(paths)

train_paths = [path.replace('/', '|').replace('\\', '|') for path in train_paths]
save_to_hdf5(f'{root}res/model_performance/train_iou_dlv3.h5', {'train_iou': train_iou_dlv3.cpu().numpy(), 'train_paths': train_paths})
save_to_hdf5(f'{root}res/model_performance/train_iou_dlv3p.h5', {'train_iou': train_iou_dlv3p.cpu().numpy(), 'train_paths': train_paths})
save_to_hdf5(f'{root}res/model_performance/train_iou_unet.h5', {'train_iou': train_iou_unet.cpu().numpy(), 'train_paths': train_paths})
save_to_hdf5(f'{root}res/model_performance/train_iou_pan.h5', {'train_iou': train_iou_pan.cpu().numpy(), 'train_paths': train_paths})

# Compute predicted masks for validation set
for i, (imgs, masks, paths) in tqdm(enumerate(val_dataset)):
    imgs = imgs.to(DEVICE).unsqueeze(0)
    masks = masks.to(DEVICE)
    output_dlv3 = dlv3(imgs)
    output_dlv3p = dlv3p(imgs)
    output_unet = unet(imgs)
    output_pan = pan(imgs)
    val_iou_dlv3[i] = IoU()(output_dlv3, masks)
    val_iou_dlv3p[i] = IoU()(output_dlv3p, masks)
    val_iou_unet[i] = IoU()(output_unet, masks)
    val_iou_pan[i] = IoU()(output_pan, masks)
    val_paths.extend(paths)

val_paths = [path.replace('/', '|').replace('\\', '|') for path in val_paths]
save_to_hdf5(f'{root}res/model_performance/val_iou_dlv3.h5', {'val_iou': val_iou_dlv3.cpu().numpy(), 'val_paths': val_paths})
save_to_hdf5(f'{root}res/model_performance/val_iou_dlv3p.h5', {'val_iou': val_iou_dlv3p.cpu().numpy(), 'val_paths': val_paths})
save_to_hdf5(f'{root}res/model_performance/val_iou_unet.h5', {'val_iou': val_iou_unet.cpu().numpy(), 'val_paths': val_paths})
save_to_hdf5(f'{root}res/model_performance/val_iou_pan.h5', {'val_iou': val_iou_pan.cpu().numpy(), 'val_paths': val_paths})


1782it [00:57, 30.85it/s]
187it [00:05, 34.29it/s]


# Compute pairwise IoU differences between training and validation set

In [9]:
train_data= load_hdf5(f'{root}res/model_performance/train_iou.h5')
train_iou = train_data['train_iou']
train_paths = train_data['train_paths']

val_data = load_hdf5(f'{root}res/model_performance/val_iou.h5')
val_iou = val_data['val_iou']
val_paths = val_data['val_paths']

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = '/home/ais/Bachelorarbeit/similarity_metrics_of_images/res/model_performance/train_iou.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

## Check that data is loaded correctly

In [8]:
print("Shape of train_iou:", train_iou.shape)
print("Number of training samples:", len(train_paths))
print("Shape of val_iou:", val_iou.shape)
print("Number of validation samples:", len(val_paths))

Shape of train_iou: (1782,)
Number of training samples: 270505
Shape of val_iou: (187,)
Number of validation samples: 29093


# Compute pairwise IoU differences between training and validation set

In [11]:
for model in ['dlv3', 'dlv3p', 'unet', 'pan']:
    train_iou = load_hdf5(f'{root}res/model_performance/train_iou_{model}.h5')['train_iou']
    val_iou = load_hdf5(f'{root}res/model_performance/val_iou_{model}.h5')['val_iou']
    idx_pair = []
    iou_diff = []

    for i, t_iou in enumerate(train_iou):
        for j, v_iou in enumerate(val_iou):
            iou_diff.append(t_iou - v_iou)
            idx_pair.append((i, j))

    save_to_hdf5(f'{root}res/model_performance/iou_diff_{model}.h5', {'iou_diff': iou_diff, 'idx_pair': idx_pair})


# Check that data is saved correctly

In [17]:
comp_data = load_hdf5(f'{root}res/model_performance/iou_diff_dlv3.h5')
print("Shape of iou_diff:", comp_data['iou_diff'].shape)
print("Number of pairs:", len(comp_data['idx_pair']))
for i in range(100):
    print(f"Pair {i}: {comp_data['iou_diff'][i+1000]}")
    print(f"Index pair {i}: {comp_data['idx_pair'][i+1000]}")

Shape of iou_diff: (333234,)
Number of pairs: 333234
Pair 0: 0.033710867166519165
Index pair 0: [ 5 65]
Pair 1: 0.024200350046157837
Index pair 1: [ 5 66]
Pair 2: -0.02996695041656494
Index pair 2: [ 5 67]
Pair 3: 0.0017576515674591064
Index pair 3: [ 5 68]
Pair 4: -0.06966686248779297
Index pair 4: [ 5 69]
Pair 5: 0.07387089729309082
Index pair 5: [ 5 70]
Pair 6: -0.20941200852394104
Index pair 6: [ 5 71]
Pair 7: -0.0359133780002594
Index pair 7: [ 5 72]
Pair 8: -0.05518332123756409
Index pair 8: [ 5 73]
Pair 9: -0.10235503315925598
Index pair 9: [ 5 74]
Pair 10: -0.11664527654647827
Index pair 10: [ 5 75]
Pair 11: -0.08075404167175293
Index pair 11: [ 5 76]
Pair 12: -0.11365470290184021
Index pair 12: [ 5 77]
Pair 13: -0.11434110999107361
Index pair 13: [ 5 78]
Pair 14: -0.013910651206970215
Index pair 14: [ 5 79]
Pair 15: -0.12485209107398987
Index pair 15: [ 5 80]
Pair 16: -0.10711178183555603
Index pair 16: [ 5 81]
Pair 17: -0.06136474013328552
Index pair 17: [ 5 82]
Pair 18: -0.0