In [1]:
import numpy as np

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import Compose, Resize
import segmentation_models_pytorch as smp

from src.models.BaselineModel import BaselineModel
from src.evaluation.evaluate_result import evaluate_result
from src.datasets.DubaiSemanticSegmentationDataset import (
    DubaiSemanticSegmentationDataset,
)

from src.datasets.utils.ResizeToDivisibleBy32 import ResizeToDivisibleBy32

  from .autonotebook import tqdm as notebook_tqdm


## Prepare environment

In [2]:
torch.cuda.is_available()

True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
VAL_SIZE = 0.2
BATCH_SIZE = 1
SEED = 42
DUBAI_DATASET_PATH = "data/DubaiSemanticSegmentationDataset"
IMAGE_SIZE = 576

In [5]:
train_dataset = DubaiSemanticSegmentationDataset(
    DUBAI_DATASET_PATH,
    transforms=[Compose([Resize(IMAGE_SIZE), ResizeToDivisibleBy32()])],
)
print(len(train_dataset))

72


## Sanity check data

In [6]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)

In [7]:
for images, masks in train_loader:
    print(images.shape)
    print(masks.shape)
    break

torch.Size([1, 3, 576, 736])
torch.Size([1, 3, 576, 736])


In [8]:
to_pil_transform = transforms.ToPILImage()

In [9]:
img = to_pil_transform(images.squeeze())

In [10]:
# img.show()

In [11]:
msk = to_pil_transform(masks.squeeze())

In [12]:
# msk.show()

## Run example model on single image

In [13]:
model = smp.Unet(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=6,  # model output channels (number of classes in your dataset)
    activation="softmax",
).to(device)

In [14]:
baseline_model = BaselineModel(classes=6).to(device)

In [15]:
torch.cuda.empty_cache()

In [16]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        # (batch_size, channels, height, width)
        # (B, C, H, W)
        break
    output = model(images.to(device))
    output = torch.argmax(output, dim=1)

    output_baseline = baseline_model(images.to(device))
    output_baseline = torch.argmax(output_baseline, dim=1)

torch.Size([1, 3, 576, 640])
torch.Size([1, 3, 576, 640])


  return self._call_impl(*args, **kwargs)


In [17]:
print(masks.max())

tensor(254, dtype=torch.uint8)


In [18]:
print(masks.min())

tensor(41, dtype=torch.uint8)


In [19]:
print(output.shape)

torch.Size([1, 576, 640])


In [20]:
print(output_baseline.shape)

torch.Size([1, 576, 640])


In [23]:
unique, counts = np.unique(output.cpu(), return_counts=True)
print(dict(zip(unique, counts)))

{0: 1668, 1: 19413, 2: 43, 3: 244335, 4: 2279, 5: 100902}


In [24]:
unique, counts = np.unique(masks.cpu(), return_counts=True)
print(dict(zip(unique, counts)))

{41: 363684, 42: 284, 43: 184, 44: 172, 45: 108, 46: 99, 47: 94, 48: 93, 49: 71, 50: 86, 51: 60, 52: 68, 53: 65, 54: 77, 55: 59, 56: 59, 57: 80, 58: 1055, 59: 80, 60: 67, 61: 41, 62: 59, 63: 68, 64: 46, 65: 40, 66: 37, 67: 46, 68: 48, 69: 36, 70: 37, 71: 45, 72: 27, 73: 30, 74: 37, 75: 32, 76: 30, 77: 22, 78: 25, 79: 30, 80: 34, 81: 35, 82: 35, 83: 30, 84: 37, 85: 25, 86: 33, 87: 48, 88: 29, 89: 30, 90: 39, 91: 29, 92: 15, 93: 19, 94: 29, 95: 22, 96: 20, 97: 31, 98: 29, 99: 34, 100: 37, 101: 28, 102: 24, 103: 28, 104: 42, 105: 31, 106: 24, 107: 28, 108: 23, 109: 25, 110: 33, 111: 33, 112: 34, 113: 29, 114: 23, 115: 20, 116: 36, 117: 25, 118: 24, 119: 31, 120: 25, 121: 22, 122: 27, 123: 25, 124: 36, 125: 26, 126: 33, 127: 22, 128: 33, 129: 33, 130: 22, 131: 25, 132: 64112, 133: 221, 134: 152, 135: 103, 136: 102, 137: 119, 138: 79, 139: 91, 140: 79, 141: 78, 142: 70, 143: 76, 144: 76, 145: 61, 146: 99, 147: 60, 148: 72, 149: 55, 150: 70, 151: 63, 152: 72, 153: 90, 154: 60, 155: 142, 156:

In [26]:
# FIXME: wykonanie tego bez błędu == tensory mają takie same wymiary
evaluate_result(output, masks, mode="multiclass", num_classes=6)

ValueError: Dimensions should match, but ``output`` shape is not equal to ``target`` shape, torch.Size([576, 640]) != torch.Size([3, 576, 640])

In [None]:
evaluate_result(output_baseline, masks)

## Run example model on dataset

In [None]:
model.eval()
metrics_dict = {
    "iou": [],
    "f1": [],
    "accuracy": [],
    "recall": [],
}

In [None]:
with torch.no_grad():
    for images, masks in train_loader:
        output = model(images.to(device))
        # output = (output > 0.5).float()

        iter_metrics = evaluate_result(output, masks)
        for key in metrics_dict.keys():
            metrics_dict[key].append(iter_metrics[key])

In [None]:
print(metrics_dict)