In [1]:
import numpy as np

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import segmentation_models_pytorch as smp

from src.models.BaselineModel import BaselineModel
from src.evaluation.evaluate_result import evaluate_result
from src.datasets.DubaiSemanticSegmentationDataset import (
    DubaiSemanticSegmentationDataset,
)

from src.datasets.utils.ResizeToDivisibleBy32 import ResizeToDivisibleBy32

  from .autonotebook import tqdm as notebook_tqdm


## Prepare environment

In [2]:
torch.cuda.is_available()

True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
VAL_SIZE = 0.2
BATCH_SIZE = 1
SEED = 42
DUBAI_DATASET_PATH = "data/DubaiSemanticSegmentationDataset"

In [5]:
train_dataset = DubaiSemanticSegmentationDataset(
    DUBAI_DATASET_PATH, transforms=[ResizeToDivisibleBy32()]
)
print(len(train_dataset))

72


## Sanity check data

In [6]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)

In [7]:
for images, masks in train_loader:
    print(images.shape)
    print(masks.shape)
    break

torch.Size([1, 3, 544, 512])
torch.Size([1, 1, 544, 512])


In [8]:
to_pil_transform = transforms.ToPILImage()

In [9]:
img = to_pil_transform(images.squeeze())

In [10]:
# img.show()

In [11]:
msk = to_pil_transform(masks.squeeze())

In [12]:
# msk.show()

## Run example model on single image

In [13]:
model = smp.Unet(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=6,  # model output channels (number of classes in your dataset)
).to(device)

In [14]:
baseline_model = BaselineModel(classes=6).to(device)

In [15]:
torch.cuda.empty_cache()

In [16]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        # (batch_size, channels, height, width)
        # (B, C, H, W)
        break
    output = model(images.to(device))
    # output = (output > 0.5).float()

    output_baseline = baseline_model(images.to(device))
    # output_baseline = (output_baseline > 0.5).float()

torch.Size([1, 3, 544, 512])
torch.Size([1, 1, 544, 512])


In [30]:
print(masks.max())

tensor(0.0196)


In [31]:
print(masks.min())

tensor(0.)


In [17]:
print(output.shape)

torch.Size([1, 6, 544, 512])


In [18]:
print(output_baseline.shape)

torch.Size([1, 6, 544, 512])


In [19]:
unique, counts = np.unique(output.cpu(), return_counts=True)
print(len(unique))
print(len(counts))

1640634
1640634


In [20]:
unique, counts = np.unique(masks.cpu(), return_counts=True)
print(len(unique))
print(len(counts))
print(dict(zip(unique, counts)))

1636
1636
{0.0: 134884, 7.659314e-06: 16, 1.1488971e-05: 12, 1.9148285e-05: 1, 3.829657e-05: 18, 4.2126227e-05: 3, 5.36152e-05: 17, 5.7444857e-05: 1, 6.510417e-05: 2, 6.893383e-05: 8, 8.04228e-05: 6, 8.425245e-05: 16, 8.808211e-05: 2, 9.574143e-05: 2, 9.957109e-05: 15, 0.000103400744: 1, 0.00011106006: 1, 0.00011488971: 9, 0.00011871937: 2, 0.00012637868: 1, 0.00013020834: 19, 0.000134038: 3, 0.00014169731: 1, 0.00014552697: 16, 0.00014935662: 3, 0.00015701594: 1, 0.0001608456: 2, 0.00016467525: 2, 0.00017233456: 1, 0.00017616422: 14, 0.00017999388: 3, 0.0001876532: 2, 0.00019148286: 16, 0.00019531252: 1, 0.00020297183: 2, 0.00020680149: 3, 0.00021063114: 1, 0.00021829044: 1, 0.00021829046: 1, 0.00022212011: 17, 0.00022594977: 2, 0.00023360908: 2, 0.00023743874: 14, 0.0002412684: 1, 0.0002489277: 3, 0.00025275737: 5, 0.00025658702: 2, 0.00026424634: 2, 0.000268076: 15, 0.00027190565: 1, 0.00027956496: 2, 0.00028339462: 14, 0.00028722428: 3, 0.0002948836: 3, 0.00029871325: 6, 0.00030254

In [21]:
unique, counts = np.unique(torch.round(masks.cpu(), decimals=3), return_counts=True)
print(len(unique))
print(len(counts))
print(dict(zip(unique, counts)))

21
21
{0.0: 135429, 0.001: 1110, 0.002: 1092, 0.003: 1003, 0.004: 18437, 0.005: 461, 0.006: 599, 0.007: 695, 0.008: 33282, 0.009: 199, 0.01: 209, 0.011: 166, 0.012: 31454, 0.013: 76, 0.014: 183, 0.015: 248, 0.016: 41322, 0.017: 83, 0.018: 63, 0.019: 90, 0.02: 12327}


In [23]:
# FIXME: wykonanie tego bez błędu == tensory mają takie same wymiary
evaluate_result(output, masks, "multiclass")

ValueError: Dimensions should match, but ``output`` shape is not equal to ``target`` shape, torch.Size([6, 544, 512]) != torch.Size([544, 512])

In [None]:
evaluate_result(output_baseline, masks)

## Run example model on dataset

In [None]:
model.eval()
metrics_dict = {
    "iou": [],
    "f1": [],
    "accuracy": [],
    "recall": [],
}

In [None]:
with torch.no_grad():
    for images, masks in train_loader:
        output = model(images.to(device))
        # output = (output > 0.5).float()

        iter_metrics = evaluate_result(output, masks)
        for key in metrics_dict.keys():
            metrics_dict[key].append(iter_metrics[key])

In [None]:
print(metrics_dict)