In [2]:
import numpy as np

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import segmentation_models_pytorch as smp

from src.models.BaselineModel import BaselineModel
from src.evaluation.evaluate_result import evaluate_result
from src.datasets.UAVidSemanticSegmentationDataset import (
    UAVidSemanticSegmentationDataset,
)

from src.datasets.utils.ResizeToDivisibleBy32 import ResizeToDivisibleBy32

  from .autonotebook import tqdm as notebook_tqdm


## Prepare environment

In [3]:
# Report whether PyTorch can use CUDA; the bare expression is the cell's displayed value.
torch.cuda.is_available()

True

In [4]:
# Select the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [5]:
# Notebook-wide configuration constants.
VAL_SIZE: float = 0.2  # presumably a validation-split fraction; not referenced in the cells shown here — TODO confirm
BATCH_SIZE: int = 1  # number of samples per batch
SEED: int = 42  # random seed; NOTE(review): confirm where it is applied
UAVID_DATASET_PATH: str = "data/UAVidSemanticSegmentationDataset"  # dataset root (relative path)

In [6]:
# Build the dataset used for training. No `split` argument is passed, so the
# dataset class's default split applies (presumably the training split — confirm).
train_transforms = [ResizeToDivisibleBy32()]
train_dataset = UAVidSemanticSegmentationDataset(
    UAVID_DATASET_PATH,
    transforms=train_transforms,
)
print(len(train_dataset))

200


In [7]:
# Build the validation dataset (split="valid") with the same resize transform.
val_transforms = [ResizeToDivisibleBy32()]
val_dataset = UAVidSemanticSegmentationDataset(
    UAVID_DATASET_PATH,
    split="valid",
    transforms=val_transforms,
)
print(len(val_dataset))

70


In [8]:
# Build the test dataset (split="test") with the same resize transform.
test_transforms = [ResizeToDivisibleBy32()]
test_dataset = UAVidSemanticSegmentationDataset(
    UAVID_DATASET_PATH,
    split="test",
    transforms=test_transforms,
)
print(len(test_dataset))

10


## Sanity check data

In [9]:
# Wrap each split in a DataLoader.
# - BATCH_SIZE from the config cell sets the batch size for all three loaders.
# - Only the training loader shuffles, driven by a generator seeded with SEED so the
#   sample order is reproducible after a kernel restart.
# - Validation and test loaders keep dataset order so repeated runs see the same batches.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
# Sanity check: print the image and mask tensor shapes of the first training batch, then stop.
for images, masks in train_loader:
    print(images.shape, masks.shape, sep="\n")
    break

torch.Size([1, 3, 2176, 3840])
torch.Size([1, 3, 2176, 3840])


In [11]:
# Sanity check: print the image and mask tensor shapes of the first validation batch, then stop.
for images, masks in val_loader:
    print(images.shape, masks.shape, sep="\n")
    break

torch.Size([1, 3, 2176, 3840])
torch.Size([1, 3, 2176, 3840])


In [12]:
# Sanity check: print the shape of the first test batch (the test loader yields images only).
# The loop variable is named `test_images` so it does not rebind `images`; that keeps
# `images` and `masks` referring to the same labelled batch for the previews that follow.
for test_images in test_loader:
    print(test_images.shape)
    break

torch.Size([1, 3, 2176, 3840])


In [13]:
# Tensor-to-PIL converter, used by the image and mask preview cells.
to_pil_transform = transforms.ToPILImage()

In [14]:
# Convert the first sample of the current `images` batch to a PIL image for inspection.
# Indexing with [0] (instead of squeeze()) also works when a batch holds more than one sample.
img = to_pil_transform(images[0])

In [15]:
# img.show()

In [16]:
# Convert the first sample of the current `masks` batch to a PIL image for inspection.
# Indexing with [0] (instead of squeeze()) also works when a batch holds more than one sample.
msk = to_pil_transform(masks[0])

In [17]:
# msk.show()

## Run example model on single image

In [18]:
# U-Net from segmentation_models_pytorch, built from an explicit settings dict
# and then moved to the selected device.
unet_kwargs = dict(
    encoder_name="resnet18",  # encoder backbone, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # initialise the encoder with `imagenet` pre-trained weights
    in_channels=3,  # input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=8,  # output channels (number of classes in the dataset)
)
model = smp.Unet(**unet_kwargs).to(device)

In [19]:
# Baseline model with 8 output classes (matching the U-Net above), moved to the same device.
baseline_model = BaselineModel(classes=8).to(device)

In [20]:
# Release unused cached CUDA memory before running inference on full-resolution images.
torch.cuda.empty_cache()

In [21]:
# Run both models once on a single training batch, without gradient tracking.
# Both models are put in eval mode so inference-time layer behaviour is used,
# and the batch is transferred to the device a single time and shared.
model.eval()
baseline_model.eval()

# Take the first batch from the training loader and show its shapes.
images, masks = next(iter(train_loader))
print(images.shape)
print(masks.shape)

with torch.no_grad():
    images_on_device = images.to(device)
    # Outputs are kept exactly as the models return them; no thresholding is applied here.
    output = model(images_on_device)
    output_baseline = baseline_model(images_on_device)

torch.Size([1, 3, 2176, 3840])
torch.Size([1, 3, 2176, 3840])


In [22]:
# Shape of the U-Net output for the single batch.
print(output.shape)

torch.Size([1, 8, 2176, 3840])


In [23]:
# Shape of the baseline model output for the single batch.
print(output_baseline.shape)

torch.Size([1, 8, 2176, 3840])


In [34]:
# Count how many output values fall on each side of the 0.5 threshold.
# The threshold is applied to a local tensor so this cell gives the same summary
# whether or not `output` was thresholded elsewhere; running np.unique on raw
# floating-point output would treat almost every element as a distinct value.
binary_output = (output > 0.5).float()
unique, counts = np.unique(binary_output.cpu().numpy(), return_counts=True)
# Convert to plain Python scalars so the printed dict is compact and stable.
print({float(value): int(count) for value, count in zip(unique, counts)})

In [24]:
# Evaluate the U-Net output against the ground-truth masks of the same batch.
# NOTE(review): the recorded run of this cell raised a ValueError because the shapes
# differ (8 output channels vs. 3 mask channels). Presumably the masks must be
# converted to per-class channels before this call — TODO confirm against
# evaluate_result's expected inputs.
evaluate_result(output, masks)

ValueError: Dimensions should match, but ``output`` shape is not equal to ``target`` shape, torch.Size([8, 2176, 3840]) != torch.Size([3, 2176, 3840])

In [None]:
# Evaluate the baseline model output against the same masks.
# NOTE(review): output_baseline has 8 channels while masks has 3 (see the shapes
# printed earlier), so the metrics recorded below may come from an earlier run with
# different inputs — TODO re-run and confirm.
evaluate_result(output_baseline, masks)

{'iou': tensor(0.0683),
 'f1': tensor(0.1278),
 'accuracy': tensor(0.5000),
 'recall': tensor(0.5166)}

## Run example model on dataset

In [25]:
# Put the U-Net in eval mode and create one empty list per metric
# to collect the per-batch values.
model.eval()
metrics_dict = {metric_name: [] for metric_name in ("iou", "f1", "accuracy", "recall")}

In [33]:
# Evaluate the U-Net on every training batch and collect the per-batch metrics.
# The accumulators are reset first so re-running this cell does not append duplicate
# entries, and the per-batch tensors use their own names so the `output` variable
# from the single-batch cell is not overwritten.
metrics_dict = {key: [] for key in metrics_dict}
with torch.no_grad():
    for images, masks in train_loader:
        batch_output = model(images.to(device))
        batch_predictions = (batch_output > 0.5).float()  # binarise at 0.5

        iter_metrics = evaluate_result(batch_predictions, masks)
        for key, values in metrics_dict.items():
            values.append(iter_metrics[key])

In [None]:
# Show the collected per-batch metrics (one list per metric name).
print(metrics_dict)

{'iou': [tensor(7.7134e-07)], 'f1': [tensor(1.5427e-06)], 'accuracy': [tensor(0.8459)], 'recall': [tensor(1.0581e-06)]}
