In [2]:
import numpy as np

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import Resize
import segmentation_models_pytorch as smp

from src.models.BaselineModel import BaselineModel
from src.evaluation.evaluate_result import evaluate_result
from src.datasets.INRIAAerialImageLabellingDataset import (
    INRIAAerialImageLabellingDataset,
)

from src.datasets.utils.ResizeToDivisibleBy32 import ResizeToDivisibleBy32

## Prepare environment

In [3]:
torch.cuda.is_available()

True

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [5]:
VAL_SIZE = 0.2
BATCH_SIZE = 1
SEED = 42
INRIA_DATASET_PATH = "data/INRIAAerialImageLabellingDataset"
IMAGE_SIZE = 576

In [6]:
labeled_dataset = INRIAAerialImageLabellingDataset(
    INRIA_DATASET_PATH, transforms=[
        Resize(IMAGE_SIZE),
        # ResizeToDivisibleBy32()
        ]
)
print(len(labeled_dataset))

data/INRIAAerialImageLabellingDataset\train
180


In [7]:
test_dataset = INRIAAerialImageLabellingDataset(
    INRIA_DATASET_PATH, split="test", transforms=[
        Resize(IMAGE_SIZE),
        # ResizeToDivisibleBy32()
        ]
)
print(len(test_dataset))

data/INRIAAerialImageLabellingDataset\test
144


## Sanity check data

In [8]:
train_loader = DataLoader(labeled_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [9]:
for images, masks in train_loader:
    print(images.shape)
    print(masks.shape)
    break

torch.Size([1, 3, 576, 576])
torch.Size([1, 1, 576, 576])


In [10]:
to_pil_transform = transforms.ToPILImage()

In [11]:
img = to_pil_transform(images.squeeze())

In [12]:
# img.show()

In [13]:
msk = to_pil_transform(masks.squeeze())

In [14]:
# msk.show()

# UNET

## Run example model on single image

In [15]:
model = smp.Unet(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=1,  # model output channels (number of classes in your dataset)
).to(device)

In [16]:
baseline_model = BaselineModel()

In [17]:
torch.cuda.empty_cache()

In [18]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        break
    output = model(images.to(device))
    output = (output > 0.5).float()

    output_baseline = baseline_model(images.to(device))
    output_baseline = (output_baseline > 0.5).float()

torch.Size([1, 3, 576, 576])
torch.Size([1, 1, 576, 576])


In [19]:
print(output.shape)

torch.Size([1, 1, 576, 576])


In [20]:
print(output_baseline.shape)

torch.Size([1, 1, 576, 576])


In [20]:
outp = to_pil_transform(output.squeeze().cpu().numpy())

In [21]:
unique, counts = np.unique(outp, return_counts=True)
print(dict(zip(unique, counts)))

{0: 25126332, 255: 114244}


In [22]:
unique, counts = np.unique(output.cpu(), return_counts=True)
print(dict(zip(unique, counts)))

{0.0: 25126332, 1.0: 114244}


In [23]:
# outp.show()

In [24]:
rand_outp = to_pil_transform(output_baseline.squeeze().cpu().numpy())

In [25]:
unique, counts = np.unique(rand_outp, return_counts=True)
print(dict(zip(unique, counts)))

{0: 12622155, 255: 12618421}


In [26]:
# rand_outp.show()

In [27]:
evaluate_result(output, masks)

{'iou': tensor(0.0053),
 'f1': tensor(0.0105),
 'accuracy': tensor(0.6975),
 'recall': tensor(0.0057)}

In [28]:
evaluate_result(output_baseline, masks)

{'iou': tensor(0.2314),
 'f1': tensor(0.3759),
 'accuracy': tensor(0.5000),
 'recall': tensor(0.4998)}

In [29]:
evaluate_result(masks, masks)

{'iou': tensor(1.),
 'f1': tensor(1.),
 'accuracy': tensor(1.),
 'recall': tensor(1.)}

## Run example model on dataset

In [30]:
model.eval()
metrics_dict = {
    "iou": [],
    "f1": [],
    "accuracy": [],
    "recall": [],
}

In [31]:
with torch.no_grad():
    for images, masks in train_loader:
        output = model(images.to(device))
        output = (output > 0.5).float()

        iter_metrics = evaluate_result(output, masks)
        for key in metrics_dict.keys():
            metrics_dict[key].append(iter_metrics[key])

In [32]:
print(metrics_dict)

{'iou': [tensor(0.0257), tensor(0.0089), tensor(0.0280), tensor(0.0279), tensor(0.0046), tensor(0.0053), tensor(0.0109), tensor(0.0063), tensor(0.0075), tensor(0.0046), tensor(0.0026), tensor(0.0040), tensor(0.0050), tensor(0.0666), tensor(0.0043), tensor(0.0323), tensor(0.0087), tensor(0.0068), tensor(0.0095), tensor(0.0042), tensor(0.0197), tensor(0.0040), tensor(0.0016), tensor(0.0178), tensor(0.0068), tensor(0.0022), tensor(0.0163), tensor(0.0114), tensor(0.0041), tensor(0.0232), tensor(0.0023), tensor(0.0026), tensor(0.0089), tensor(0.0044), tensor(0.0100), tensor(0.0050), tensor(0.0188), tensor(0.0029), tensor(0.0039), tensor(0.0066), tensor(0.0156), tensor(0.0092), tensor(0.0075), tensor(0.0040), tensor(0.0024), tensor(0.0072), tensor(0.0022), tensor(0.0067), tensor(0.0036), tensor(0.0048), tensor(0.0084), tensor(0.0106), tensor(0.0479), tensor(0.0037), tensor(0.0019), tensor(0.0094), tensor(0.0253), tensor(0.0056), tensor(0.0021), tensor(0.0021), tensor(0.0051), tensor(0.0089),

In [37]:
print({key: np.mean(value) for key, value in metrics_dict.items()})

{'iou': 0.010615132, 'f1': 0.020689072, 'accuracy': 0.8447621, 'recall': 0.0944909}


# UNET++

## Run example model on single image

In [33]:
model = smp.UnetPlusPlus(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=1,  # model output channels (number of classes in your dataset)
).to(device)

In [34]:
baseline_model = BaselineModel()

In [35]:
torch.cuda.empty_cache()

In [36]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        break
    output = model(images.to(device))
    output = (output > 0.5).float()

    output_baseline = baseline_model(images.to(device))
    output_baseline = (output_baseline > 0.5).float()

torch.Size([1, 3, 5024, 5024])
torch.Size([1, 1, 5024, 5024])


OutOfMemoryError: CUDA out of memory. Tried to allocate 7.52 GiB. GPU 0 has a total capacity of 12.00 GiB of which 0 bytes is free. Of the allocated memory 16.28 GiB is allocated by PyTorch, and 7.50 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
print(output.shape)

In [None]:
print(output_baseline.shape)