In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import Resize
import segmentation_models_pytorch as smp

from src.models.BaselineModel import BaselineModel
from src.evaluation.evaluate_result import evaluate_result
from src.datasets.INRIAAerialImageLabellingDataset import (
    INRIAAerialImageLabellingDataset,
)

from src.datasets.utils.ResizeToDivisibleBy32 import ResizeToDivisibleBy32

  from .autonotebook import tqdm as notebook_tqdm


## Prepare environment

In [2]:
torch.cuda.is_available()

True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
VAL_SIZE = 0.2
BATCH_SIZE = 1
SEED = 42
IMAGE_SIZE = 576
INRIA_DATASET_PATH = "data/INRIAAerialImageLabellingDataset"  # home PC
# INRIA_DATASET_PATH = "data/TestSubsets/INRIAAerialImageLabellingDataset"  # laptop

In [5]:
labeled_dataset = INRIAAerialImageLabellingDataset(
    INRIA_DATASET_PATH,
    transforms=[
        Resize(IMAGE_SIZE),
        # ResizeToDivisibleBy32()
    ],
)
print(len(labeled_dataset))

data/INRIAAerialImageLabellingDataset\train
180


In [6]:
test_dataset = INRIAAerialImageLabellingDataset(
    INRIA_DATASET_PATH,
    split="test",
    transforms=[
        Resize(IMAGE_SIZE),
        # ResizeToDivisibleBy32()
    ],
)
print(len(test_dataset))

data/INRIAAerialImageLabellingDataset\test
144


## Sanity check data

In [7]:
train_loader = DataLoader(labeled_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [8]:
for images, masks in train_loader:
    print(images.shape)
    print(masks.shape)
    break

torch.Size([1, 3, 576, 576])
torch.Size([1, 1, 576, 576])


In [9]:
to_pil_transform = transforms.ToPILImage()

In [10]:
img = to_pil_transform(images.squeeze())

In [11]:
# img.show()

In [12]:
msk = to_pil_transform(masks.squeeze())

In [13]:
# msk.show()

# UNET

## Run example model on single image

In [14]:
model = smp.Unet(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=1,  # model output channels (number of classes in your dataset)
).to(device)

In [15]:
baseline_model = BaselineModel()

In [16]:
torch.cuda.empty_cache()

In [17]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        break
    output = model(images.to(device))
    output = (output > 0.5).float()

    output_baseline = baseline_model(images.to(device))
    output_baseline = (output_baseline > 0.5).float()

torch.Size([1, 3, 576, 576])
torch.Size([1, 1, 576, 576])


In [18]:
print(output.shape)

torch.Size([1, 1, 576, 576])


In [19]:
print(output_baseline.shape)

torch.Size([1, 1, 576, 576])


In [20]:
outp = to_pil_transform(output.squeeze().cpu().numpy())

In [21]:
unique, counts = np.unique(outp, return_counts=True)
print(dict(zip(unique, counts)))

{0: 331764, 255: 12}


In [22]:
unique, counts = np.unique(output.cpu(), return_counts=True)
print(dict(zip(unique, counts)))

{0.0: 331764, 1.0: 12}


In [23]:
# outp.show()

In [24]:
rand_outp = to_pil_transform(output_baseline.squeeze().cpu().numpy())

In [25]:
unique, counts = np.unique(rand_outp, return_counts=True)
print(dict(zip(unique, counts)))

{0: 166195, 255: 165581}


In [26]:
# rand_outp.show()

In [27]:
evaluate_result(output, masks)

{'iou': tensor(5.2252e-05),
 'f1': tensor(0.0001),
 'accuracy': tensor(-52.7583),
 'recall': tensor(3.6287e-05)}

In [28]:
evaluate_result(output_baseline, masks)

{'iou': tensor(0.9774),
 'f1': tensor(0.9886),
 'accuracy': tensor(0.3809),
 'recall': tensor(0.4984)}

In [29]:
evaluate_result(masks, masks)

{'iou': tensor(-1.0087),
 'f1': tensor(231.4211),
 'accuracy': tensor(24777.6680),
 'recall': tensor(231.1913)}

## Run example model on dataset

In [30]:
model.eval()
metrics_dict = {
    "iou": [],
    "f1": [],
    "accuracy": [],
    "recall": [],
}

In [31]:
with torch.no_grad():
    for images, masks in train_loader:
        output = model(images.to(device))
        output = (output > 0.5).float()

        iter_metrics = evaluate_result(output, masks)
        for key in metrics_dict.keys():
            metrics_dict[key].append(iter_metrics[key])

In [32]:
print(metrics_dict)

{'iou': [tensor(0.), tensor(0.), tensor(0.0009), tensor(0.), tensor(0.), tensor(0.), tensor(0.), tensor(2.7233e-05), tensor(0.), tensor(0.), tensor(1.9778e-05), tensor(0.), tensor(0.), tensor(0.), tensor(5.7072e-05), tensor(3.4782e-05), tensor(5.4366e-06), tensor(0.), tensor(0.), tensor(0.), tensor(0.), tensor(0.), tensor(0.), tensor(0.), tensor(1.1338e-05), tensor(6.0442e-06), tensor(2.0260e-05), tensor(0.), tensor(4.6689e-07), tensor(2.1716e-05), tensor(0.0004), tensor(0.), tensor(0.), tensor(4.7792e-06), tensor(0.), tensor(1.1329e-06), tensor(7.8165e-06), tensor(5.1175e-06), tensor(0.), tensor(0.), tensor(0.), tensor(3.0650e-06), tensor(5.2252e-05), tensor(0.), tensor(1.3084e-07), tensor(0.), tensor(0.), tensor(0.), tensor(0.0002), tensor(0.), tensor(1.5271e-05), tensor(0.), tensor(5.1855e-06), tensor(0.), tensor(0.0001), tensor(6.4458e-05), tensor(0.), tensor(0.), tensor(0.), tensor(5.1268e-05), tensor(3.5380e-05), tensor(9.9601e-05), tensor(0.), tensor(0.), tensor(0.), tensor(1.21

In [33]:
print({key: np.mean(value) for key, value in metrics_dict.items()})

{'iou': 6.4742766e-05, 'f1': 0.00012909684, 'accuracy': -39.24348, 'recall': 0.06584109}


# UNET++

## Run example model on single image

In [34]:
model = smp.UnetPlusPlus(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=1,  # model output channels (number of classes in your dataset)
).to(device)

In [35]:
baseline_model = BaselineModel()

In [36]:
torch.cuda.empty_cache()

In [37]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        break
    output = model(images.to(device))
    output = (output > 0.5).float()

    output_baseline = baseline_model(images.to(device))
    output_baseline = (output_baseline > 0.5).float()

torch.Size([1, 3, 576, 576])
torch.Size([1, 1, 576, 576])


In [38]:
print(output.shape)

torch.Size([1, 1, 576, 576])


In [39]:
print(output_baseline.shape)

torch.Size([1, 1, 576, 576])


In [40]:
to_pil_transform = transforms.ToPILImage()

In [41]:
img = to_pil_transform(images.squeeze())

In [42]:
plt.imsave("assets/inria-example-image.jpeg", np.array(img))

In [43]:
# img.show()

In [44]:
msk = to_pil_transform(masks.squeeze())

In [45]:
plt.imsave("assets/inria-example-mask.jpeg", np.array(msk))

In [46]:
# msk.show()

In [47]:
outp = to_pil_transform(output.squeeze().cpu())

In [48]:
plt.imsave("assets/inria-example-unet-plus-plus-output.jpeg", np.array(outp))

In [49]:
# outp.show()

In [50]:
evaluate_result(output, masks)

{'iou': tensor(0.),
 'f1': tensor(0.),
 'accuracy': tensor(-36.6243),
 'recall': tensor(0.)}

## DeepLabV3

## Run example model on single image

In [51]:
model = smp.DeepLabV3(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=1,
).to(device)

In [52]:
baseline_model = BaselineModel()

In [53]:
torch.cuda.empty_cache()

In [54]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        break
    output = model(images.to(device))
    output = (output > 0.5).float()

    output_baseline = baseline_model(images.to(device))
    output_baseline = (output_baseline > 0.5).float()

torch.Size([1, 3, 576, 576])
torch.Size([1, 1, 576, 576])


In [55]:
print(output.shape)

torch.Size([1, 1, 576, 576])


In [56]:
print(output_baseline.shape)

torch.Size([1, 1, 576, 576])


In [57]:
to_pil_transform = transforms.ToPILImage()

In [58]:
img = to_pil_transform(images.squeeze())

In [59]:
# img.show()

In [60]:
msk = to_pil_transform(masks.squeeze())

In [61]:
# msk.show()

In [62]:
outp = to_pil_transform(output.squeeze().cpu())

In [63]:
plt.imsave("assets/inria-example-deep-lab-output.jpeg", np.array(outp))

In [64]:
# outp.show()

In [65]:
evaluate_result(output, masks)

{'iou': tensor(0.),
 'f1': tensor(0.),
 'accuracy': tensor(-20.1664),
 'recall': tensor(0.2431)}

## DeepLabV3+

## Run example model on single image

In [66]:
model = smp.DeepLabV3Plus(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=1,
).to(device)

In [67]:
baseline_model = BaselineModel()

In [68]:
torch.cuda.empty_cache()

In [69]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        break
    output = model(images.to(device))
    output = (output > 0.5).float()

    output_baseline = baseline_model(images.to(device))
    output_baseline = (output_baseline > 0.5).float()

torch.Size([1, 3, 576, 576])
torch.Size([1, 1, 576, 576])


In [70]:
print(output.shape)

torch.Size([1, 1, 576, 576])


In [71]:
print(output_baseline.shape)

torch.Size([1, 1, 576, 576])


In [72]:
to_pil_transform = transforms.ToPILImage()

In [73]:
img = to_pil_transform(images.squeeze())

In [74]:
# img.show()

In [75]:
msk = to_pil_transform(masks.squeeze())

In [76]:
# msk.show()

In [77]:
outp = to_pil_transform(output.squeeze().cpu())

In [78]:
plt.imsave("assets/inria-example-deep-lab-v3-plus-output.jpeg", np.array(outp))

In [79]:
# outp.show()

In [80]:
evaluate_result(output, masks)

{'iou': tensor(0.),
 'f1': tensor(0.),
 'accuracy': tensor(-25.4879),
 'recall': tensor(0.)}