In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import segmentation_models_pytorch as smp


from src.models.BaselineModel import BaselineModel
from src.evaluation.evaluate_result import evaluate_result

from src.datasets.UAVidSemanticSegmentationDataset import (
    UAVidSemanticSegmentationDataset,
)

from src.datasets.utils.ResizeToDivisibleBy32 import ResizeToDivisibleBy32

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
torch.cuda.is_available()

True

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [17]:
VAL_SIZE = 0.2
BATCH_SIZE = 1
SEED = 42
UAVID_DATASET_PATH = "data/UAVidSemanticSegmentationDataset"


In [3]:
train_dataset = UAVidSemanticSegmentationDataset(
    UAVID_DATASET_PATH, transforms=[ResizeToDivisibleBy32()]
)
print(len(train_dataset))

200


In [4]:
val_dataset = UAVidSemanticSegmentationDataset(
    UAVID_DATASET_PATH, split="valid", transforms=[ResizeToDivisibleBy32()]
)
print(len(val_dataset))

70


In [5]:
test_dataset = UAVidSemanticSegmentationDataset(
    UAVID_DATASET_PATH, split="test", transforms=[ResizeToDivisibleBy32()]
)
print(len(test_dataset))

10


In [6]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)

In [7]:
for images, masks in train_loader:
    print(images.shape)
    print(masks.shape)
    break

torch.Size([1, 3, 2176, 3840])
torch.Size([1, 3, 2176, 3840])


In [8]:
to_pil_transform = transforms.ToPILImage()
img = to_pil_transform(images.squeeze())

In [21]:
# img.show()

In [10]:
msk = to_pil_transform(masks.squeeze())

In [22]:
# msk.show()

In [18]:
model = smp.Unet(
    encoder_name="resnet18",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=8,  # model output channels (number of classes in your dataset)
).to(device)

In [35]:
model.eval()
with torch.no_grad():
    for images, masks in train_loader:
        print(images.shape)
        print(masks.shape)
        break
    output = model(images.to(device))
    output = torch.softmax(output, dim=1)
    print(output.shape)
    output = output.argmax(dim=1).float()
    print(output.shape)
    # output = (output > 0.5).float()

torch.Size([1, 3, 2176, 3840])
torch.Size([1, 3, 2176, 3840])
torch.Size([1, 8, 2176, 3840])
torch.Size([1, 2176, 3840])


In [36]:
print(output.shape)

torch.Size([1, 2176, 3840])


In [37]:
outp = to_pil_transform(output.squeeze().cpu().numpy())

In [38]:
unique, counts = np.unique(outp, return_counts=True)
print(dict(zip(unique, counts)))

{0: 148, 251: 72, 252: 2505, 253: 6372586, 255: 1980529}


In [41]:
unique, counts = np.unique(output.cpu(), return_counts=True)
print(dict(zip(unique, counts)))

{0.0: 148, 1.0: 1980529, 3.0: 6372586, 4.0: 2505, 5.0: 72}


In [39]:
outp.show()

In [14]:
model.eval()

with torch.no_grad():
    for images, masks in train_loader:

        height = images.shape[2]
        width = images.shape[3]

        if height % 32 == 0 and width % 32 == 0:
            print("Height and width are divisible by 32")
        else:
            print("Height and width are not divisible by 32")

        output = model(images)
        output = torch.softmax(output, dim=1)
        class_output = torch.argmax(output, dim=1)

        print("output")
        print(type(output))
        print(output.shape)
        print(output.max())
        print(output.min())
        print()
        print("masks")
        print(type(masks))
        print(masks.shape)
        print(masks.max())
        print(masks.min())

        images = to_pil_transform(images.squeeze())
        # images.show()
        images_array = np.array(images)

        # Show the images using matplotlib
        # plt.imshow(images_array, cmap='gray')
        # plt.axis('off')

        # output = to_pil_transform(output.squeeze())
        # output.show()
        output_array = np.array(output.squeeze())

        # Show the output using matplotlib
        # plt.figure()
        # plt.imshow(output_array, cmap='gray')
        # plt.axis('off')
        # plt.show()

        class_output_array = np.array(class_output.squeeze())

        masks = to_pil_transform(masks.squeeze())
        # masks.show()
        # Convert the PIL Image to a numpy array
        mask_array = np.array(msk)

        # Show the mask using matplotlib
        # plt.imshow(mask_array, cmap='gray')
        # plt.axis('off')
        # plt.show()

        # tp, fp, fn, tn = smp.metrics.get_stats(
        #     output, masks, mode="multilabel", threshold=0.5
        # )
        # print(f"TP: {tp}")
        # print(f"FP: {fp}")
        # print(f"TN: {tn}")
        # print(f"FN: {fn}")

        # iou_score = smp.metrics.iou_score(tp, fp, fn, tn, reduction="micro")
        # print(f"IoU score: {iou_score}")

        break

Height and width are divisible by 32
output
<class 'torch.Tensor'>
torch.Size([1, 20, 2176, 3840])
tensor(0.1911)
tensor(0.0080)

masks
<class 'torch.Tensor'>
torch.Size([1, 3, 2176, 3840])
tensor(0.7529)
tensor(0.)


In [19]:
plt.imshow(images_array, cmap="gray")
plt.axis("off")
plt.show()

NameError: name 'images_array' is not defined

In [20]:
plt.imshow(mask_array, cmap="gray")
plt.axis("off")
plt.show()

NameError: name 'mask_array' is not defined

In [None]:
# plt.imshow(output_array, cmap='gray')
# plt.axis('off')
# plt.show()
plt.matshow(output_array[0, :, :])
plt.matshow(output_array[1, :, :])
plt.matshow(output_array[2, :, :])
plt.matshow(output_array[3, :, :])
plt.matshow(output_array[4, :, :])
plt.matshow(output_array[5, :, :])

In [None]:
plt.imshow(class_output_array, cmap="viridis")
plt.axis("off")
plt.show()

In [None]:
plt.matshow(class_output_array)

In [None]:
class_output_array

In [None]:
class_output_array.max()

In [None]:
class_output_array.min()

In [None]:
unique, counts = np.unique(class_output_array, return_counts=True)
print(dict(zip(unique, counts)))