In [1]:
! pip install pandas

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[33mDEPRECATION: pytorch-lightning 1.5.4 has a non-standard dependency specifier torch>=1.7.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m

In [2]:
import torch
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import numpy as np
import cv2
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset
DATA_DIR = './data/'
ENCODER = 'timm-mobilenetv3_small_minimal_100'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['sky']
ACTIVATION = 'sigmoid' # could be None for logits or 'softmax2d' for multiclass segmentation
DEVICE = 'cuda'
import torch
import numpy as np
import segmentation_models_pytorch as smp


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# helper function for data visualization
def visualize(**images):
    """PLot images in one row."""
    n = len(images)
    plt.figure(figsize=(16, 5))
    for i, (name, image) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(' '.join(name.split('_')).title())
        plt.imshow(image)
    plt.show()

In [4]:
x_train_dir = os.path.join(DATA_DIR, 'train')
y_train_dir = os.path.join(DATA_DIR, 'trainannot')

x_valid_dir = os.path.join(DATA_DIR, 'val')
y_valid_dir = os.path.join(DATA_DIR, 'valannot')

x_test_dir = os.path.join(DATA_DIR, 'test')
y_test_dir = os.path.join(DATA_DIR, 'testannot')

In [5]:
class Dataset(BaseDataset):
    """CamVid Dataset. Read images, apply augmentation and preprocessing transformations.
    
    Args:
        images_dir (str): path to images folder
        masks_dir (str): path to segmentation masks folder
        class_values (list): values of classes to extract from segmentation mask
        augmentation (Amentations.Compose): data transfromation pipeline 
            (e.g. flip, scale, etc.)
        preprocessing (Amentations.Compose): data preprocessing 
            (e.g. noralization, shape manipulation, etc.)
    
    """
    
    CLASSES = ['sky']
    
    def __init__(
            self, 
            images_dir, 
            masks_dir, 
            classes=None, 
            augmentation=None, 
            preprocessing=None,
    ):
        self.ids = os.listdir(images_dir)
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id.replace('.jpg','.png')) for image_id in self.ids]
        
        # convert str names to class values on masks
        self.class_values = [255]
        (print(self.class_values))
        self.augmentation = augmentation
        self.preprocessing = preprocessing
    
    def __getitem__(self, i):
        
        # read data
        image = cv2.imread(self.images_fps[i])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.masks_fps[i], 0)
        
        # extract certain classes from mask (e.g. cars)
        masks = [(mask == v) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')
        
        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']
        
        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']
            
        return image, mask
        
    def __len__(self):
        return len(self.ids)

In [6]:
import albumentations as A

In [7]:
def get_training_augmentation(size = 224):
    train_transform = [

        A.HorizontalFlip(p=0.5),  # Randomly flip the image horizontally
        # A.Rotate(limit=15, p=0.5),  # Randomly rotate the image by up to 15 degrees
        A.RandomBrightnessContrast(p=0.2),  # Randomly adjust brightness and contrast
        A.RandomGamma(p=0.2),  # Randomly adjust gamma
        A.Blur(blur_limit=3, p=0.1),  # Randomly apply blur
        A.GaussNoise(var_limit=(1, 5), p=0.1),  # Randomly add Gaussian noise
        A.OneOf([
            A.MotionBlur(p=0.2),
            A.MedianBlur(blur_limit=3, p=0.1),
            A.GaussianBlur(blur_limit=3, p=0.1),
        ], p=0.2),  # Randomly choose one type of blur
        A.Resize(size, size),  # Resize the image to a fixed size
        # A.Normalize(), 
        ] # Normalize the image
    return A.Compose(train_transform)


def get_validation_augmentation(size = 224):
    """Add paddings to make image shape divisible by 32"""
    test_transform = [
       A.Resize(size, size)
    ]
    return A.Compose(test_transform)


def to_tensor(x, **kwargs):
    return x.transpose(2, 0, 1).astype('float32')


def get_preprocessing(preprocessing_fn):
    """Construct preprocessing transform
    
    Args:
        preprocessing_fn (callbale): data normalization function 
            (can be specific for each pretrained neural network)
    Return:
        transform: Amentations.Compose
    
    """
    
    _transform = [
        A.Lambda(image=preprocessing_fn),
        A.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return A.Compose(_transform)

In [8]:
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
# load best saved checkpoint

In [9]:
import segmentation_models_pytorch.utils.metrics as metrics

In [10]:
# Dice/F1 score - https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
# IoU/Jaccard score - https://en.wikipedia.org/wiki/Jaccard_index

loss = smp.utils.losses.DiceLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]

In [11]:
# create test dataset
test_dataset = Dataset(
    x_test_dir, 
    y_test_dir, 
    augmentation=get_validation_augmentation(size=32), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

test_dataloader = DataLoader(test_dataset)

[255]


In [12]:
from tqdm import tqdm

In [13]:
# create test dataset
test_dataset_32 = Dataset(
    x_test_dir, 
    y_test_dir, 
    augmentation=get_validation_augmentation(size=32), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

test_dataloader_32 = DataLoader(test_dataset)

test_dataset_640 = Dataset(
    x_test_dir, 
    y_test_dir, 
    augmentation=get_validation_augmentation(size=640), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

test_dataloader_32 = DataLoader(test_dataset)

[255]
[255]


In [14]:
test_datasets = [Dataset(
    x_test_dir, 
    y_test_dir, 
    augmentation=get_validation_augmentation(size=size), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
) for size in range(32,641,32) ]

[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]
[255]


In [15]:
# test dataset without transformations for image visualization
test_dataset_vis = Dataset(
    x_test_dir, y_test_dir, 
    classes=CLASSES,
)

[255]


In [16]:
import pandas as pd
df = pd.DataFrame([],columns=['size','accuracy'])

In [17]:
exp_accuracy =[]
num_of_exp = len(test_datasets)
for exp in range(num_of_exp):
    iou_scores = np.array([])
    test_dataset = test_datasets[exp]
    best_model = torch.load(f'./models/best_model_{32*(1+exp)}x{32*(1+exp)}.pth')
    for i in tqdm(range(len(test_dataset)),):#tqdm(range(len(test_dataset_32))):
        
        image, gt_mask = test_dataset[i]
        _, gt_mask = test_datasets[num_of_exp-1][i]
        gt_mask = torch.tensor(np.expand_dims(gt_mask,0)).long().to(DEVICE)
        x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
        pr_mask = best_model.predict(x_tensor)
        pr_mask = (pr_mask)
        pr_mask_array = pr_mask.squeeze(1).to('cpu').numpy()
        resized_images = []
        for img in pr_mask_array:
            resized_img = cv2.resize(img, (640, 640), interpolation=cv2.INTER_NEAREST)
            resized_images.append(resized_img)
        resized_mask = torch.from_numpy(np.array(resized_images)).unsqueeze(1).to(DEVICE)
        # print(resized_mask)
        tp, fp, fn, tn = smp.metrics.get_stats(resized_mask, gt_mask, mode="binary", threshold=0.5)
        iou_score = smp.metrics.iou_score(tp, fp, fn, tn)
        iou_scores = np.append(iou_scores,iou_score.to('cpu').numpy())
        # resized_mask = (resized_mask.squeeze().cpu().numpy().round())
        # pr_mask = (pr_mask.squeeze().cpu().numpy().round())
        # _, gt_mask = test_dataset_640[i]
        # gt_mask = gt_mask.squeeze()
        # visualize(
        #     image=image_vis, 
        #     ground_truth_mask_640=gt_mask, 
        #     predicted_mask_640=resized_mask
        # )
    iou_scores.sum()/len(test_dataset)
    df = pd.concat([df, pd.DataFrame([{'size':32*(1+exp),'accuracy':iou_scores.sum()/len(test_dataset)}])], ignore_index=True)
    
        

  0%|          | 6/2251 [00:00<01:33, 24.10it/s]

100%|██████████| 2251/2251 [00:53<00:00, 42.23it/s]
  df = pd.concat([df, pd.DataFrame([{'size':32*(1+exp),'accuracy':iou_scores.sum()/len(test_dataset)}])], ignore_index=True)
100%|██████████| 2251/2251 [00:53<00:00, 41.94it/s]
100%|██████████| 2251/2251 [00:53<00:00, 42.01it/s]
100%|██████████| 2251/2251 [00:54<00:00, 41.66it/s]
100%|██████████| 2251/2251 [00:54<00:00, 41.46it/s]
100%|██████████| 2251/2251 [00:55<00:00, 40.36it/s]
100%|██████████| 2251/2251 [00:57<00:00, 39.44it/s]
100%|██████████| 2251/2251 [01:01<00:00, 36.71it/s]
100%|██████████| 2251/2251 [01:01<00:00, 36.46it/s]
100%|██████████| 2251/2251 [01:04<00:00, 34.98it/s]
100%|██████████| 2251/2251 [01:06<00:00, 33.89it/s]
100%|██████████| 2251/2251 [01:05<00:00, 34.17it/s]
100%|██████████| 2251/2251 [01:06<00:00, 33.65it/s]
100%|██████████| 2251/2251 [01:08<00:00, 32.91it/s]
100%|██████████| 2251/2251 [01:11<00:00, 31.49it/s]
100%|██████████| 2251/2251 [01:14<00:00, 30.32it/s]
100%|██████████| 2251/2251 [01:17<00:00, 29

In [18]:
df.to_csv('experimental_result/accuracy.csv',index=False)

: 