# Petroglyphs Segmentation Model - Notebook

### Using a Custom Dice Loss class



$\text{Given a predicted mask }  p \text{ and the ground truth target } t, \text{ with a smoothing factor } s, \text{ the Dice Loss } L \text{ is defined as:}$

#### $L = 1 - \frac{2 \cdot \sum(p \cdot t) + s}{\sum(p^{2}) + \sum(t^{2}) + s}$

The smoothing factor is a very small number added to avoid division by 0. Read more about Dice Loss [here](https://arxiv.org/pdf/1606.04797.pdf) (on page 6).

In [1]:

import torch
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader, Dataset, random_split
from PIL import Image
import os
from torch import nn
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


## Loading Data into Dataloaders

In [9]:
# Define transforms for images and masks
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor()
])

# Custom Dataset class to preload images and masks into memory
class PreloadedDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.image_files = sorted(os.listdir(os.path.join(root_dir, 'Unlabelled_Images')))
        self.mask_files = sorted(os.listdir(os.path.join(root_dir, 'Labels')))
        self.images = []
        self.masks = []
        self.transform = transform
        self._preload_data()

    def _preload_data(self):
        for image_file, mask_file in zip(self.image_files, self.mask_files):
            image_path = os.path.join(self.root_dir, 'Unlabelled_Images', image_file)
            mask_path = os.path.join(self.root_dir, 'Labels', mask_file)
            image = Image.open(image_path).convert("RGB")
            mask = Image.open(mask_path).convert("L")
            if self.transform:
                image = self.transform(image)
                mask = self.transform(mask)
            self.images.append(image)
            self.masks.append(mask)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.masks[idx]

dataset = PreloadedDataset(root_dir='', transform=transform)

train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
b = 4
train_dataloader = DataLoader(train_dataset, batch_size=b, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=b)

# Load pre-trained DeepLabv3+ model
model = models.segmentation.deeplabv3_resnet50(weights=models.segmentation.DeepLabV3_ResNet50_Weights.DEFAULT)

# Modify last layer for binary segmentation
num_classes = 1
model.classifier[4] = nn.Sequential(
    nn.Conv2d(256, num_classes, kernel_size=(1, 1), stride=(1, 1)),
    nn.Sigmoid())

# Load model on CPU
model.load_state_dict(torch.load("deeplabv3_small_ubaid.pth", map_location=torch.device('cpu')))


<All keys matched successfully>

In [10]:
torch.__version__

'2.1.0+cpu'

In [12]:
import matplotlib.pyplot as plt
import numpy as np

# Function to visualize images and masks in a grid
def visualize_predictions_grid(model, dataloader, num_samples=10):
    # model.cuda()
    model.eval()
    samples = np.random.choice(len(dataloader.dataset), num_samples, replace=False)

    total_rows = num_samples
    cols_per_sample = 3
    plt.figure(figsize=(cols_per_sample * 5, total_rows * 5))
    
    with torch.no_grad():
        for i, idx in enumerate(samples):
            image, mask = dataloader.dataset[idx]
            image = image.unsqueeze(0).cuda()
            mask = mask.unsqueeze(0)
            output = model(image)['out'] >= 0.5 # defined the threshold at 0.4, can experiment with different values
            pred_mask = output.squeeze().cpu().numpy()

            plt.subplot(total_rows, cols_per_sample, i * cols_per_sample + 1)
            plt.imshow(transforms.ToPILImage()(image.squeeze()))
            plt.title(f'Image {i+1}')
            plt.axis('off')

            plt.subplot(total_rows, cols_per_sample, i * cols_per_sample + 2)
            plt.imshow(transforms.ToPILImage()(mask.squeeze()), cmap='gray')
            plt.title(f'Ground Truth {i+1}')
            plt.axis('off')

            plt.subplot(total_rows, cols_per_sample, i * cols_per_sample + 3)
            plt.imshow(pred_mask, cmap='gray')
            plt.title(f'Predicted {i+1}')
            plt.axis('off')

    # Display the full grid
    plt.tight_layout()
    plt.show()

visualize_predictions_grid(model, val_dataloader)


ValueError: Cannot take a larger sample than population when 'replace=False'

In [8]:
def dice_coef(groundtruth_mask, pred_mask):
    intersect = np.sum(pred_mask*groundtruth_mask)
    total_sum = np.sum(pred_mask**2) + np.sum(groundtruth_mask**2)
    dice = np.mean(2*intersect/total_sum)
    return round(dice, 3) #round up to 3 decimal places

def iou(groundtruth_mask, pred_mask):
    intersect = np.sum(pred_mask*groundtruth_mask)
    union = np.sum(pred_mask) + np.sum(groundtruth_mask) - intersect
    iou = np.mean(intersect/union)
    return round(iou, 3)

true = []
pred = []
for imgs, mask in val_dataloader:
    true.extend([m.cpu().numpy().reshape(1, 512, 512) for m in mask])
    with torch.no_grad():
        imgs = imgs.reshape(-1, 3, 512, 512).cuda()
        outputs = model(imgs)['out'].detach().cpu().numpy() >= 0.5
    pred.extend([m for m in outputs])

true = np.array(true)
pred = np.array(pred)
print("Dice coefficient is:",dice_coef(true,pred))
print("IoU:", iou(true,pred))

Dice coefficient is: 0.777
IoU: 0.629
