In [None]:
# Mount Google Drive into the Colab runtime; the dataset archive and the saved
# model weights used later in this notebook are read from /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install torchmetrics

import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
import shutil
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
from torchvision.models.segmentation import deeplabv3_resnet101

from torchvision.transforms import functional as F
from torchmetrics.classification import JaccardIndex

import torchvision.transforms as transforms
from PIL import Image, ImageFilter

import warnings
warnings.filterwarnings("ignore")

# Seeding every RNG used in this notebook (Python, NumPy, PyTorch) so that the random
# augmentations and the initialisation of the new classifier layer start from the
# same state on every Restart & Run All
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Setting device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Unzipping Dataset

In [None]:
os.makedirs("/content/advanced_ai/task_1/food", exist_ok=True)
!unzip '/content/drive/MyDrive/advanced_ai/task1/Task 1 Food Segmentation Dataset.zip' -d /content/advanced_ai/task_1/food

#### Creating separate folders

In [None]:
# All four folders share one root; ann_dir holds the masks (annotations), img_dir the photos
foodseg_images_root = "/content/advanced_ai/task_1/food/Task 1 Food Segmentation Dataset/FoodSeg103/Images"

annot_train = f"{foodseg_images_root}/ann_dir/train"
annot_test = f"{foodseg_images_root}/ann_dir/test"
image_train = f"{foodseg_images_root}/img_dir/train"
image_test = f"{foodseg_images_root}/img_dir/test"

## Loading Images and Masks

In [None]:
# File name of the single test image; the test image path and its mask path are derived from it below
test_image_name = "00006149.jpg"

def filepaths(directory):
    """Return the full paths of the .png/.jpg/.jpeg files in `directory`, sorted by file name."""
    image_suffixes = ('.png', '.jpg', '.jpeg')
    collected = []
    for entry in sorted(os.listdir(directory)):
        # str.endswith accepts a tuple, so one call covers all three extensions
        if entry.endswith(image_suffixes):
            collected.append(os.path.join(directory, entry))
    return collected

# Training set: every mask and every image found in the train folders, each list sorted by file name
train_mask_paths = filepaths(annot_train)
train_image_paths = filepaths(image_train)

# Test set: the single test image and its mask (same file name, stored as .png)
test_mask_name = test_image_name.replace(".jpg", ".png")
test_image_paths = [os.path.join(image_test, test_image_name)]
test_mask_paths = [os.path.join(annot_test, test_mask_name)]


## Image Augmentation

In [None]:
def training_augmentations(image, mask, image_size=(256, 256)):
    """Apply one random augmentation draw to an image/mask pair and convert both to tensors.

    Geometric transforms (resize, horizontal flip, rotation, affine) are applied with
    the same parameters to the image and the mask so the two stay aligned.
    Photometric transforms (colour jitter, Gaussian blur) are applied to the image only.

    Args:
        image: PIL image to augment (the notebook passes an RGB image).
        mask: PIL image whose pixel values are the class labels.
        image_size: target size handed to the resize step.

    Returns:
        (image, mask): the normalised image tensor and a `torch.long` tensor of labels.
    """
    # The image is resampled smoothly; the mask must use nearest-neighbour so that
    # no new (blended) label values are invented. Both are spelled out explicitly
    # because rotate/affine default to nearest-neighbour, which aliases the photo.
    bilinear = transforms.InterpolationMode.BILINEAR
    nearest = transforms.InterpolationMode.NEAREST

    # Resizing both the images and masks
    image = F.resize(image, image_size, interpolation=bilinear)
    mask = F.resize(mask, image_size, interpolation=nearest)

    # Random horizontal flip
    if random.random() > 0.5:
        image = F.hflip(image)
        mask = F.hflip(mask)

    # Random rotation
    angle = random.uniform(-15, 15)
    image = F.rotate(image, angle, interpolation=bilinear)
    mask = F.rotate(mask, angle, interpolation=nearest)

    # Random affine transformation (translation and slight rotation)
    affine_params = transforms.RandomAffine.get_params(
        degrees=(-10, 10),
        translate=(0.05, 0.05),
        scale_ranges=None,
        shears=None,
        img_size=image.size
    )
    image = F.affine(image, *affine_params, interpolation=bilinear)
    mask = F.affine(mask, *affine_params, interpolation=nearest)

    # Applying color jitter and gaussian blur - This is only for images and not masks
    image = F.adjust_brightness(image, random.uniform(0.8, 1.2))
    image = F.adjust_contrast(image, random.uniform(0.8, 1.2))
    image = F.adjust_saturation(image, random.uniform(0.8, 1.2))
    image = F.adjust_hue(image, random.uniform(-0.1, 0.1))
    image = F.gaussian_blur(image, kernel_size=3, sigma=random.uniform(0.1, 2.0))

    # Converting images to tensor and normalizing them; converting masks to tensors - Keeping the labels as is
    image = F.to_tensor(image)
    image = F.normalize(image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    mask = torch.tensor(np.array(mask), dtype=torch.long)

    return image, mask

def test_augmentations(image, mask, image_size=(256, 256)):
    """Deterministic preprocessing for test data: resize, tensor conversion and normalisation.

    No random augmentation is applied. The mask is resized with nearest-neighbour
    interpolation and returned as a `torch.long` tensor with its labels unchanged.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    resized_image = F.resize(image, image_size)
    resized_mask = F.resize(mask, image_size, interpolation=transforms.InterpolationMode.NEAREST)

    image_tensor = F.normalize(F.to_tensor(resized_image), mean=channel_mean, std=channel_std)
    mask_tensor = torch.tensor(np.array(resized_mask), dtype=torch.long)

    return image_tensor, mask_tensor


# Creating dataset using the augmentation functions
def dataset_for_segmentation(image_paths, mask_paths, augmentation_function):
    """Load every image/mask pair from disk and run it through `augmentation_function`.

    Args:
        image_paths: paths of the input images.
        mask_paths: paths of the masks, in the same order as `image_paths`.
        augmentation_function: callable taking `(image, mask)` PIL images and
            returning the processed `(image, mask)` pair.

    Returns:
        (images, masks): two lists holding the processed images and masks.

    Raises:
        ValueError: if the two path lists differ in length. Pairing them anyway
            would silently drop samples and could mis-align images and masks.
    """
    image_paths = list(image_paths)
    mask_paths = list(mask_paths)
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            f"Got {len(image_paths)} image paths but {len(mask_paths)} mask paths; "
            "every image needs exactly one mask"
        )

    images = []
    masks = []
    for img_path, mask_path in zip(image_paths, mask_paths):
        # The context managers close the underlying files once the pair has been processed
        with Image.open(img_path) as raw_image, Image.open(mask_path) as mask:
            image = raw_image.convert("RGB")
            # Force the pixel data into memory while the file is still open
            mask.load()
            aug_image, aug_mask = augmentation_function(image, mask)
        images.append(aug_image)
        masks.append(aug_mask)
    return images, masks

## Creating Data Loaders for PyTorch Models

In [None]:
# Training samples are read and augmented lazily, one at a time, whenever the loader
# asks for them. Augmenting the whole training set once up front would freeze the
# "random" augmentations (every epoch would see identical samples) and keep every
# augmented training tensor in memory.
class LazySegmentationDataset(Dataset):
    """Image/mask pairs that are opened from disk and augmented on every access."""

    def __init__(self, image_paths, mask_paths, augmentation_function):
        self.image_paths = list(image_paths)
        self.mask_paths = list(mask_paths)
        if len(self.image_paths) != len(self.mask_paths):
            raise ValueError(
                f"Got {len(self.image_paths)} image paths but {len(self.mask_paths)} mask paths"
            )
        self.augmentation_function = augmentation_function

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        # The context managers close both files as soon as the tensors have been built
        with Image.open(self.image_paths[index]) as raw_image, Image.open(self.mask_paths[index]) as mask:
            return self.augmentation_function(raw_image.convert("RGB"), mask)


train_dataset = LazySegmentationDataset(train_image_paths, train_mask_paths, training_augmentations)

# The test set is a single image with deterministic preprocessing, so it is loaded eagerly
test_images, test_masks = dataset_for_segmentation(test_image_paths, test_mask_paths, test_augmentations)

# Converting lists to DataLoaders
def create_data_loader(image_data, mask_data, batch_size=4, shuffle=True):
    """Wrap parallel lists of image and mask tensors in a DataLoader of (image, mask) pairs."""
    dataset = list(zip(image_data, mask_data))
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = create_data_loader(test_images, test_masks, batch_size=4, shuffle=False)

# Checking the batch shapes for the 1st batch only!
for imgs, masks in train_loader:
    print("Image batch shape:", imgs.shape)  # [batch_size, 3, 256, 256]
    print("Mask batch shape:", masks.shape)    # [batch_size, 256, 256]
    print("Unique values in first mask batch:", torch.unique(masks))
    break


## Sample images from the train dataset


In [None]:
# Selecting a batch from the train_loader
imgs, masks = next(iter(train_loader))

# Converting tensors to NumPy arrays for visualization
imgs = imgs.cpu().numpy()
masks = masks.cpu().numpy()

# Mean/std used by F.normalize in the augmentation functions; needed to undo the
# normalisation, otherwise imshow clips the values and the colours come out wrong
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])

# Displaying some samples
num_samples = min(4, len(imgs))  # Displaying 4 images
# squeeze=False keeps `axes` two-dimensional even when only one sample is shown
fig, axes = plt.subplots(num_samples, 2, figsize=(8, 4 * num_samples), squeeze=False)

for i in range(num_samples):
    # Converting image from [C, H, W] to [H, W, C] and mapping it back to [0,1] for visualization
    img = imgs[i].transpose(1, 2, 0)
    img = np.clip(img * norm_std + norm_mean, 0.0, 1.0)
    # Squeezing the mask to remove any extra dimension
    mask = masks[i].squeeze()  # Mask is now [H, W]

    # Plotting the original image
    axes[i, 0].imshow(img)
    axes[i, 0].set_title("Image")
    axes[i, 0].axis('off')

    # Plotting its mask
    axes[i, 1].imshow(mask, cmap='gray')
    axes[i, 1].set_title("Mask")
    axes[i, 1].axis('off')

plt.tight_layout()
plt.show()

# Model - DeeplabV3 with Resnet101

## Loading a Pre-trained Deeplabv3 model with Resnet101


*   Pretrained on COCO (Common Objects in Context)
*   Fine-tuned here on FoodSeg103 - the backbone is frozen except for its last block (`layer4`); the segmentation head is trained as well





In [None]:
# DeepLabV3 with ResNet-101 backbone, starting from the pretrained weights
model = deeplabv3_resnet101(pretrained=True)
# Replacing the last classifier layer so the output has 104 classes including the background
model.classifier[4] = nn.Conv2d(256, 104, kernel_size=1)
model = model.to(device)

# Freezing the whole backbone first, then re-enabling gradients for layer4 (last block) only
model.backbone.requires_grad_(False)
model.backbone.layer4.requires_grad_(True)

# Cross entropy loss as the loss function, Adam as the optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


## Calculating Mean IoU to track the model's performance

In [None]:
def compute_iou(preds, masks, num_classes=104):
    """Mean IoU over the non-background classes that occur in `preds` or `masks`.

    Args:
        preds: tensor of predicted class ids.
        masks: tensor of ground-truth class ids, same shape as `preds`.
        num_classes: total number of classes, counting the background (id 0).

    Returns:
        float: mean of the per-class IoU over classes 1..num_classes-1 whose union
        is non-empty, or 0.0 when no such class is present.
    """
    preds = preds.detach().cpu().numpy()
    masks = masks.detach().cpu().numpy()

    iou_per_class = []
    for class_id in range(1, num_classes):  # Excluding the background class
        predicted = preds == class_id
        expected = masks == class_id
        union = np.logical_or(predicted, expected).sum()

        if union == 0:
            # Class absent from both prediction and ground truth: skip it. Scoring
            # it as 1.0 would inflate the mean towards 1 for every unused class.
            continue

        intersection = np.logical_and(predicted, expected).sum()
        iou_per_class.append(intersection / union)

    if not iou_per_class:
        return 0.0
    return float(np.mean(iou_per_class))  # Mean IoU across the classes that were present

## Model Training  - Uncomment only for training!!

In [None]:
# # Training loop
# epochs = 50
# model.train()
# for epoch in range(epochs):
#     epoch_loss = 0
#     miou_scores = []
#     for imgs, masks in train_loader:
#         imgs, masks = imgs.to(device), masks.to(device)
#         optimizer.zero_grad()
#         outputs = model(imgs)['out']
#         loss = criterion(outputs, masks)
#         loss.backward()
#         optimizer.step()
#         epoch_loss += loss.item()

#         preds = torch.argmax(outputs, dim=1)
#         for class_id in range(1, 104):  # Excluding background
#             intersection = torch.sum((preds == class_id) & (masks == class_id))
#             union = torch.sum((preds == class_id) | (masks == class_id))

#             # Print intersection and union values for each class
#             # print(f"Class {class_id}: Intersection = {intersection.item()}, Union = {union.item()}")

#             # Only compute IoU if the class appears in either prediction or ground truth
#             if union > 0:
#                 iou = (intersection.float() / union.float()).cpu().item()
#                 miou_scores.append(iou)

#     miou_scores = np.array(miou_scores)
#     miou_score = np.mean(miou_scores)
#     print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}, Mean IoU: {miou_score:.4f}")

# torch.save(model.state_dict(), "/content/drive/MyDrive/advanced_ai/task1/task1_food_segmentation_model.pth")

# Model Evaluation using Test Image

## Loading the saved model

In [None]:
# map_location remaps the stored tensors onto the current device, so a checkpoint
# saved from a GPU session still loads when this runtime has no GPU
model.load_state_dict(
    torch.load(
        "/content/drive/MyDrive/advanced_ai/task1/task1_food_segmentation_model.pth",
        map_location=device,
    )
)
model.to(device)

## Running the model with test image

In [None]:
model.eval()
miou_scores = []

# Mean/std used by F.normalize in the augmentation functions; needed to undo the
# normalisation before display, otherwise imshow clips the values and distorts the colours
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])

with torch.no_grad():
    for imgs, masks in test_loader:
        imgs, masks = imgs.to(device), masks.to(device)

        # Masks are built as integer class labels already; .long() only guards the dtype
        masks = masks.long()

        outputs = model(imgs)['out']
        preds = torch.argmax(outputs, dim=1)

        # Checking unique values in predictions and masks
        print(f"Predicted classes: {torch.unique(preds)}")
        print(f"Ground truth classes: {torch.unique(masks)}")

        for class_id in range(1, 104):  # Excluding background
            intersection = torch.sum((preds == class_id) & (masks == class_id))
            union = torch.sum((preds == class_id) | (masks == class_id))

            # Print intersection and union values for each class - only uncomment to check if the code is working properly
            # print(f"Class {class_id}: Intersection = {intersection.item()}, Union = {union.item()}")

            # Only compute IoU if the class appears in either prediction or ground truth
            if union > 0:
                iou = (intersection.float() / union.float()).cpu().item()
                miou_scores.append(iou)

        # Visualizing original image, ground truth mask, and predicted mask for testing
        img = imgs[0].cpu().numpy().transpose(1, 2, 0)  # Converting to HWC format for visualisation
        img = np.clip(img * norm_std + norm_mean, 0.0, 1.0)  # Back to the [0, 1] range imshow expects
        mask = masks[0].squeeze(0).cpu().numpy()  # Ground truth mask
        pred_mask = preds[0].cpu().numpy()  # Predicted mask

        # Display the images
        fig, ax = plt.subplots(1, 3, figsize=(15, 5))
        ax[0].imshow(img)
        ax[0].set_title("Original Image")
        ax[0].axis('off')
        ax[1].imshow(mask, cmap='gray')
        ax[1].set_title("Ground Truth Mask")
        ax[1].axis('off')
        ax[2].imshow(pred_mask, cmap='gray')
        ax[2].set_title("Predicted Mask")
        ax[2].axis('off')
        plt.show()

# Computing the mean IoU only over the classes that were present
if miou_scores:
    miou_score = np.mean(miou_scores)
else:
    miou_score = 0.0

print(f"Mean IoU: {miou_score:.4f}")
