# Mounting Drive to access Dataset

In [None]:
# Colab-only: mount Google Drive at /content/drive/ so the dataset folders
# under /content/drive/MyDrive used by the later cells are reachable.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


### Dependencies


In [None]:
! pip install torch torchvision matplotlib





## Splitting and Converting COCO JSON Annotations to Masks

In [None]:
import os
import shutil
import cv2
import numpy as np
from pycocotools.coco import COCO


# Source dataset root: convert_split expects one sub-folder per split, each
# holding the images together with a single *.json annotation file.
input_root = "/content/drive/MyDrive/DWF"
# Destination root: convert_split writes <split>/images and <split>/masks here.
output_root = "/content/drive/MyDrive/DatasetWF"
# Names of the split sub-folders to convert.
splits = ["train", "valid", "test"]


def convert_split(split):
    """Convert one dataset split from COCO JSON annotations to PNG class masks.

    Looks for a ``*.json`` annotation file inside ``<input_root>/<split>`` and,
    for every image listed in it, writes a single-channel ``uint8`` mask to
    ``<output_root>/<split>/masks`` and copies the image itself to
    ``<output_root>/<split>/images``. Mask pixels hold the annotation's
    ``category_id``; pixels not covered by any annotation stay 0.

    Images that are listed in the annotations but missing on disk, or whose
    mask cannot be written, are reported and skipped so that the ``images``
    and ``masks`` folders only ever contain matching pairs.

    Args:
        split: Name of the sub-directory of ``input_root`` to convert
            (e.g. ``"train"``).

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    split_in = os.path.join(input_root, split)
    split_out = os.path.join(output_root, split)

    if not os.path.isdir(split_in):
        print(f" Split directory {split_in} not found, skipping...")
        return

    # sorted() makes the choice deterministic if a split holds several JSON files.
    ann_file = next(
        (
            os.path.join(split_in, entry)
            for entry in sorted(os.listdir(split_in))
            if entry.endswith(".json")
        ),
        None,
    )
    if ann_file is None:
        print(f" No JSON file found in {split_in}, skipping...")
        return

    img_out_dir = os.path.join(split_out, "images")
    mask_out_dir = os.path.join(split_out, "masks")
    os.makedirs(img_out_dir, exist_ok=True)
    os.makedirs(mask_out_dir, exist_ok=True)

    coco = COCO(ann_file)
    img_ids = coco.getImgIds()
    converted = 0

    for img_id in img_ids:
        img_info = coco.loadImgs(img_id)[0]
        file_name = img_info["file_name"]

        # Check the source first: writing a mask for an image that cannot be
        # copied would leave an orphan mask in the output dataset.
        src_img = os.path.join(split_in, file_name)
        if not os.path.exists(src_img):
            print(f" Image {src_img} is listed in the annotations but missing, skipping...")
            continue

        # Category ids are stored directly as pixel values, so they must fit
        # in uint8. Where annotations overlap, the later one wins.
        mask = np.zeros((img_info["height"], img_info["width"]), dtype=np.uint8)
        for ann in coco.loadAnns(coco.getAnnIds(imgIds=img_id)):
            ann_mask = coco.annToMask(ann)  # binary (0/1) mask for this annotation
            mask[ann_mask == 1] = ann["category_id"]

        # Swap only the real extension. A plain str.replace(".jpg", ".png")
        # leaves names such as "x.jpeg" or "x.JPG" untouched, and cv2.imwrite
        # would then encode the mask as lossy JPEG, corrupting the class ids.
        mask_name = os.path.splitext(file_name)[0] + ".png"
        mask_path = os.path.join(mask_out_dir, mask_name)
        # cv2.imwrite signals failure through its return value.
        if not cv2.imwrite(mask_path, mask):
            print(f" Could not write mask {mask_path}, skipping...")
            continue

        shutil.copy(src_img, os.path.join(img_out_dir, file_name))
        converted += 1

    print(f" {split} processed: {converted} images")
    skipped = len(img_ids) - converted
    if skipped:
        print(f" {split}: {skipped} images skipped (see messages above)")


# Convert every configured split in turn.
for split in splits:
    convert_split(split)

# NOTE: this message is printed unconditionally. convert_split only prints a
# notice and returns when a split has no JSON file, so check the log above
# for skipped splits before trusting it.
print(" All splits converted successfully at:", output_root)


loading annotations into memory...
Done (t=58.06s)
creating index...
index created!
 train processed: 8392 images
loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
 valid processed: 989 images
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!
 test processed: 493 images
 All splits converted successfully at: /content/drive/MyDrive/DatasetWF


### Checking Mask

In [None]:
import cv2
import numpy as np

# Sanity check: the mask should contain only class ids (0 = background).
mask_path = "/content/drive/MyDrive/DatasetWF/train/masks/ADE_train_00000002_jpg.rf.c34b3e1c75afec24fe304e4325988598.png"
# IMREAD_UNCHANGED keeps the single-channel class ids exactly as stored.
mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail loudly instead of printing the unique values of None.
if mask is None:
    raise FileNotFoundError(f"Could not read mask: {mask_path}")
print("Unique pixel values in mask:", np.unique(mask))

Unique pixel values in mask: [0 1 2]


## Modifying the Head

In [None]:
import torchvision
from torch import nn

# Load DeepLabV3 / MobileNetV3-Large with its pretrained weights. The weights
# enum replaces the deprecated `pretrained=True` flag; COCO_WITH_VOC_LABELS_V1
# is the checkpoint that flag used to select.
weights = torchvision.models.segmentation.DeepLabV3_MobileNet_V3_Large_Weights.COCO_WITH_VOC_LABELS_V1
model = torchvision.models.segmentation.deeplabv3_mobilenet_v3_large(weights=weights)


# Number of output classes; matches the pixel values 0, 1, 2 seen in the mask
# check. TODO confirm no other category ids occur in the dataset.
num_classes = 3
# Replace the final 1x1 classification conv so the head predicts num_classes
# channels, reading the input width from the layer being replaced instead of
# hard-coding it.
head_in_channels = model.classifier[4].in_channels
model.classifier[4] = nn.Conv2d(head_in_channels, num_classes, kernel_size=1)




In [None]:
# torch is first imported in a later cell; import it here so this cell also
# works on a fresh kernel (Restart & Run All).
import torch

# Report whether a GPU is visible and, if so, which one.
cuda_available = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if cuda_available else "CPU"
print("CUDA available:", cuda_available)
print("GPU name:", gpu_name)


CUDA available: True
GPU name: Tesla T4


### Setting Dataset Path

In [None]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

# Dataset paths: the images/ and masks/ folders written by the conversion cell.
# NOTE(review): SegmentationDataset is not defined or imported in the visible
# cells — confirm it is defined before this cell runs. It presumably yields
# (image, mask) pairs, since the training loop unpacks each batch that way.
train_dataset = SegmentationDataset("/content/drive/MyDrive/DatasetWF/train/images",
                                    "/content/drive/MyDrive/DatasetWF/train/masks")
val_dataset = SegmentationDataset("/content/drive/MyDrive/DatasetWF/valid/images",
                                  "/content/drive/MyDrive/DatasetWF/valid/masks")

# Only the training loader shuffles; the validation loader uses DataLoader defaults.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=2)

# Use the GPU when available and move the model (built in an earlier cell) onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=1e-4)


## Train and Evaluate Functions

In [None]:
def train_one_epoch(model, loader, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Network whose forward pass returns a mapping with an ``'out'``
            entry holding the per-class logits.
        loader: Iterable of ``(images, masks)`` batches; it must support
            ``len()``. Masks are presumably class-index maps accepted by
            ``F.cross_entropy`` — confirm against the dataset.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Sum of the per-batch cross-entropy losses divided by ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for batch_imgs, batch_masks in loader:
        batch_imgs = batch_imgs.to(device)
        batch_masks = batch_masks.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_imgs)['out']
        batch_loss = F.cross_entropy(logits, batch_masks)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

def evaluate(model, loader, device):
    """Compute the mean batch cross-entropy loss over ``loader`` without training.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        model: Network whose forward pass returns a mapping with an ``'out'``
            entry holding the per-class logits.
        loader: Iterable of ``(images, masks)`` batches; it must support
            ``len()``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Sum of the per-batch cross-entropy losses divided by ``len(loader)``.
    """
    model.eval()
    running_loss = 0
    with torch.no_grad():
        for batch in loader:
            batch_imgs, batch_masks = batch
            batch_imgs = batch_imgs.to(device)
            batch_masks = batch_masks.to(device)
            logits = model(batch_imgs)['out']
            running_loss += F.cross_entropy(logits, batch_masks).item()
    return running_loss / len(loader)


## Fine-Tuning (20 Epochs)

In [None]:
# Fine-tune for a fixed number of epochs, logging the mean training and
# validation loss after each one.
num_epochs = 20
for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, device)
    val_loss = evaluate(model, val_loader, device)
    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

# Only the weights after the final epoch are saved (state_dict, not the whole
# model object); no intermediate or best-validation checkpoint is written.
torch.save(model.state_dict(), "/content/drive/MyDrive/deeplabv3_mobilenet_finetuned.pth")
