In [3]:
import torch
import torchvision.transforms as transforms
import torchvision.models.segmentation as models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import cv2
import numpy as np

In [4]:
# Select the compute device: the CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
# Define Paths to Cityscapes dataset
# The dataset root can be overridden with the CITYSCAPES_ROOT environment
# variable so the notebook runs on machines where the data lives elsewhere;
# when the variable is unset the original relative location is used.
CITYSCAPES_ROOT = os.environ.get("CITYSCAPES_ROOT", "./content/cityscapes")
CITYSCAPES_IMG_DIR = f"{CITYSCAPES_ROOT}/leftImg8bit/train"
CITYSCAPES_MASK_DIR = f"{CITYSCAPES_ROOT}/gtFine/train"

In [6]:
# Step 1: Define Cityscapes Dataset Class
class CityscapesDataset(Dataset):
    """Paired Cityscapes images and label masks for semantic segmentation.

    The constructor walks ``img_dir/<city>/`` and, for every file ending in
    ``_leftImg8bit.png``, looks for ``mask_dir/<city>/..._gtFine_labelTrainIds.png``.
    Only images whose mask file exists are kept. Directory listings are
    sorted so the sample order is the same on every run and platform.

    Args:
        img_dir: Directory containing one sub-directory of images per city.
        mask_dir: Directory containing one sub-directory of masks per city.
        img_size: Output size as ``(height, width)``.
        transform: Stored on the instance but not applied by ``__getitem__``.

    Each item is a tuple ``(img, mask)``:
        img:  float32 tensor of shape ``(C, height, width)``, pixel values
              divided by 255.
        mask: int64 tensor of shape ``(height, width)`` holding the raw
              values read from the mask file.
    """

    IMG_SUFFIX = "_leftImg8bit.png"
    MASK_SUFFIX = "_gtFine_labelTrainIds.png"

    def __init__(self, img_dir, mask_dir, img_size=(512, 1024), transform=None):
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.img_size = img_size
        # NOTE(review): kept for interface compatibility; nothing in this
        # class applies it yet.
        self.transform = transform

        self.image_paths = []
        self.mask_paths = []

        # Read all image and mask files (sorted for a deterministic order)
        for city in sorted(os.listdir(img_dir)):
            img_city_path = os.path.join(img_dir, city)
            # Skip stray files (e.g. a README) sitting next to the city folders;
            # os.listdir on them would raise NotADirectoryError.
            if not os.path.isdir(img_city_path):
                continue
            mask_city_path = os.path.join(mask_dir, city)

            for file_name in sorted(os.listdir(img_city_path)):
                # Only real image files can be mapped to a mask file name.
                if not file_name.endswith(self.IMG_SUFFIX):
                    continue
                mask_name = file_name[: -len(self.IMG_SUFFIX)] + self.MASK_SUFFIX
                img_path = os.path.join(img_city_path, file_name)
                mask_path = os.path.join(mask_city_path, mask_name)

                if os.path.exists(mask_path):
                    self.image_paths.append(img_path)
                    self.mask_paths.append(mask_path)

    def __len__(self):
        """Return the number of (image, mask) pairs found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, resize and tensorize the ``idx``-th (image, mask) pair.

        Raises:
            OSError: If OpenCV cannot decode the image or the mask file.
        """
        img_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]

        # cv2.resize expects dsize as (width, height), the reverse of the
        # (height, width) convention used for img_size and tensor shapes.
        height, width = self.img_size
        dsize = (width, height)

        # Load Image
        img = cv2.imread(img_path)
        if img is None:  # cv2.imread signals failure by returning None
            raise OSError(f"Could not read image file: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, dsize)
        # NOTE(review): only scaled to [0, 1]; no mean/std normalization is
        # applied - confirm this matches what the pretrained weights expect.
        img = img / 255.0

        # Load Mask
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise OSError(f"Could not read mask file: {mask_path}")
        # Nearest-neighbour keeps label ids intact (no blended in-between values).
        mask = cv2.resize(mask, dsize, interpolation=cv2.INTER_NEAREST)

        # Convert to Tensor
        img = torch.tensor(img, dtype=torch.float32).permute(2, 0, 1)  # (H,W,C) -> (C,H,W)
        mask = torch.tensor(mask, dtype=torch.long)  # class indices, no one-hot encoding

        return img, mask

In [7]:
# Step 2: Build the training dataset and wrap it in a shuffling DataLoader
train_dataset = CityscapesDataset(
    img_dir=CITYSCAPES_IMG_DIR,
    mask_dir=CITYSCAPES_MASK_DIR,
)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,
)

FileNotFoundError: [Errno 2] No such file or directory: './content/cityscapes/leftImg8bit/train'

In [None]:
# Step 3: Load Pre-Trained DeepLabV3 (ResNet-50 Backbone)
# torchvision's `deeplabv3_resnet50` is DeepLabV3, not DeepLabV3+.
# `device` is defined once in the device cell near the top of the notebook,
# so it is not re-created here.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` - confirm against the installed version before switching.
model = models.deeplabv3_resnet50(pretrained=True)

In [None]:
# Step 4: Modify Final Classification Layer for Cityscapes (19 classes)
num_classes = 19
# Take the input width from the layer being replaced instead of hard-coding
# it, so the swap stays correct if the head's channel count differs.
model.classifier[4] = nn.Conv2d(
    model.classifier[4].in_channels, num_classes, kernel_size=1
)
# Assign the result so the cell does not echo the full module repr.
model = model.to(device)

In [None]:
# Step 5: Define Loss Function and Optimizer
# The dataset reads `*_gtFine_labelTrainIds.png` masks. In the Cityscapes
# train-id encoding, pixels that belong to no evaluated class carry the value
# 255. With only 19 output classes that value is an out-of-range target, so
# it must be excluded from the loss via `ignore_index`.
criterion = nn.CrossEntropyLoss(ignore_index=255)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Step 6: Training Loop
def train_model(model, dataloader, criterion, optimizer, num_epochs=10, device=None):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    For every batch the inputs are moved to ``device``, the model is run, and
    the loss is computed between the ``'out'`` entry of the model's output
    dict and the target masks.

    Args:
        model: Module whose forward pass returns a dict with an ``'out'`` key.
        dataloader: Iterable yielding ``(images, masks)`` tensor pairs.
        criterion: Callable ``criterion(outputs, masks)`` returning a scalar loss.
        optimizer: Optimizer updating ``model``'s parameters.
        num_epochs: Number of full passes over ``dataloader``.
        device: Device the batches are moved to. When ``None`` it is taken
            from the model's first parameter (CPU if the model has none), so
            the function does not depend on a notebook-level global.

    Raises:
        ValueError: If ``dataloader`` yields no batches in an epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        num_batches = 0  # counted here so iterables without __len__ also work
        for images, masks in dataloader:
            images, masks = images.to(device), masks.to(device)

            optimizer.zero_grad()
            outputs = model(images)['out']
            loss = criterion(outputs, masks)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # Fail with a clear message rather than a ZeroDivisionError below.
            raise ValueError("dataloader yielded no batches; nothing to train on")

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss / num_batches}")


In [None]:
# Kick off the first training run: 10 epochs with the optimizer defined above
train_model(
    model=model,
    dataloader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
)

In [None]:
# Step 7: Fine-tune all layers - mark every backbone parameter as trainable.
# NOTE(review): no visible cell sets requires_grad to False beforehand, so
# this presumably only re-affirms the default - confirm.
for backbone_param in model.backbone.parameters():
    backbone_param.requires_grad_(True)

In [None]:
# Fresh Adam optimizer with a smaller learning rate, then 10 more epochs
optimizer = optim.Adam(params=model.parameters(), lr=1e-5)
train_model(
    model=model,
    dataloader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
)