In [1]:
# data preprocessing
import os
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision.transforms import Compose, ToTensor, Normalize


class CityscapesDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transforms=None):
        self.transforms = transforms
        self.images = []
        self.masks = []

        # Walk through the image directory
        for root, _, files in os.walk(image_dir):
            for file in files:
                if file.endswith('.png'):
                    img_path = os.path.join(root, file)
                    mask_path = os.path.join(mask_dir, root.split('/')[-1], file.replace('leftImg8bit', 'gtFine_labelIds'))
                    
                    # Check if mask exists
                    if os.path.exists(mask_path):
                        self.images.append(img_path)
                        self.masks.append(mask_path)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        mask_path = self.masks[idx]
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")  # Convert to grayscale

        if self.transforms:
            image = self.transforms(image)
            mask = ToTensor()(mask)  # Ensure mask is also a tensor

        return image, mask

# Define transforms
transforms = Compose([
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Setup dataset
train_dataset = CityscapesDataset(
    image_dir='/home/maith/Desktop/cityscapes/leftImg8bit/train',
    mask_dir='/home/maith/Desktop/cityscapes/gtFine/train',
    transforms=transforms
)

val_dataset = CityscapesDataset(
    image_dir='/home/maith/Desktop/cityscapes/leftImg8bit/val',
    mask_dir='/home/maith/Desktop/cityscapes/gtFine/val',
    transforms=transforms
)

In [2]:
#Data Loader
from torch.utils.data import DataLoader

def get_dataloader(batch_size, train_dataset, val_dataset, shuffle=True, num_workers=4):
    train_loader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=shuffle, 
        num_workers=num_workers
    )
    
    val_loader = DataLoader(
        val_dataset, 
        batch_size=batch_size, 
        shuffle=False, 
        num_workers=num_workers
    )
    
    return train_loader, val_loader

# Define batch size
batch_size = 16

# Setup the validation dataset using the correct paths
val_dataset = CityscapesDataset(
    image_dir='/home/maith/Desktop/cityscapes/leftImg8bit/val',
    mask_dir='/home/maith/Desktop/cityscapes/gtFine/val',
    transforms=transforms
)

# Create data loaders
train_loader, val_loader = get_dataloader(batch_size, train_dataset, val_dataset)

print(f"Train loader length (number of batches): {len(train_loader)}")
print(f"Validation loader length (number of batches): {len(val_loader)}")

Train loader length (number of batches): 186
Validation loader length (number of batches): 32


In [3]:
# ENet model
import torch
import torch.nn as nn
import torch.nn.functional as F

class InitialBlock(nn.Module):
    def __init__(self, out_channels):
        super(InitialBlock, self).__init__()
        self.conv = nn.Conv2d(3, out_channels, kernel_size=3, stride=2, padding=1, bias=False)
        self.maxpool = nn.MaxPool2d(2, stride=2)
        self.batch_norm = nn.BatchNorm2d(out_channels + 3)
        self.prelu = nn.PReLU()

    def forward(self, x):
        conv = self.conv(x)
        maxpool = self.maxpool(x)
        merged = torch.cat((conv, maxpool), 1)
        return self.prelu(self.batch_norm(merged))

class Bottleneck(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, internal_scale=4,
                 asymmetric=False, dilated=False, padding=1, dropout_prob=0.01,
                 downsample=False, upsample=False):
        super(Bottleneck, self).__init__()
        internal_channels = out_channels // internal_scale
        self.downsample = downsample
        self.upsample = upsample

        # Main branch
        if downsample:
            self.main_conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2),
                nn.BatchNorm2d(out_channels))
        elif upsample:
            self.main_conv = nn.Sequential(
                nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2),
                nn.BatchNorm2d(out_channels))
        else:
            self.main_conv = nn.Sequential()  # Regular

        # Subsequent convolutions
        self.ext_conv1 = nn.Conv2d(in_channels if downsample else out_channels, internal_channels, kernel_size=1)
        self.ext_conv2 = nn.Conv2d(internal_channels, internal_channels, kernel_size=kernel_size,
                                   dilation=dilated if dilated else 1, padding=padding, stride=1,
                                   groups=internal_channels if asymmetric else 1, bias=False)
        self.ext_conv3 = nn.Conv2d(internal_channels, out_channels, kernel_size=1)
        self.batch_norm = nn.BatchNorm2d(out_channels)
        self.dropout = nn.Dropout2d(dropout_prob)
        self.prelu = nn.PReLU()

    def forward(self, x):
        main = self.main_conv(x)
        x = self.ext_conv1(x)
        x = self.ext_conv2(x)
        x = self.ext_conv3(x)
        x = self.batch_norm(x)
        x = self.dropout(x)
        x = x + main
        return self.prelu(x)

class ENet(nn.Module):
    def __init__(self, num_classes):
        super(ENet, self).__init__()
        self.initial_block = InitialBlock(16)
        self.bottleneck1_0 = Bottleneck(19, 64, downsample=True)
        self.bottleneck1_1 = Bottleneck(64, 64)
        self.bottleneck2_0 = Bottleneck(64, 128, downsample=True)
        self.bottleneck2_1 = Bottleneck(128, 128)
        self.bottleneck3_0 = Bottleneck(128, 128, upsample=True)
        self.final_block = nn.ConvTranspose2d(128, num_classes, kernel_size=2, stride=2)

    def forward(self, x):
        x = self.initial_block(x)
        x = self.bottleneck1_0(x)
        x = self.bottleneck1_1(x)
        x = self.bottleneck2_0(x)
        x = self.bottleneck2_1(x)
        x = self.bottleneck3_0(x)
        x = self.final_block(x)
        return x

# Initialize the model
model = ENet(num_classes=20)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

print("ENet model defined and ready for training.")

ENet model defined and ready for training.


In [4]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=25, device='cpu'):
    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        train_loss = 0.0
        
        # Training phase
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)
        
        train_loss /= len(train_loader.dataset)
        
        # Validation phase
        model.eval()  # Set model to evaluate mode
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels.long())
                val_loss += loss.item() * images.size(0)
        
        val_loss /= len(val_loader.dataset)
        
        # Print training/validation statistics 
        print(f'Epoch: {epoch+1}/{num_epochs}')
        print(f'Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
        
    print('Training complete.')

# Run the training loop
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device=device)

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 GiB. GPU 