###### DeepLabV3+

Training images are in `training/image_2`, and the corresponding color-coded label masks are in `training/semantic_rgb`.


In [None]:
import torch
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
import cv2


Change the number of output classes of the pretrained model's final classifier layer.

###### Train

In [None]:
import torch
import torchvision
from torchvision.models.segmentation import deeplabv3_resnet101

# Pick the compute device first: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# torchvision's DeepLabV3 with a ResNet-101 backbone and pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of
# `weights=` - confirm against the installed version.
model = deeplabv3_resnet101(pretrained=True)

# Replace the last 1x1 convolution of the classifier head so that it emits
# two channels: {background, lane}.
model.classifier[4] = torch.nn.Conv2d(in_channels=256, out_channels=2, kernel_size=1)

model = model.to(device)

In [None]:
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

# Define a custom dataset class
class KITTILaneDataset(Dataset):
    """Pair camera images with binary lane masks derived from color-coded labels.

    Images and masks are matched by position after sorting the file names of
    both directories. Every sample is resized to 520x520. The mask is 1 where
    the label pixel equals ``lane_color`` and 0 everywhere else.

    Thresholding a grayscale read of the label image does not work for
    color-coded masks: the default lane color (128, 64, 128) has a gray value
    below 128 and would end up as background, so colors are matched exactly.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the color-coded label masks.
        transform: Optional callable applied to the resized RGB image array.
        lane_color: (R, G, B) color that marks the lane class in the masks.
            Pass ``(255, 255, 255)`` for black/white binary masks.

    Raises:
        ValueError: If the two directories hold a different number of files,
            because index-based pairing would then be wrong.
    """

    def __init__(self, image_dir, mask_dir, transform=None, lane_color=(128, 64, 128)):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.lane_color = tuple(lane_color)
        self.images = sorted(os.listdir(image_dir))
        self.masks = sorted(os.listdir(mask_dir))

        if len(self.images) != len(self.masks):
            raise ValueError(
                f"Found {len(self.images)} images in {image_dir!r} but "
                f"{len(self.masks)} masks in {mask_dir!r}; they are paired by index."
            )

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.images)

    def _binarize_mask(self, mask):
        """Return a uint8 (H, W) array that is 1 where ``mask`` equals the lane color."""
        is_lane = np.all(mask == np.asarray(self.lane_color), axis=-1)
        return is_lane.astype(np.uint8)

    def __getitem__(self, idx):
        """Load, resize and convert the ``idx``-th sample.

        Returns:
            ``(image, mask)`` where ``mask`` is a ``torch.long`` tensor of
            shape (520, 520) and ``image`` is the output of ``transform``
            (or the resized RGB array when no transform was given).

        Raises:
            FileNotFoundError: If OpenCV cannot read the image or the mask.
        """
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])

        # cv2.imread reports an unreadable file by returning None
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB

        mask = cv2.imread(mask_path, cv2.IMREAD_COLOR)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2RGB)  # lane_color is given as RGB

        # Resize both to a fixed size; nearest-neighbor keeps label colors exact
        image = cv2.resize(image, (520, 520))
        mask = cv2.resize(mask, (520, 520), interpolation=cv2.INTER_NEAREST)

        # The target is always a long tensor, independent of the image transform
        mask = torch.tensor(self._binarize_mask(mask), dtype=torch.long)

        if self.transform:
            image = self.transform(image)

        return image, mask

# Image pipeline: ndarray -> PIL image -> 520x520 -> float tensor ->
# per-channel normalization with the mean/std given below.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((520, 520)),
    transforms.ToTensor(),
    normalize,
])

# Dataset over the training folders, served in shuffled batches of four.
dataset = KITTILaneDataset(
    image_dir="./training/image_2",
    mask_dir="./training/semantic_rgb",
    transform=transform,
)
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)

In [None]:
import torch.nn as nn
import torch.optim as optim

# Training objective: cross-entropy between the per-pixel class scores and
# the integer label mask.
criterion = nn.CrossEntropyLoss()

# Every model parameter is updated by Adam with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
from tqdm import tqdm

num_epochs = 10  # Number of passes over the training data
model.train()  # Training mode for the whole run

for epoch in range(num_epochs):
    running_loss = 0.0
    loop = tqdm(dataloader, leave=True)
    # The description is constant within an epoch, so set it once up front
    loop.set_description(f"Epoch [{epoch+1}/{num_epochs}]")

    for batches_seen, (images, masks) in enumerate(loop, start=1):
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()  # Zero the gradients
        outputs = model(images)["out"]  # Forward pass

        loss = criterion(outputs, masks)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item()
        # Average over the batches processed so far. Dividing by
        # len(dataloader) would under-report the loss until the last batch.
        loop.set_postfix(loss=running_loss / batches_seen)

print("Training Complete!")

In [None]:
import matplotlib.pyplot as plt

# Switch to evaluation mode
model.eval()

def predict(image_path):
    """Run the model on one image file and plot the image next to its mask.

    Uses the module-level ``transform``, ``model`` and ``device``. The mask is
    the per-pixel argmax over the model's ``'out'`` scores.

    Args:
        image_path: Path of the image file to segment.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising for a bad path
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Add a leading batch dimension of size one before the forward pass
    input_image = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_image)['out']
        predicted_mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()

    # Visualize
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.title("Original Image")
    plt.axis("off")

    plt.subplot(1, 2, 2)
    plt.imshow(predicted_mask, cmap="gray")
    plt.title("Predicted Lane Mask")
    plt.axis("off")

    plt.show()

# Run prediction on a sample image
predict("/content/testing/image_2/000001_10.png")

In [None]:
import PIL
# Side-by-side view of one training image (left) and its label mask (right).
im_path = "/content/training/image_2/000020_10.png"
mask_path = "/content/training/semantic_rgb/000020_10.png"
image = PIL.Image.open(im_path)
mask = PIL.Image.open(mask_path)

plt.figure(figsize=(12, 5))
for position, picture in enumerate((image, mask), start=1):
    plt.subplot(1, 2, position)
    plt.imshow(picture)

plt.show()

In [None]:
# Binarize the color mask: pixels whose color equals the lane color turn
# white, every other pixel turns black.
lane_color = np.array([128,64,128])
pixels = np.array(mask)
lane_mask = (pixels == lane_color).all(axis=-1)

pixels[...] = 0
pixels[lane_mask] = 255

preprocessed_mask = PIL.Image.fromarray(pixels)
plt.imshow(preprocessed_mask)
plt.show()

###### To find the number of classes

In [None]:
import cv2
import numpy as np
# Collect every distinct pixel color of one label mask; each color stands
# for one class.
mask_path = "/content/training/semantic_rgb/000020_10.png"
mask = cv2.imread(mask_path)
if mask is None:
    # cv2.imread returns None for a missing or unreadable file
    raise FileNotFoundError(f"Could not read mask: {mask_path}")

# One row per pixel, so np.unique can de-duplicate whole color triples
mask_reshaped = mask.reshape(-1, 3)
unique_classes = np.unique(mask_reshaped, axis=0)

In [None]:
# One row per distinct color found in the mask (channels in cv2's B, G, R order)
print(unique_classes)

In [None]:
# Number of distinct colors, i.e. classes that occur in this one mask
print(len(unique_classes))

###### To preprocess training masks

In [None]:
# Dictionary mapping mask colors to integer class labels.
# The keys are looked up with pixels returned by cv2.imread, so each key is in
# (B, G, R) channel order, not (R, G, B).
# Colors that are not listed here are written as 0 (background) by the
# conversion code that uses this map.
# NOTE(review): the class names below are inferred from the Cityscapes color
# palette by reading each key as BGR - verify against the dataset's own label
# definitions.
color_map = {
    (128, 64, 128): 1,    # Road
    (35,142,107): 2,    # presumably vegetation (RGB 107,142,35)
    (70, 70, 70): 3,      # Building
    (60,20,220): 4,   # presumably person (RGB 220,20,60)
    (153, 153, 153): 5,   # presumably pole
    (153, 153, 190): 6,   # presumably fence (RGB 190,153,153)
    (0,220,220): 7,     # presumably traffic sign (RGB 220,220,0)
    (142,0,0): 8,       # presumably car (RGB 0,0,142)
    (100, 100, 150): 9,       # presumably bridge (RGB 150,100,100)
    (152, 251, 152): 10,      # presumably terrain
    (180, 130, 70): 11,    # presumably sky (RGB 70,130,180)
    (232, 35, 244): 12,    # presumably sidewalk (RGB 244,35,232)
    (0,0,0): 0,   # Background (can be 0)
}


In [None]:
import numpy as np
import cv2

# Load a color-encoded mask image (example path); shape is (height, width, 3)
example_mask_path = '/content/training/semantic_rgb/000020_10.png'
mask_image = cv2.imread(example_mask_path)
if mask_image is None:
    # cv2.imread returns None for a missing or unreadable file
    raise FileNotFoundError(f"Could not read mask: {example_mask_path}")

# Start from an all-zero label image, so any color that is not in
# color_map stays background (0)
height, width, _ = mask_image.shape
integer_mask = np.zeros((height, width), dtype=np.uint8)

# Label one class at a time with a whole-image comparison. This gives the same
# result as visiting every pixel in a Python double loop, but runs in NumPy.
for color, class_id in color_map.items():
    integer_mask[np.all(mask_image == color, axis=-1)] = class_id

# Now `integer_mask` contains class labels (0 to 12 for KITTI)
print(integer_mask)


In [None]:
# Save the class-id mask as a PNG; the ids are small (0-12), so the file
# will presumably look almost black in an image viewer
cv2.imwrite('converted_mask.png', integer_mask)

In [None]:
# Spread the class ids over the 8-bit range (step 255 // 12 == 21) so that the
# classes are visually distinguishable in the saved PNG.
brightness_step = 255 // 12
scaled_mask = (brightness_step * integer_mask).astype(np.uint8)
cv2.imwrite('converted_scaled_mask.png', scaled_mask)

In [None]:
import os

# Path to the folder with RGB masks
mask_folder = '/content/training/semantic_rgb'
output_mask_folder = '/content/preprocessed_masks'
# exist_ok lets this cell be re-run without failing on the existing folder
os.makedirs(output_mask_folder, exist_ok=True)

# Only PNG files are converted, so only they count towards the progress
# total. Sorting makes the processing order reproducible.
mask_filenames = sorted(name for name in os.listdir(mask_folder) if name.endswith(".png"))
n = 0
total = len(mask_filenames)

# Loop through all mask images
for mask_filename in mask_filenames:
    mask_path = os.path.join(mask_folder, mask_filename)
    mask_image = cv2.imread(mask_path)
    if mask_image is None:
        # Stop rather than skip: a missing output mask would shift the
        # index-based image/mask pairing used for training
        raise FileNotFoundError(f"Could not read mask: {mask_path}")

    # All-zero start, so colors missing from color_map stay background (0)
    height, width, _ = mask_image.shape
    integer_mask = np.zeros((height, width), dtype=np.uint8)

    # Convert colors to class labels one class at a time (vectorized
    # replacement for a per-pixel Python loop, same result)
    for color, class_id in color_map.items():
        integer_mask[np.all(mask_image == color, axis=-1)] = class_id

    # Save the integer mask under the same file name
    output_mask_path = os.path.join(output_mask_folder, mask_filename)
    cv2.imwrite(output_mask_path, integer_mask)
    n += 1
    print(str(n) + '/' + str(total))


###### Train the model

In [None]:
import os
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

class KITTIDataset(Dataset):
    """Pair camera images with integer class-id masks for segmentation training.

    Images and masks are matched by position after sorting the file names of
    both directories, and both are resized to ``image_size``.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the class-id masks.
        transform: Optional callable applied to the resized RGB PIL image.
        image_size: Target size passed to ``PIL.Image.resize``.

    Raises:
        ValueError: If the two directories hold a different number of files,
            because index-based pairing would then be wrong.
    """

    def __init__(self, image_dir, mask_dir, transform=None, image_size=(256, 256)):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.image_size = image_size
        self.image_names = sorted(os.listdir(image_dir))
        self.mask_names = sorted(os.listdir(mask_dir))

        if len(self.image_names) != len(self.mask_names):
            raise ValueError(
                f"Found {len(self.image_names)} images in {image_dir!r} but "
                f"{len(self.mask_names)} masks in {mask_dir!r}; they are paired by index."
            )

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load and resize the ``idx``-th sample.

        Returns:
            ``(image, mask)`` where ``mask`` is a ``torch.long`` tensor of
            class ids and ``image`` is the output of ``transform`` (or the
            resized RGB PIL image when no transform was given).
        """
        image_path = os.path.join(self.image_dir, self.image_names[idx])
        mask_path = os.path.join(self.mask_dir, self.mask_names[idx])

        # Open inside `with` so the file handles are released. resize()
        # returns a new, fully loaded image that outlives the block.
        with Image.open(image_path) as raw_image:
            # Force three channels so the 3-channel normalization always fits
            image = raw_image.convert("RGB").resize(self.image_size, Image.BILINEAR)
        with Image.open(mask_path) as raw_mask:
            # Nearest-neighbor keeps class ids intact (no blended values)
            mask = raw_mask.resize(self.image_size, Image.NEAREST)

        # The target is always a long tensor, independent of the image transform
        mask = torch.tensor(np.array(mask), dtype=torch.long)

        if self.transform:
            image = self.transform(image)

        return image, mask

In [None]:
from torch.utils.data import DataLoader

# Per-image preprocessing: resize to 256x256 (adjust as needed), convert to a
# tensor, then normalize each channel with the mean/std below.
resize_step = transforms.Resize((256, 256))
normalize_step = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transform = transforms.Compose([resize_step, transforms.ToTensor(), normalize_step])

# Training set: camera images paired with the preprocessed class-id masks.
train_dataset = KITTIDataset(
    image_dir='/content/training/image_2',
    mask_dir='/content/preprocessed_masks',
    transform=transform,
)

# Shuffled mini-batches of eight samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)

In [None]:
import torch
import torchvision.models.segmentation as segmentation

# Choose the compute device up front: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# torchvision's DeepLabV3 with a ResNet-101 backbone and pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of
# `weights=` - confirm against the installed version.
model = segmentation.deeplabv3_resnet101(pretrained=True)

# Replace the last 1x1 convolution of the classifier head so that it outputs
# 13 channels, one per class id used for the KITTI masks.
model.classifier[4] = torch.nn.Conv2d(
    in_channels=256,
    out_channels=13,
    kernel_size=(1, 1),
    stride=(1, 1),
)

# Move the model's parameters to the selected device
model.to(device)

In [None]:
import torch.optim as optim
import torch.nn as nn

# Multi-class segmentation objective: cross-entropy between the per-pixel
# class scores and the integer label mask.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
num_epochs = 10  # Adjust number of epochs
for epoch in range(num_epochs):
    model.train()  # Training mode at the start of every epoch
    running_loss = 0.0

    for images, masks in train_loader:
        images, masks = images.to(device), masks.to(device)

        # Standard optimization step: clear old gradients, score the batch,
        # back-propagate the loss, then update the weights.
        optimizer.zero_grad()
        loss = criterion(model(images)['out'], masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean batch loss of the epoch that just finished
    mean_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

# The weights are written once, after the last epoch; the file name carries the
# number of the final epoch. Indent this line into the loop to checkpoint
# after every epoch instead.
torch.save(model.state_dict(), f"deeplabv3_epoch{epoch+1}.pth")


In [None]:
import time
from tqdm import tqdm  # Progress bar

num_epochs = 10  # Adjust number of epochs
print_freq = 10  # Refresh the displayed loss every 10 batches
num_batches = len(train_loader)

for epoch in range(num_epochs):
    model.train()  # Training mode at the start of every epoch
    running_loss = 0.0
    start_time = time.time()

    # Progress bar over (1-based batch counter, batch) pairs
    progress_bar = tqdm(
        enumerate(train_loader, start=1),
        total=num_batches,
        desc=f"Epoch {epoch+1}/{num_epochs}",
    )

    for batches_done, (images, masks) in progress_bar:
        images = images.to(device)
        masks = masks.to(device)

        # Clear gradients, score the batch, back-propagate, update weights
        optimizer.zero_grad()
        loss = criterion(model(images)['out'], masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Show the running mean every `print_freq` batches and on the last batch
        if batches_done % print_freq == 0 or batches_done == num_batches:
            avg_loss = running_loss / batches_done
            progress_bar.set_postfix(loss=f"{avg_loss:.4f}")

    # Per-epoch summary: mean batch loss and wall-clock duration
    epoch_loss = running_loss / num_batches
    epoch_time = time.time() - start_time
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {epoch_loss:.4f} - Time: {epoch_time:.2f}s")

# Save the weights once, after the final epoch
torch.save(model.state_dict(), f"deeplabv3_epoch{epoch+1}.pth")


###### Inference

In [None]:
import matplotlib.pyplot as plt

# Predict and visualize
model.eval()
image, mask = train_dataset[0]  # Take an image from the dataset

# Get model prediction: add a batch dimension, take the 'out' scores of the
# single sample, then pick the highest-scoring class for every pixel
with torch.no_grad():
    output = model(image.unsqueeze(0).to(device))['out'][0]
    predicted_mask = torch.argmax(output, dim=0).cpu().numpy()

mean = np.array([0.485, 0.456, 0.406])  # ImageNet mean
std = np.array([0.229, 0.224, 0.225])   # ImageNet std

def denormalize(tensor):
    """Convert a normalized [C, H, W] tensor back to an [H, W, C] array in [0, 1].

    Undoes the per-channel ``(x - mean) / std`` normalization using the
    module-level ``mean`` and ``std`` and clips the result for display.
    """
    tensor = tensor.numpy().transpose(1, 2, 0)  # Convert [C, H, W] to [H, W, C]
    tensor = (tensor * std) + mean  # Undo normalization
    tensor = np.clip(tensor, 0, 1)  # Clip values to be between [0,1]
    return tensor


plt.figure(figsize=(10, 5))

# Original Image
plt.subplot(1, 3, 1)
plt.imshow(denormalize(image.cpu()))
plt.title("Original Image")
plt.axis("off")

# Both mask panels share the color range 0..12 (the class ids). Without a
# fixed range imshow rescales each panel to its own min/max, so the same class
# could be drawn in different colors in the two panels.

# Predicted Mask
plt.subplot(1, 3, 2)
plt.imshow(predicted_mask, vmin=0, vmax=12)
plt.title("Predicted Mask")
plt.axis("off")

# True Mask
plt.subplot(1, 3, 3)
plt.imshow(mask, vmin=0, vmax=12)
plt.title("True Mask")
plt.axis("off")

plt.show()


###### Save the model to my Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive under /content/drive (Colab only)
drive.mount('/content/drive')  # Gives access to your Google Drive
# Store a copy of the current weights on Drive.
# NOTE(review): the file name is hard-coded to "epoch10" regardless of how many
# epochs were actually run in this session - confirm it matches.
torch.save(model.state_dict(), "/content/drive/My Drive/deeplabv3_epoch10.pth")

In [None]:
# Shell command (IPython `!` escape): recursively copy the converted masks
# folder into the project folder on Google Drive
!cp -r "/content/preprocessed_masks" "/content/drive/MyDrive/MECH 501 Lane Segmentation"

###### Define function to evaluate IoU metric

In [None]:
def compute_iou(pred_mask, true_mask, class_id):
    """
    Computes the Intersection over Union (IoU) for a specific class in a single image.

    Both masks are converted with ``np.asarray`` first, so array-likes (for
    example CPU tensors) are handled the same way as NumPy arrays and the
    result type does not depend on the input types.

    Args:
    - pred_mask (array-like): Predicted segmentation mask of class ids
    - true_mask (array-like): Ground truth segmentation mask of class ids
    - class_id (int): The class ID to compute IoU for; must be a value of ``color_map``.

    Returns:
    - IoU score (numpy.float64); 0.0 when the class occurs in neither mask
    """

    pred_mask = np.asarray(pred_mask)
    true_mask = np.asarray(true_mask)

    assert pred_mask.shape == true_mask.shape, "Mask shapes should be the same"

    assert class_id in color_map.values(), "class_ID should be defined in color to class mapping"

    # Binary masks that are True where the requested class occurs
    pred_class = pred_mask == class_id
    true_class = true_mask == class_id

    # Pixel counts of the overlap and of the combined area
    intersection = np.logical_and(pred_class, true_class).sum()
    union = np.logical_or(pred_class, true_class).sum()

    if union == 0:  # The class exists in neither the prediction nor the true mask
        # Same scalar type as the regular path, so callers can treat both alike
        return np.float64(0.0)

    return np.float64(intersection / union)

###### Compute IoU

In [None]:
# IoU of class id 1 (the id color_map assigns to the road color) between the
# predicted mask and the ground-truth mask of the visualized sample
iou_score = compute_iou(predicted_mask, mask, 1)

In [None]:
# Unwrap the scalar result into a plain Python number for display
iou_score.item()