In [None]:
!pip install fvcore

Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.6 (from fvcore)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting iopath>=0.1.7 (from fvcore)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.7->fvcore)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Downloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Building wheels for collected packages: fvcore, iopath
  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Created wheel for fvcore: filename=fvcore-0.1.5.

In [None]:
# Mount Google Drive at /content/drive; the project code, the dataset and the
# checkpoints referenced by the path constants below all live under that mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# import modules
import numpy as np
import matplotlib.pyplot as plt
import random
import torch
import torch.nn as nn
import time
import torch.optim as optim
import os
import sys

from torchvision import transforms
from torchvision.transforms import InterpolationMode
from fvcore.nn import FlopCountAnalysis, flop_count_table
from torch.utils.data import DataLoader

In [None]:
# --- Google Drive locations ---------------------------------------------------
PROJECT_PATH = '/content/drive/MyDrive/Colab Notebooks/MLME2025_project'
CITYSCAPES_DIR = '/content/drive/MyDrive/Cityspaces/'
# Everything model-related sits under <project>/models/DeepLabV2.
PRETRAIN_MODEL_PATH = f'{PROJECT_PATH}/models/DeepLabV2/deeplab_resnet_pretrained_imagenet.pth'
BEST_MODEL_SAVE_PATH = f'{PROJECT_PATH}/models/DeepLabV2/checkpoints_training/best_model_DeepLab.pth'
LAST_EPOCH_SAVE_PATH = f'{PROJECT_PATH}/models/DeepLabV2/checkpoints_training/last_epoch_DeepLab.pth'

# --- Data ---------------------------------------------------------------------
NUM_CLASSES = 19
H, W = 512, 1024  # images and labels are resized to (H, W)
BATCH_SIZE = 4
NUM_WORKERS = 4

# --- SGD hyper-parameters -----------------------------------------------------
LEARNING_RATE = 1e-3
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4

In [None]:
# Work from the project folder and make its packages importable; the
# `datasets`, `utils` and `models` imports right below rely on this.
os.chdir(PROJECT_PATH)
# Guard the append so re-running this cell does not pile up duplicate entries.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

from datasets.cityscapes import CityScapes
from utils.utils import poly_lr_scheduler_with_backbone, fast_hist, per_class_iou, mean_iou
import models.DeepLabV2.deeplabv2 as deeplab

In [None]:
# One seed for every random number generator used by the notebook. Previously the
# Python and CUDA generators were seeded with 0 while NumPy and torch used 42,
# and the CUDA call silently overrode the value torch.manual_seed had just set.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # explicit for every visible GPU; no-op without CUDA

# Ask cuDNN for deterministic algorithms and disable its autotuner so repeated
# runs pick the same convolution kernels.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Image transforms.
# Train and val use the same pipeline: resize to (H, W) with bilinear
# interpolation, convert to a float tensor, then normalize per channel
# (the mean/std values are the usual ImageNet statistics).

data_transforms = {
    'train': transforms.Compose([
        # resize as asked in the project + interpolation
        transforms.Resize((H, W), interpolation=InterpolationMode.BILINEAR),
        # transform to tensor + normalize
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((H, W), interpolation=InterpolationMode.BILINEAR),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

# Label transform.
# Nearest-neighbour resizing keeps label values intact (no blended values),
# and the result is converted to an int64 tensor without any scaling.

label_transform = transforms.Compose([
    # resize as asked in the project + nearest
    transforms.Resize((H, W), interpolation=InterpolationMode.NEAREST),
    # transform to tensor without normalization
    transforms.Lambda(lambda x: torch.from_numpy(np.array(x)).long())
])

# create cityscapes datasets

cityscapes_datasets = {
    x: CityScapes(
        data_path=CITYSCAPES_DIR,
        split=x,
        transform=data_transforms[x],
        label_transform=label_transform
    )
    for x in ['train', 'val']
}

# create cityscapes dataloader
# Only the training split is shuffled: validation metrics do not depend on the
# sample order, so the val loader iterates in a fixed, repeatable order.

cityscapes_dataloaders = {
    x: DataLoader(
        cityscapes_datasets[x],
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=(x == 'train'),
        pin_memory=True
    )
    for x in ['train', 'val']
}

# save datasets size

dataset_sizes = {x: len(cityscapes_datasets[x]) for x in ['train', 'val']}

In [None]:
# print data type and sizes of dataset
print(f"CITYSCAPES DATASETS: type={type(cityscapes_datasets)}, keys={cityscapes_datasets.keys()} \n")
print(f"TRAIN DATASETS: type={type(cityscapes_datasets['train'])}, len={len(cityscapes_datasets['train'])} \n")
print(f"VAL DATASETS: type={type(cityscapes_datasets['val'])}, len={len(cityscapes_datasets['val'])} \n")

# Fetch the first training sample once (each indexing call re-runs the dataset's
# loading code) and reuse it for every print below.
sample = cityscapes_datasets['train'][0]
print(f"SINGLE IMAGE: type={type(sample)}, keys={sample.keys()} \n")

# Read by key rather than relying on the order of the dict's values.
image, label = sample['x'], sample['y']
print(f"'x': type={type(image)}, shape={image.shape} \n")
print(f"'y': type={type(label)}, shape={label.shape} \n")

CITYSCAPES DATASETS: type=<class 'dict'>, keys=dict_keys(['train', 'val']) 

TRAIN DATASETS: type=<class 'datasets.cityscapes.CityScapes'>, len=1572 

VAL DATASETS: type=<class 'datasets.cityscapes.CityScapes'>, len=500 

SINGLE IMAGE: type=<class 'dict'>, keys=dict_keys(['x', 'y']) 

'x': type=<class 'torch.Tensor'>, shape=torch.Size([3, 512, 1024]) 

'y': type=<class 'torch.Tensor'>, shape=torch.Size([512, 1024]) 



In [None]:
# Summarise the loader dictionary and each split's loader, then peek at a
# single training batch to confirm the batched tensor shapes.
print(f"CITYSCAPES DATALOADER: type={type(cityscapes_dataloaders)}, keys={cityscapes_dataloaders.keys()} \n")
for split in ('train', 'val'):
    loader = cityscapes_dataloaders[split]
    print(f"{split.upper()} DATALOADER: type={type(loader)}, len={len(loader)} \n")

# Only the first batch is needed, so pull exactly one from a fresh iterator.
batch = next(iter(cityscapes_dataloaders['train']))
inputs, labels = batch['x'], batch['y']

print(f"BATCH INPUTS: type={type(inputs)}, shape={inputs.shape} \n")
print(f"BATCH LABELS: type={type(labels)}, shape={labels.shape} \n")

CITYSCAPES DATALOADER: type=<class 'dict'>, keys=dict_keys(['train', 'val']) 

TRAIN DATALOADER: type=<class 'torch.utils.data.dataloader.DataLoader'>, len=393 

VAL DATALOADER: type=<class 'torch.utils.data.dataloader.DataLoader'>, len=125 

BATCH INPUTS: type=<class 'torch.Tensor'>, shape=torch.Size([4, 3, 512, 1024]) 

BATCH LABELS: type=<class 'torch.Tensor'>, shape=torch.Size([4, 512, 1024]) 



In [None]:
def train_model(model, data_loader, dataset_sizes, criterion, optimizer,
                last_epoch_save_path, best_model_save_path, num_epochs=1,
                init_lr=0.01, prev_num_epoch=0, prev_best_miou=0,
                total_number_epochs=50, device=None):
    """Train and validate ``model`` for ``num_epochs`` epochs, checkpointing each epoch.

    Every epoch runs a 'train' phase followed by a 'val' phase, prints the mean
    loss and mIoU of each phase, updates the learning rate through
    ``poly_lr_scheduler_with_backbone`` and writes the model/optimizer state
    dicts to ``last_epoch_save_path``. When the validation mIoU beats the best
    value seen so far, the same state is also written to ``best_model_save_path``.

    Args:
        model: network to train. Its training-mode output is indexed with ``[0]``
            to obtain the logits; its eval-mode output is used as the logits directly.
        data_loader: dict with 'train' and 'val' loaders yielding dict batches
            with keys 'x' (inputs) and 'y' (labels).
        dataset_sizes: dict with the number of samples of the 'train' and 'val' sets.
        criterion: loss called as ``criterion(logits, labels)``; expected to
            return the mean loss over the batch.
        optimizer: optimizer stepped once per training batch.
        last_epoch_save_path: checkpoint file overwritten after every epoch.
        best_model_save_path: checkpoint file overwritten on each new best val mIoU.
        num_epochs: number of epochs to run in this call.
        init_lr: initial learning rate forwarded to the scheduler.
        prev_num_epoch: number of epochs completed before this call (for resuming).
        prev_best_miou: best validation mIoU reached before this call.
        total_number_epochs: total schedule length forwarded to the scheduler.
        device: device the batches are moved to. Defaults to the device of the
            model's parameters, so no notebook-level global is required.

    Returns:
        Tuple ``(model, time_elapsed, best_miou, best_per_class_iou)``.
        ``best_per_class_iou`` is ``None`` when no epoch of this call improved
        on ``prev_best_miou``.
    """
    if device is None:
        device = next(model.parameters()).device

    since = time.time()

    best_miou = prev_best_miou
    best_per_class_iou = None

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        since_epoch = time.time()

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is the same as eval()

            running_loss = 0.0
            hist = np.zeros((NUM_CLASSES, NUM_CLASSES))

            # Iterate over data
            for batch in data_loader[phase]:
                inputs = batch['x'].to(device)
                labels = batch['y'].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    # [B, C, H, W] logits: first element of the train-mode output,
                    # the whole output in eval mode.
                    logits = outputs[0] if is_train else outputs

                    loss = criterion(logits, labels)  # labels are [B, H, W]

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # The criterion yields a per-batch mean, so weight it by the batch
                # size; dividing the sum by the dataset size below then gives the
                # true mean loss per sample instead of a value ~batch_size too small.
                running_loss += loss.item() * inputs.size(0)

                preds = torch.argmax(logits, dim=1)  # from [B, C, H, W] to [B, H, W]
                # NOTE(review): arguments are passed as (predictions, labels) —
                # confirm this matches the parameter order fast_hist expects.
                hist += fast_hist(
                    preds.detach().cpu().numpy().flatten(),
                    labels.detach().cpu().numpy().flatten(),
                    NUM_CLASSES
                )

            ious = per_class_iou(hist) * 100
            miou = mean_iou(ious)

            epoch_loss = running_loss / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} mIoU: {miou:.4f}')

        # From here on `miou` / `ious` hold the validation results, because
        # 'val' is the last phase of the loop above.

        # Update learning rate with poly_lr_scheduler
        # NOTE(review): the scheduler receives the index of the epoch that just
        # finished, so the next epoch appears to train with a learning rate one
        # step behind the schedule — confirm this is intended.
        next_lr = poly_lr_scheduler_with_backbone(optimizer, init_lr, prev_num_epoch, total_number_epochs)
        prev_num_epoch += 1

        checkpoint = {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }

        # save the best model
        if miou > best_miou:
            best_miou = miou
            best_per_class_iou = ious
            torch.save(checkpoint, best_model_save_path)

        # save the last model
        torch.save(checkpoint, last_epoch_save_path)

        time_epoch = time.time() - since_epoch
        print(f'Epoch complete in {time_epoch // 60:.0f}m {time_epoch % 60:.0f}s')
        print(f'Next Learning Rate: {next_lr}')
        print()

    time_elapsed = time.time() - since
    print('-' * 20)
    print()
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val MIOU: {best_miou:4f}')
    print(f'Best val per class IOU: {best_per_class_iou}')
    print()
    print(f'Total Epochs completed: {prev_num_epoch}')

    return model, time_elapsed, best_miou, best_per_class_iou

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

Using cuda device


In [None]:
# Build DeepLabV2 starting from the pretrained weights and move it to the device.
model = deeplab.get_deeplab_v2(
    num_classes=NUM_CLASSES,
    pretrain=True,
    pretrain_model_path=PRETRAIN_MODEL_PATH
)
model = model.to(device)

# Two parameter groups: one trained at the base learning rate and one at 10x
# (presumably pretrained backbone vs. newly added classifier — see the model's
# get_1x_lr_params_no_scale / get_10x_lr_params).
optimizer = optim.SGD(
    params=[
    {'params': model.get_1x_lr_params_no_scale(), 'lr': LEARNING_RATE, 'initial_lr': LEARNING_RATE},
    {'params': model.get_10x_lr_params(), 'lr': LEARNING_RATE * 10, 'initial_lr': LEARNING_RATE * 10}
    ],
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY
)

# Pixels labelled 255 are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=255)


# load checkpoint previous epochs
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime can still be restored on a CPU-only one.
checkpoint = torch.load(LAST_EPOCH_SAVE_PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])


Deeplab pretraining loading...


  return disable_fn(*args, **kwargs)


In [None]:
# Resume the run restored above: 45 epochs are already done, so 5 more complete
# the 50-epoch schedule. prev_best_miou is the best validation mIoU of the
# earlier sessions, so the best-model checkpoint is only replaced by a better one.
model, time_elapsed, best_miou, best_per_class_iou = train_model(
    model, cityscapes_dataloaders, dataset_sizes, criterion, optimizer,
    LAST_EPOCH_SAVE_PATH, BEST_MODEL_SAVE_PATH,
    num_epochs=5,
    init_lr=LEARNING_RATE,
    prev_num_epoch=45,
    prev_best_miou=61.378382,
    total_number_epochs=50,
)

Epoch 1/5
----------
train Loss: 0.0164 Acc: 86.1205
val Loss: 0.0641 Acc: 61.2583
Epoch complete in 18m 41s
Next Learning Rate: [0.00012589254117941672, 0.001258925411794167]

Epoch 2/5
----------
train Loss: 0.0164 Acc: 86.0795
val Loss: 0.0644 Acc: 60.9707
Epoch complete in 15m 57s
Next Learning Rate: [0.00010298666348361787, 0.0010298666348361786]

Epoch 3/5
----------
train Loss: 0.0164 Acc: 86.1116
val Loss: 0.0648 Acc: 60.8351
Epoch complete in 15m 57s
Next Learning Rate: [7.949432487547622e-05, 0.0007949432487547622]

Epoch 4/5
----------
train Loss: 0.0164 Acc: 86.1507
val Loss: 0.0642 Acc: 61.2457
Epoch complete in 15m 57s
Next Learning Rate: [5.518918645844863e-05, 0.0005518918645844864]

Epoch 5/5
----------
train Loss: 0.0163 Acc: 86.1526
val Loss: 0.0652 Acc: 60.8997
Epoch complete in 15m 55s
Next Learning Rate: [2.9575152732566297e-05, 0.000295751527325663]

--------------------

Training complete in 82m 31s
Best val MIOU: 61.378382
Best val per class IOU: None

Total Ep

In [None]:
# FLOPS
# NOTE: this rebinds `model` to a freshly built network; the trained weights
# remain available in the checkpoints written during training.

model = deeplab.get_deeplab_v2(
    num_classes=NUM_CLASSES,
    pretrain=True,
    pretrain_model_path=PRETRAIN_MODEL_PATH
)
# Analyse the inference graph: in training mode the model produces a different
# output structure (the training loop indexes it with [0]), which is not what
# should be counted as the deployed model's cost.
model.eval()

# A single dummy image at the project resolution; only its shape matters here.
image = torch.zeros((1, 3, H, W))

flops = FlopCountAnalysis(model, image)
print(flop_count_table(flops))

Deeplab pretraining loading...
| module                         | #parameters or shape   | #flops     |
|:-------------------------------|:-----------------------|:-----------|
| model                          | 43.901M                | 0.376T     |
|  conv1                         |  9.408K                |  1.233G    |
|   conv1.weight                 |   (64, 3, 7, 7)        |            |
|  bn1                           |  0.128K                |  41.943M   |
|   bn1.weight                   |   (64,)                |            |
|   bn1.bias                     |   (64,)                |            |
|  layer1                        |  0.216M                |  7.295G    |
|   layer1.0                     |   75.008K              |   2.55G    |
|    layer1.0.conv1              |    4.096K              |    0.136G  |
|    layer1.0.bn1                |    0.128K              |    10.609M |
|    layer1.0.conv2              |    36.864K             |    1.222G  |
|    layer1.0.bn2   

In [None]:
# Latency / FPS benchmark of a single-image forward pass.
model = deeplab.get_deeplab_v2(
    num_classes=NUM_CLASSES,
    pretrain=True,
    pretrain_model_path=PRETRAIN_MODEL_PATH
)
model.eval()  # benchmark inference behaviour, not the training-mode forward

iterations = 1000
warmup_iterations = 10  # untimed passes that absorb one-off start-up costs

# random RGB image
image_np = np.random.rand(1, 3, H, W)
image = torch.from_numpy(image_np).float()

use_cuda = torch.cuda.is_available()
if use_cuda:
    model = model.to('cuda')
    image = image.to('cuda')

latency = []
FPS = []

# calculate latency and FPS
with torch.no_grad():  # no autograd bookkeeping while timing inference
    for _ in range(warmup_iterations):
        model(image)
    if use_cuda:
        torch.cuda.synchronize()

    for _ in range(iterations):
        start = time.perf_counter()
        output = model(image)
        if use_cuda:
            # CUDA kernels are launched asynchronously; wait for them to finish
            # so the timer covers the actual computation, not just the launch.
            torch.cuda.synchronize()
        end = time.perf_counter()

        latency_i = end - start
        latency.append(latency_i)

        FPS_i = 1 / latency_i
        FPS.append(FPS_i)

meanLatency = np.mean(latency) * 1000  # milliseconds
stdLatency = np.std(latency) * 1000   # milliseconds
meanFPS = np.mean(FPS)
stdFPS = np.std(FPS)

print(f"Mean Latency: {meanLatency:.2f} ms")
print(f"Latency Std Dev: {stdLatency:.2f} ms")
print(f"Mean FPS: {meanFPS:.2f}")
print(f"FPS Std Dev: {stdFPS:.2f}")

Deeplab pretraining loading...
Mean Latency: 67.94 ms
Latency Std Dev: 27.18 ms
Mean FPS: 14.95
FPS Std Dev: 1.63
