In [None]:
!pip install fvcore

Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.6 (from fvcore)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting iopath>=0.1.7 (from fvcore)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.7->fvcore)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Downloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Building wheels for collected packages: fvcore, iop

In [None]:
# Mount Google Drive inside the Colab VM; the project sources, the dataset and
# the checkpoints used below are all read from / written to this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# import modules
import numpy as np
import matplotlib.pyplot as plt
import random
import torch
import torch.nn as nn
import time
import torch.optim as optim
import os
import sys

from torchvision import transforms
from torchvision.transforms import InterpolationMode
from fvcore.nn import FlopCountAnalysis, flop_count_table
from torch.utils.data import DataLoader

In [None]:
# ---------------------------------------------------------------------------
# Paths (Google Drive layout)
# ---------------------------------------------------------------------------
PROJECT_PATH = '/content/drive/MyDrive/Colab Notebooks/MLME2025_project'
# NOTE(review): folder is spelled "Cityspaces" - presumably this matches the
# actual directory name on Drive; verify before "fixing" the spelling.
CITYSCAPES_DIR = '/content/drive/MyDrive/Cityspaces/'

# Checkpoints live inside the project tree. They are derived from PROJECT_PATH
# so that relocating the project only requires a single edit.
CHECKPOINT_DIR = f'{PROJECT_PATH}/models/BiSeNet/checkpoints_training'
BEST_MODEL_SAVE_PATH = f'{CHECKPOINT_DIR}/best_model_BiSeNet.pth'
LAST_EPOCH_SAVE_PATH = f'{CHECKPOINT_DIR}/last_epoch_BiSeNet.pth'

# ---------------------------------------------------------------------------
# Data / model dimensions
# ---------------------------------------------------------------------------
NUM_CLASSES = 19   # number of classes passed to BiSeNet and to the IoU histogram
H = 512            # target image height after resizing
W = 1024           # target image width after resizing

# ---------------------------------------------------------------------------
# DataLoader and optimizer hyper-parameters
# ---------------------------------------------------------------------------
BATCH_SIZE = 4
NUM_WORKERS = 4
LEARNING_RATE = 0.025   # base LR; the backbone parameter group uses 0.1x this value
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0001

In [None]:
# Work from the project folder and make its packages importable.
os.chdir(PROJECT_PATH)

# Guard the append so re-running this cell does not keep adding duplicate
# entries to sys.path.
project_root = os.getcwd()
if project_root not in sys.path:
    sys.path.append(project_root)

# Project-local modules (resolved relative to PROJECT_PATH).
from datasets.cityscapes import CityScapes
from utils.utils import poly_lr_scheduler, poly_lr_scheduler_with_backbone, fast_hist, per_class_iou, mean_iou
from models.BiSeNet.build_bisenet import BiSeNet

In [None]:
# Single seed for every RNG used by the notebook. The previous version mixed
# 42 (torch / numpy) with 0 (random / CUDA), and the explicit CUDA call
# overrode the CUDA seed that torch.manual_seed had just set.
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)            # seeds the CPU generator (and CUDA generators)
torch.cuda.manual_seed_all(SEED)   # explicit for all GPUs; safe to call without CUDA

# Ask cuDNN for deterministic kernels and disable its auto-tuner, trading some
# speed for run-to-run reproducibility.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# ---------------------------------------------------------------------------
# Image transforms
# ---------------------------------------------------------------------------
# Both splits get the same deterministic preprocessing: bilinear resize to
# (H, W), conversion to a float tensor, and channel-wise normalisation with the
# commonly used ImageNet mean / std.

def _build_image_transform():
    """Return the resize -> tensor -> normalise pipeline applied to input images."""
    return transforms.Compose([
        transforms.Resize((H, W), interpolation=InterpolationMode.BILINEAR),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


data_transforms = {
    'train': _build_image_transform(),
    'val': _build_image_transform()
}

# ---------------------------------------------------------------------------
# Label transforms
# ---------------------------------------------------------------------------

def _mask_to_long_tensor(mask):
    """Convert a label image to an int64 tensor without rescaling its values.

    Defined as a named function (rather than a lambda) so the transform stays
    picklable if DataLoader workers are started with the "spawn" method.
    """
    return torch.from_numpy(np.array(mask)).long()


label_transform = transforms.Compose([
    # Nearest-neighbour keeps label ids intact (no interpolated, invalid ids).
    transforms.Resize((H, W), interpolation=InterpolationMode.NEAREST),
    transforms.Lambda(_mask_to_long_tensor)
])

# ---------------------------------------------------------------------------
# Datasets and loaders
# ---------------------------------------------------------------------------

cityscapes_datasets = {
    x: CityScapes(
        data_path=CITYSCAPES_DIR,
        split=x,
        transform=data_transforms[x],
        label_transform=label_transform
    )
    for x in ['train', 'val']
}

cityscapes_dataloaders = {
    x: DataLoader(
        cityscapes_datasets[x],
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        # Only the training split is shuffled; validation is iterated in a
        # fixed order so evaluation is deterministic.
        shuffle=(x == 'train'),
        pin_memory=True
    )
    for x in ['train', 'val']
}

# Number of samples per split (used to average the epoch loss).
dataset_sizes = {x: len(cityscapes_datasets[x]) for x in ['train', 'val']}

In [None]:
def _save_checkpoint(path, model, optimizer, epochs_done, best_miou):
    """Write model/optimizer state plus resume bookkeeping to ``path``.

    ``epoch`` and ``best_miou`` are stored as plain Python numbers so the file
    contains only tensors and primitives.
    """
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': int(epochs_done),
        'best_miou': float(best_miou),
    }, path)


def train_model(model, data_loader, dataset_sizes, criterion, optimizer,
                last_epoch_save_path, best_model_save_path, num_epochs=1,
                init_lr=0.01, prev_num_epoch=0, prev_best_miou=0,
                total_number_epochs=50):
    """Train and validate ``model`` for ``num_epochs`` epochs.

    Each epoch runs a 'train' phase followed by a 'val' phase, updates the
    learning rate with ``poly_lr_scheduler_with_backbone``, saves the best
    model (by validation mIoU) and always saves the last-epoch checkpoint.

    Args:
        model: network to train; indexed with ``[0]`` on its output in train
            mode and used directly in eval mode.
        data_loader: dict with 'train' and 'val' loaders yielding batches that
            are dicts with keys 'x' (inputs) and 'y' (labels).
        dataset_sizes: dict with the number of samples for 'train' and 'val'.
        criterion: loss callable ``criterion(logits, labels)``.
        optimizer: optimizer updated during the train phase.
        last_epoch_save_path: file path for the checkpoint written every epoch.
        best_model_save_path: file path for the best-validation checkpoint.
        num_epochs: number of epochs to run in this call.
        init_lr: initial learning rate handed to the poly scheduler.
        prev_num_epoch: epochs already completed before this call (resume).
        prev_best_miou: best validation mIoU reached before this call.
        total_number_epochs: total planned epochs (scheduler horizon).

    Returns:
        Tuple ``(model, time_elapsed, best_miou, best_per_class_iou)``;
        ``best_per_class_iou`` is ``None`` if no epoch improved on
        ``prev_best_miou``.
    """
    since = time.time()

    # Run on the device the model already lives on instead of relying on a
    # notebook-level ``device`` global.
    run_device = next(model.parameters()).device

    best_miou = prev_best_miou
    best_per_class_iou = None

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        since_epoch = time.time()
        val_miou, val_ious = None, None

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            hist = np.zeros((NUM_CLASSES, NUM_CLASSES))

            for batch in data_loader[phase]:
                inputs = batch['x'].to(run_device)
                labels = batch['y'].to(run_device)

                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    # Train mode: only the first returned element is used for
                    # the loss; eval mode: the output is used as-is.
                    logits = outputs[0] if is_train else outputs
                    loss = criterion(logits, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # ``loss`` is a per-batch mean, so weight it by the batch size
                # before dividing by the dataset size below. Summing the raw
                # means under-reported the loss by roughly the batch size.
                running_loss += loss.item() * inputs.size(0)

                preds = torch.argmax(logits.detach(), dim=1)
                # NOTE(review): arguments are passed as (predictions, labels);
                # confirm this matches the order fast_hist expects.
                hist += fast_hist(
                    preds.cpu().numpy().flatten(),
                    labels.cpu().numpy().flatten(),
                    NUM_CLASSES
                )

            ious = per_class_iou(hist) * 100
            miou = mean_iou(ious)

            epoch_loss = running_loss / dataset_sizes[phase]

            # The metric is a mean IoU, so it is labelled as such.
            print(f'{phase} Loss: {epoch_loss:.4f} mIoU: {miou:.4f}')

            if not is_train:
                val_miou, val_ious = miou, ious

        # One more epoch is done: advance the counter *before* querying the
        # scheduler so the LR set here is the one for the upcoming epoch.
        # Calling it with the old counter reused the initial LR for epoch 2.
        prev_num_epoch += 1
        next_lr = poly_lr_scheduler_with_backbone(optimizer, init_lr, prev_num_epoch, total_number_epochs)

        # Save the best model, judged on the validation mIoU of this epoch.
        if val_miou > best_miou:
            best_miou = val_miou
            best_per_class_iou = val_ious
            _save_checkpoint(best_model_save_path, model, optimizer,
                             prev_num_epoch, best_miou)

        time_epoch = time.time() - since_epoch
        print(f'Epoch complete in {time_epoch // 60:.0f}m {time_epoch % 60:.0f}s')
        print(f'Next Learning Rate: {next_lr}')
        print()

        # Always save the most recent state so training can be resumed.
        _save_checkpoint(last_epoch_save_path, model, optimizer,
                         prev_num_epoch, best_miou)

    time_elapsed = time.time() - since
    print('-' * 20)
    print()
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val MIOU: {best_miou:.4f}')
    print(f'Best val per class IOU: {best_per_class_iou}')
    print()
    print(f'Total Epochs completed: {prev_num_epoch}')

    return model, time_elapsed, best_miou, best_per_class_iou

In [None]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")

Using cuda device


In [None]:
# Build the network with a ResNet-18 context path and move it to the device.
context_path = 'resnet18'

model = BiSeNet(num_classes=NUM_CLASSES, context_path=context_path)
model = model.to(device)

# Two parameter groups: the backbone (context path) trains with a 10x smaller
# learning rate than the modules listed in model.mul_lr.
optimizer = optim.SGD(
    params=[
    {'params': model.context_path.parameters(), 'lr': LEARNING_RATE * 0.1, 'initial_lr': LEARNING_RATE * 0.1},
    {'params': [p for module in model.mul_lr for p in module.parameters()], 'lr': LEARNING_RATE, 'initial_lr': LEARNING_RATE}
    ],
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY
)

# Pixels labelled 255 do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=255)

# Resume from the last-epoch checkpoint when one exists. On a fresh run there
# is no file yet, so the load is guarded instead of raising FileNotFoundError.
# map_location lets a checkpoint saved on GPU be restored on a CPU runtime.
if os.path.isfile(LAST_EPOCH_SAVE_PATH):
    checkpoint = torch.load(LAST_EPOCH_SAVE_PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    print(f'Resumed from checkpoint: {LAST_EPOCH_SAVE_PATH}')
else:
    print(f'No checkpoint found at {LAST_EPOCH_SAVE_PATH}; starting from scratch')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 182MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:00<00:00, 236MB/s]


In [None]:
# Launch the full training run: 50 epochs from epoch 0 with a 50-epoch poly
# learning-rate horizon. The objects built above are passed positionally, the
# schedule settings by keyword.
model, time_elapsed, best_miou, best_per_class_iou = train_model(
    model,
    cityscapes_dataloaders,
    dataset_sizes,
    criterion,
    optimizer,
    LAST_EPOCH_SAVE_PATH,
    BEST_MODEL_SAVE_PATH,
    num_epochs=50,
    init_lr=LEARNING_RATE,
    prev_num_epoch=0,
    prev_best_miou=0,
    total_number_epochs=50,
)

Epoch 1/50
----------
train Loss: 0.1370 Acc: 29.0402
val Loss: 0.1104 Acc: 34.1561
Epoch complete in 16m 5s
Current Learning Rate: [0.002455069401572873, 0.024550694015728725]

Epoch 2/50
----------
train Loss: 0.0860 Acc: 40.9785
val Loss: 0.0942 Acc: 40.5805
Epoch complete in 1m 26s
Current Learning Rate: [0.0024099322750412216, 0.02409932275041221]

Epoch 3/50
----------
train Loss: 0.0731 Acc: 47.9652
val Loss: 0.0891 Acc: 42.2885
Epoch complete in 1m 27s
Current Learning Rate: [0.0023647010126449453, 0.023647010126449448]

Epoch 4/50
----------
train Loss: 0.0647 Acc: 53.6112
val Loss: 0.0830 Acc: 45.2575
Epoch complete in 1m 27s
Current Learning Rate: [0.0023193734086485443, 0.023193734086485437]

Epoch 5/50
----------
train Loss: 0.0581 Acc: 57.9216
val Loss: 0.0796 Acc: 46.2836
Epoch complete in 1m 26s
Current Learning Rate: [0.002273947156492986, 0.022739471564929854]

Epoch 6/50
----------
train Loss: 0.0545 Acc: 60.5895
val Loss: 0.0779 Acc: 47.7315
Epoch complete in 1m 27s

In [None]:
# FLOPS
# Count FLOPs / parameters of a freshly built BiSeNet on one (1, 3, H, W) input.
# Uses the notebook's `device` rather than a hard-coded 'cuda' so the cell also
# runs on a CPU-only runtime.

context_path = 'resnet18'
model = BiSeNet(num_classes=NUM_CLASSES, context_path=context_path)
model = model.to(device)
model.eval()

# Dummy input created directly on the target device (values are irrelevant
# for FLOP counting).
image = torch.zeros((1, 3, H, W), device=device)

flops = FlopCountAnalysis(model, image)
print(flop_count_table(flops))

| module                                      | #parameters or shape   | #flops     |
|:--------------------------------------------|:-----------------------|:-----------|
| model                                       | 12.582M                | 25.78G     |
|  saptial_path                               |  0.371M                |  5.088G    |
|   saptial_path.convblock1                   |   1.856K               |   0.243G   |
|    saptial_path.convblock1.conv1            |    1.728K              |    0.226G  |
|    saptial_path.convblock1.bn               |    0.128K              |    16.777M |
|   saptial_path.convblock2                   |   73.984K              |   2.424G   |
|    saptial_path.convblock2.conv1            |    73.728K             |    2.416G  |
|    saptial_path.convblock2.bn               |    0.256K              |    8.389M  |
|   saptial_path.convblock3                   |   0.295M               |   2.42G    |
|    saptial_path.convblock3.conv1            |    0.2

In [None]:
# LATENCY and FPS
# Measure single-image inference latency of a freshly built BiSeNet.
#
# CUDA kernels are launched asynchronously, so the host clock must only be read
# after torch.cuda.synchronize(); otherwise the measurement mostly captures the
# kernel-launch overhead instead of the actual compute time. A few warm-up
# passes are run first so one-off initialisation costs do not skew the stats.

context_path = 'resnet18'
model = BiSeNet(num_classes=NUM_CLASSES, context_path=context_path)
model = model.to(device)
model.eval()

iterations = 1000
warmup_iterations = 10

image_np = np.random.rand(1, 3, H, W)
image = torch.from_numpy(image_np).float().to(device)

# Synchronisation is only meaningful (and only available) when running on GPU.
use_cuda = torch.cuda.is_available() and str(device).startswith('cuda')

latency = []
FPS = []

# Inference only: no autograd bookkeeping while timing.
with torch.no_grad():
    for _ in range(warmup_iterations):
        model(image)
    if use_cuda:
        torch.cuda.synchronize()

    for _ in range(iterations):
        start = time.perf_counter()
        output = model(image)
        if use_cuda:
            torch.cuda.synchronize()  # wait for the GPU to finish this forward pass
        end = time.perf_counter()

        latency_i = end - start
        latency.append(latency_i)

        FPS_i = 1 / latency_i
        FPS.append(FPS_i)

# Latency is reported in milliseconds, FPS as the mean of per-iteration rates.
meanLatency = np.mean(latency) * 1000
stdLatency = np.std(latency) * 1000
meanFPS = np.mean(FPS)
stdFPS = np.std(FPS)

print(f"Mean Latency: {meanLatency:.2f} ms")
print(f"Latency Std Dev: {stdLatency:.2f} ms")
print(f"Mean FPS: {meanFPS:.2f}")
print(f"FPS Std Dev: {stdFPS:.2f}")

Mean Latency: 6.71 ms
Latency Std Dev: 0.34 ms
Mean FPS: 149.59
FPS Std Dev: 9.47
