# Two-Stage Object detection w/ Freeze Learning & Fine-tuning
For this approach, it will be used a pretrained DensetNet201 as a backbone for a F-RCNN model for object detection, to which it will be applied freeze learning and fine-tuning to our task.

In [1]:
import os

DATA_DIR = "../data/"
IMG_DIR = DATA_DIR + "/images/"
ANNOTATION_DIR = DATA_DIR + "/annotations/"
SPLITS_DIR = DATA_DIR + "/dl-split/"
OUT_DIR = "./out/two_stage_obj_det/"

os.makedirs(OUT_DIR, exist_ok=True)

SEED = 42

# Load Dataset

In [2]:
# Fetching pre-defined splits
train_split = []
test_split = []

with open(SPLITS_DIR + "/train.txt") as train_split_f:
    train_split = [line.strip("\n") for line in train_split_f.readlines()]

with open(SPLITS_DIR + "/test.txt") as test_split_f:
    test_split = [line.strip("\n") for line in test_split_f.readlines()]

In [3]:
# Label mapping
label_encode_map = {
    "background": 0,
    "trafficlight": 1,
    "speedlimit": 2,
    "crosswalk": 3,
    "stop": 4,
}

label_decode_map = {
    0: "background",
    1: "trafficlight",
    2: "speedlimit",
    3: "crosswalk",
    4: "stop",
}

In [4]:
from datasets.road_sign_dataset import RoadSignDataset

# Training dataset
training_data = RoadSignDataset(
    img_names=train_split,
    img_dir=IMG_DIR,
    annotation_dir=ANNOTATION_DIR,
    classes=label_encode_map,
    is_train=True,
    multilabel=False,
    obj_detection=True
)

# Test dataset
testing_data = RoadSignDataset(
    img_names=test_split,
    img_dir=IMG_DIR,
    annotation_dir=ANNOTATION_DIR,
    classes=label_encode_map,
    is_train=False,
    multilabel=False,
    obj_detection=True
)

# Split training dataset into train and validation splits

In [5]:
import numpy as np
from torch.utils.data import SubsetRandomSampler

np.random.seed(SEED)

train_indices = list(range(len(training_data)))
np.random.shuffle(train_indices)
train_val_split = int(np.floor(0.2 * len(train_indices)))

train_idx, val_idx = train_indices[train_val_split:], train_indices[:train_val_split]
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

# Create Dataloaders

In [6]:
from torch.utils.data import DataLoader

BATCH_SIZE = 8 # Tested on 1050TI with 4GB
NUM_WORKERS = 4

train_dataloader = DataLoader(
    dataset=training_data,
    sampler=train_sampler,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False, # Must be False because we're using a random sampler already
    drop_last=True,
    collate_fn=training_data.collate_fn
)

val_dataloader = DataLoader(
    dataset=training_data,
    sampler=val_sampler,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False, # Must be False because we're using a random sampler already
    drop_last=True,
    collate_fn=training_data.collate_fn
)

test_dataloader = DataLoader(
    dataset=testing_data,
    batch_size=1,
    num_workers=NUM_WORKERS,
    shuffle=False,
    drop_last=False,
    collate_fn=testing_data.collate_fn
)

# Model Definition

In [7]:
from torchvision import models
from torch import nn
from torchvision.models.detection.backbone_utils import (
    LastLevelMaxPool,
    BackboneWithFPN
)
from torchvision.ops import (
    FeaturePyramidNetwork
)
from torchinfo import summary

MODEL_NAME = "FRCNN-Densenet201"
N_CLASSES = 4 + 1 # 4 target classes + background
"""_summary_
Adapted from fasterrcnn_resnet50_fpn at https://github.com/pytorch/vision/blob/c890a7e75ebeaaa75ae9ace4c203b7fc145df068/torchvision/models/detection/faster_rcnn.py
"""
def get_model(n_classes):
    backbone = models.densenet169(pretrained=True)
    
    return_layers = ["denseblock1", "denseblock2", "denseblock3", "denseblock4"]
    return_layers = {k: str(v) for v, k in enumerate(return_layers)}
    in_channel_list = [256, 512, 1280, 1664]
    backbone = BackboneWithFPN(
        backbone=backbone.features,
        return_layers=return_layers,
        in_channels_list=in_channel_list,
        out_channels=256,
        extra_blocks=LastLevelMaxPool()
    )
    
    model = models.detection.FasterRCNN(
        backbone=backbone,
        num_classes=n_classes
    )

    return model

model = get_model(n_classes=N_CLASSES)

summary(model)

Layer (type:depth-idx)                        Param #
FasterRCNN                                    --
├─GeneralizedRCNNTransform: 1-1               --
├─BackboneWithFPN: 1-2                        --
│    └─IntermediateLayerGetter: 2-1           --
│    │    └─Conv2d: 3-1                       9,408
│    │    └─BatchNorm2d: 3-2                  128
│    │    └─ReLU: 3-3                         --
│    │    └─MaxPool2d: 3-4                    --
│    │    └─_DenseBlock: 3-5                  335,040
│    │    └─_Transition: 3-6                  33,280
│    │    └─_DenseBlock: 3-7                  919,680
│    │    └─_Transition: 3-8                  132,096
│    │    └─_DenseBlock: 3-9                  8,071,680
│    │    └─_Transition: 3-10                 1,609,216
│    │    └─_DenseBlock: 3-11                 6,978,560
│    └─FeaturePyramidNetwork: 2-2             --
│    │    └─ModuleList: 3-12                  1,147,904
│    │    └─ModuleList: 3-13                  2,360,320
│    │

# Define Optimizer, LR Scheduler and Metric Scorer

In [8]:
from torch import optim
from torchmetrics.detection.mean_ap import MeanAveragePrecision

optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
    weight_decay=5e-4,
    amsgrad=True
)

lr_scheduler = optim.lr_scheduler.ExponentialLR(
    optimizer=optimizer,
    gamma=0.9
)

def get_metric_scorer():
    # This needs to be instantied everytime, otherwise scores accumulate
    return MeanAveragePrecision(box_format="xyxy")

# Define Epoch Iteration

In [9]:
import torch
from tqdm import tqdm

def train_epoch_iter(dataloader, model, device, optimizer, lr_scheduler=None):
    num_batches = len(dataloader)

    model.train()

    total_loss = 0.0

    scaler = torch.cuda.amp.GradScaler()

    with torch.set_grad_enabled(True):
        for _batch, (X, y) in enumerate(tqdm(dataloader)):
            for annotation in y:
                annotation.pop("imageId")
                annotation.pop("areas")
            X = list(x.to(device) for x in X)
            y = [ { k: v.to(device) for k, v in annotation.items() } for annotation in y]

            with torch.cuda.amp.autocast():
                outputs = model(X, y)

            losses = sum(loss for loss in outputs.values())

            optimizer.zero_grad()
            scaler.scale(losses).backward()
            scaler.step(optimizer)

            total_loss += losses.item()

            scaler.update()
    
        if lr_scheduler is not None:
            lr_scheduler.step()
        
    return total_loss / num_batches

In [10]:
def eval_epoch_iter(dataloader, model, device):
    model.eval()

    preds = []
    expected_labels = []
    imageIds = []

    with torch.set_grad_enabled(False):
        for _batch, (X, y) in enumerate(tqdm(dataloader)):
            X = list(x.to(device) for x in X)

            with torch.cuda.amp.autocast():
                outputs = model(X)

            for annotation in y:
                imageIds.append(annotation.pop("imageId").item())
                annotation.pop("areas")

            # Put everything in CPU for safety
            outputs = [ {k: v.detach().cpu() for k, v in o.items()} for o in outputs ]
            y = [ {k: v.detach().cpu() for k, v in annotation.items()} for annotation in y]

            preds.extend(outputs)
            expected_labels.extend(y)
            
    return expected_labels, preds, imageIds

# Train Model

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")

Using device: cuda:0


In [12]:
NUM_EPOCHS = 30

model.to(device)
model.backbone.body.requires_grad_(False) # Freeze feature layer

train_loss_history = []
val_map_history = []

best_map = np.inf
best_epoch = -1

print(f"Starting {MODEL_NAME} training...")

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"Epoch[{epoch}/{NUM_EPOCHS}]")

    train_loss = train_epoch_iter(
        dataloader=train_dataloader,
        model=model,
        device=device,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler
    )

    print(f"Training loss: {train_loss:.3f}")

    val_target, val_preds, _ = eval_epoch_iter(
        dataloader=val_dataloader,
        model=model,
        device=device
    )

    metric_scorer = get_metric_scorer()
    
    metric_scorer.update(val_preds, val_target)
    # Only present mAP for simplicity
    val_map = metric_scorer.compute()["map"].item()
    print(f"Validation mAP: {val_map:3f}")

    # Save best model
    if val_map > best_map:
        best_map = val_map
        best_epoch = epoch
        save_dict = {"model": model.state_dict(), "optimizer": optimizer.state_dict(), "lr_scheduler": lr_scheduler.state_dict(), "epoch": epoch}
        torch.save(save_dict, f"{OUT_DIR}/{MODEL_NAME}_best_model.pth")

    # Save latest model
    save_dict = {"model": model.state_dict(), "optimizer": optimizer.state_dict(), "lr_scheduler": lr_scheduler.state_dict(), "epoch": epoch}
    torch.save(save_dict, f"{OUT_DIR}/{MODEL_NAME}_latest_model.pth")

    # Save loss and mAP in history
    train_loss_history.append(train_loss)

    val_map_history.append(val_map)

    print("----------------------------------------------------------------")

print(
    f"\nFinished training..."
    f"\nBest epoch: {best_epoch}\t Validation mAP on best epoch: {best_map}"
)


Starting FRCNN-Densenet201 training...
Epoch[1/30]


100%|██████████| 61/61 [04:29<00:00,  4.42s/it]


Training loss: 0.461


100%|██████████| 15/15 [00:50<00:00,  3.37s/it]


Validation mAP: 0.010974
----------------------------------------------------------------
Epoch[2/30]


100%|██████████| 61/61 [04:18<00:00,  4.24s/it]


Training loss: 0.248


100%|██████████| 15/15 [00:42<00:00,  2.84s/it]


Validation mAP: 0.066365
----------------------------------------------------------------
Epoch[3/30]


100%|██████████| 61/61 [04:13<00:00,  4.15s/it]


Training loss: 0.230


100%|██████████| 15/15 [00:38<00:00,  2.55s/it]


Validation mAP: 0.096671
----------------------------------------------------------------
Epoch[4/30]


100%|██████████| 61/61 [04:11<00:00,  4.13s/it]


Training loss: 0.191


100%|██████████| 15/15 [00:37<00:00,  2.50s/it]


Validation mAP: 0.112983
----------------------------------------------------------------
Epoch[5/30]


100%|██████████| 61/61 [04:12<00:00,  4.14s/it]


Training loss: 0.211


100%|██████████| 15/15 [00:39<00:00,  2.60s/it]


Validation mAP: 0.149182
----------------------------------------------------------------
Epoch[6/30]


100%|██████████| 61/61 [04:17<00:00,  4.23s/it]


Training loss: 0.194


100%|██████████| 15/15 [00:44<00:00,  2.98s/it]


Validation mAP: 0.133527
----------------------------------------------------------------
Epoch[7/30]


100%|██████████| 61/61 [04:13<00:00,  4.16s/it]


Training loss: 0.169


100%|██████████| 15/15 [00:43<00:00,  2.91s/it]


Validation mAP: 0.151650
----------------------------------------------------------------
Epoch[8/30]


100%|██████████| 61/61 [04:13<00:00,  4.15s/it]


Training loss: 0.160


100%|██████████| 15/15 [00:37<00:00,  2.53s/it]


Validation mAP: 0.147950
----------------------------------------------------------------
Epoch[9/30]


100%|██████████| 61/61 [04:10<00:00,  4.11s/it]


Training loss: 0.167


100%|██████████| 15/15 [00:43<00:00,  2.92s/it]


Validation mAP: 0.238225
----------------------------------------------------------------
Epoch[10/30]


100%|██████████| 61/61 [04:08<00:00,  4.07s/it]


Training loss: 0.157


100%|██████████| 15/15 [00:44<00:00,  2.97s/it]


Validation mAP: 0.184426
----------------------------------------------------------------
Epoch[11/30]


100%|██████████| 61/61 [04:07<00:00,  4.06s/it]


Training loss: 0.147


100%|██████████| 15/15 [00:38<00:00,  2.54s/it]


Validation mAP: 0.194871
----------------------------------------------------------------
Epoch[12/30]


100%|██████████| 61/61 [04:12<00:00,  4.13s/it]


Training loss: 0.161


100%|██████████| 15/15 [00:39<00:00,  2.63s/it]


Validation mAP: 0.198311
----------------------------------------------------------------
Epoch[13/30]


100%|██████████| 61/61 [04:10<00:00,  4.11s/it]


Training loss: 0.152


100%|██████████| 15/15 [00:36<00:00,  2.44s/it]


Validation mAP: 0.191340
----------------------------------------------------------------
Epoch[14/30]


100%|██████████| 61/61 [04:08<00:00,  4.07s/it]


Training loss: 0.166


100%|██████████| 15/15 [00:39<00:00,  2.61s/it]


Validation mAP: 0.246171
----------------------------------------------------------------
Epoch[15/30]


100%|██████████| 61/61 [04:12<00:00,  4.13s/it]


Training loss: 0.148


100%|██████████| 15/15 [00:43<00:00,  2.90s/it]


Validation mAP: 0.203845
----------------------------------------------------------------
Epoch[16/30]


100%|██████████| 61/61 [04:07<00:00,  4.07s/it]


Training loss: 0.139


100%|██████████| 15/15 [00:37<00:00,  2.47s/it]


Validation mAP: 0.161978
----------------------------------------------------------------
Epoch[17/30]


100%|██████████| 61/61 [04:09<00:00,  4.09s/it]


Training loss: 0.148


100%|██████████| 15/15 [00:37<00:00,  2.51s/it]


Validation mAP: 0.228848
----------------------------------------------------------------
Epoch[18/30]


100%|██████████| 61/61 [04:09<00:00,  4.10s/it]


Training loss: 0.163


100%|██████████| 15/15 [00:36<00:00,  2.46s/it]


Validation mAP: 0.256801
----------------------------------------------------------------
Epoch[19/30]


100%|██████████| 61/61 [04:08<00:00,  4.07s/it]


Training loss: 0.137


100%|██████████| 15/15 [00:37<00:00,  2.47s/it]


Validation mAP: 0.265114
----------------------------------------------------------------
Epoch[20/30]


100%|██████████| 61/61 [04:07<00:00,  4.06s/it]


Training loss: 0.152


100%|██████████| 15/15 [00:36<00:00,  2.46s/it]


Validation mAP: 0.226959
----------------------------------------------------------------
Epoch[21/30]


100%|██████████| 61/61 [04:10<00:00,  4.10s/it]


Training loss: 0.155


100%|██████████| 15/15 [00:41<00:00,  2.80s/it]


Validation mAP: 0.221393
----------------------------------------------------------------
Epoch[22/30]


100%|██████████| 61/61 [04:07<00:00,  4.05s/it]


Training loss: 0.138


100%|██████████| 15/15 [00:39<00:00,  2.64s/it]


Validation mAP: 0.291542
----------------------------------------------------------------
Epoch[23/30]


100%|██████████| 61/61 [04:12<00:00,  4.14s/it]


Training loss: 0.122


100%|██████████| 15/15 [00:39<00:00,  2.64s/it]


Validation mAP: 0.262315
----------------------------------------------------------------
Epoch[24/30]


100%|██████████| 61/61 [04:12<00:00,  4.13s/it]


Training loss: 0.129


100%|██████████| 15/15 [00:42<00:00,  2.82s/it]


Validation mAP: 0.287306
----------------------------------------------------------------
Epoch[25/30]


100%|██████████| 61/61 [04:07<00:00,  4.06s/it]


Training loss: 0.134


100%|██████████| 15/15 [00:41<00:00,  2.76s/it]


Validation mAP: 0.249583
----------------------------------------------------------------
Epoch[26/30]


100%|██████████| 61/61 [04:11<00:00,  4.12s/it]


Training loss: 0.128


100%|██████████| 15/15 [00:40<00:00,  2.70s/it]


Validation mAP: 0.279420
----------------------------------------------------------------
Epoch[27/30]


100%|██████████| 61/61 [04:10<00:00,  4.11s/it]


Training loss: 0.129


100%|██████████| 15/15 [00:38<00:00,  2.54s/it]


Validation mAP: 0.328182
----------------------------------------------------------------
Epoch[28/30]


100%|██████████| 61/61 [04:08<00:00,  4.08s/it]


Training loss: 0.132


100%|██████████| 15/15 [00:39<00:00,  2.60s/it]


Validation mAP: 0.258429
----------------------------------------------------------------
Epoch[29/30]


100%|██████████| 61/61 [04:06<00:00,  4.05s/it]


Training loss: 0.135


100%|██████████| 15/15 [00:43<00:00,  2.92s/it]


Validation mAP: 0.306566
----------------------------------------------------------------
Epoch[30/30]


100%|██████████| 61/61 [04:07<00:00,  4.06s/it]


Training loss: 0.128


100%|██████████| 15/15 [00:42<00:00,  2.83s/it]


Validation mAP: 0.294273
----------------------------------------------------------------

Finished training...
Best epoch: 1	 Validation mAP on best epoch: 0.010974389500916004
