In [1]:
# Imports for this cell. The wildcard import is kept first so that the
# explicit imports below take precedence over anything it brings in.
from a4_helper import *
from a4_helper import VOC2007DetectionTiny
from eecs598 import reset_seed
import multiprocessing

# Constants related to data loading.
NUM_CLASSES = 20
BATCH_SIZE = 16
IMAGE_SHAPE = (4 * 224,) * 2  # (896, 896)
NUM_WORKERS = multiprocessing.cpu_count()
DEVICE = 'cpu'

# Train / val splits; both are built with image_size set to IMAGE_SHAPE[0].
train_dataset = VOC2007DetectionTiny(
    './A4',
    "train",
    image_size=IMAGE_SHAPE[0],
    download=False,  # switch to True on the first run
)
val_dataset = VOC2007DetectionTiny(
    './A4',
    "val",
    image_size=IMAGE_SHAPE[0],
)

  warn(f"Failed to load image Python extension: {e}")


In [2]:
import torch
import torchvision

from common import DetectorBackboneWithFPN

# Sanity check: build the FPN backbone with 64 output channels and push a
# random batch of two 3x224x224 images through it.
backbone = DetectorBackboneWithFPN(out_channels=64)
dummy_images = torch.randn(2, 3, 224, 224)
dummy_fpn_feats = backbone(dummy_images)

For dummy input images with shape: (2, 3, 224, 224)
Shape of p3 features: torch.Size([2, 64, 28, 28])
Shape of p4 features: torch.Size([2, 64, 14, 14])
Shape of p5 features: torch.Size([2, 64, 7, 7])
Shape of p6 features: torch.Size([2, 64, 4, 4])
Shape of p7 features: torch.Size([2, 64, 2, 2])


In [3]:
# Model block sanity check: list the graph node names that torchvision's
# feature-extraction utility reports for a pretrained RegNetX-400MF.
model = torchvision.models.regnet_x_400mf(pretrained=True)
node_names = torchvision.models.feature_extraction.get_graph_node_names(model)
train_nodes, eval_nodes = node_names

# Print the train-mode names first, then the eval-mode names.
for names in (train_nodes, eval_nodes):
    print(names)

['x', 'stem.0', 'stem.1', 'stem.2', 'trunk_output.block1.block1-0.proj.0', 'trunk_output.block1.block1-0.proj.1', 'trunk_output.block1.block1-0.f.a.0', 'trunk_output.block1.block1-0.f.a.1', 'trunk_output.block1.block1-0.f.a.2', 'trunk_output.block1.block1-0.f.b.0', 'trunk_output.block1.block1-0.f.b.1', 'trunk_output.block1.block1-0.f.b.2', 'trunk_output.block1.block1-0.f.c.0', 'trunk_output.block1.block1-0.f.c.1', 'trunk_output.block1.block1-0.add', 'trunk_output.block1.block1-0.activation', 'trunk_output.block2.block2-0.proj.0', 'trunk_output.block2.block2-0.proj.1', 'trunk_output.block2.block2-0.f.a.0', 'trunk_output.block2.block2-0.f.a.1', 'trunk_output.block2.block2-0.f.a.2', 'trunk_output.block2.block2-0.f.b.0', 'trunk_output.block2.block2-0.f.b.1', 'trunk_output.block2.block2-0.f.b.2', 'trunk_output.block2.block2-0.f.c.0', 'trunk_output.block2.block2-0.f.c.1', 'trunk_output.block2.block2-0.add', 'trunk_output.block2.block2-0.activation', 'trunk_output.block2.block2-1.f.a.0', 'tru

In [4]:
# FPN trainability check (currently disabled: every line of this cell is commented out).

# from torch import nn
# from a4_helper import train_detector
# from common import DetectorBackboneWithFPN
# from FPN_ROI import RPN
# reset_seed(0)

# # Take equally spaced examples from training dataset to make a subset.
# small_dataset = torch.utils.data.Subset(
#     train_dataset,
#     torch.linspace(0, len(train_dataset) - 1, steps=BATCH_SIZE * 10).long()
# )
# small_train_loader = torch.utils.data.DataLoader(
#     small_dataset, batch_size=BATCH_SIZE, pin_memory=True
# )

# # Create a wrapper module to contain backbone + RPN:
# class FirstStage(nn.Module):
#     def __init__(self, fpn_channels: int):
#         super().__init__()
#         self.backbone = DetectorBackboneWithFPN(out_channels=fpn_channels)
#         self.rpn = RPN(
#             fpn_channels=fpn_channels,
#             # Simple stem of two layers:
#             stem_channels=[fpn_channels, fpn_channels],
#             batch_size_per_image=16,
#             anchor_stride_scale=8,
#             anchor_aspect_ratios=[0.5, 1.0, 2.0],
#             anchor_iou_thresholds=(0.3, 0.6),
#         )

#     def forward(self, images, gt_boxes=None):
#         feats_per_fpn_level = self.backbone(images)
#         return self.rpn(feats_per_fpn_level, self.backbone.fpn_strides, gt_boxes)


# first_stage = FirstStage(fpn_channels=64).to(DEVICE)

# train_detector(
#     first_stage,
#     small_train_loader,
#     learning_rate=8e-3,
#     max_iters=1000,
#     log_period=20,
#     device=DEVICE,
# )

In [5]:
# Load data: wrap the train / val datasets in DataLoaders and inspect one
# training batch.

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, pin_memory=True
)

# Validation is iterated one image at a time.
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=1, pin_memory=True
)

# Use the built-in next() rather than the iterator's `.next()` method:
# `.next()` is the Python 2 spelling and is not available on DataLoader
# iterators in current PyTorch releases, where it raises AttributeError.
train_loader_iter = iter(train_loader)
image_paths, images, gt_boxes = next(train_loader_iter)

print(f"image paths           : {image_paths}")
print(f"image batch has shape : {images.shape}")
print(f"gt_boxes has shape    : {gt_boxes.shape}")

# Show the first five box rows of every image in the batch.
print("Five boxes per image  :")
print(gt_boxes[:, :5, :])

image paths           : ['./A4\\VOCdevkit/VOC2007/JPEGImages/000012.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000017.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000023.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000026.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000032.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000033.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000034.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000035.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000036.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000042.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000044.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000047.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000048.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000061.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000064.jpg', './A4\\VOCdevkit/VOC2007/JPEGImages/000066.jpg']
image batch has shape : torch.Size([16, 3, 896, 896])
gt_boxes has shape    : torch.Size([16, 40, 5])
Five boxes per image  :
tensor([[[195.7477, 260.9970, 720.4324, 726.4865,   6.0000],
    

In [6]:
# `os` is not imported explicitly in any other visible cell; import it here so
# that saving the weights after the long training run cannot fail with a
# NameError if the wildcard import in the first cell does not provide it.
import os

from torch import nn

from a4_helper import train_detector
from common import DetectorBackboneWithFPN
from FPN_ROI import RPN, FasterRCNN

# Reset the seed before any module is constructed.
reset_seed(0)

# Slightly larger detector than the (commented-out) FPN trainability check
# cell, which used 64 FPN channels.
FPN_CHANNELS = 128
backbone = DetectorBackboneWithFPN(out_channels=FPN_CHANNELS)
rpn = RPN(
    fpn_channels=FPN_CHANNELS,
    stem_channels=[FPN_CHANNELS, FPN_CHANNELS],
    batch_size_per_image=16,
    pre_nms_topk=500,
    post_nms_topk=200  # Remaining RPN arguments are left at their defaults.
)
# fmt: off
faster_rcnn = FasterRCNN(
    backbone, rpn, num_classes=NUM_CLASSES, roi_size=(7, 7),
    stem_channels=[FPN_CHANNELS, FPN_CHANNELS],
    batch_size_per_image=32,
)
# fmt: on

# Train the full detector on the complete training loader.
train_detector(
    faster_rcnn,
    train_loader,
    learning_rate=0.01,
    max_iters=9000,
    log_period=50,
    device=DEVICE,
)

# After you've trained your model, save the weights for submission.
weights_path = os.path.join('./A4', "rcnn_detector.pt")
torch.save(faster_rcnn.state_dict(), weights_path)

For dummy input images with shape: (2, 3, 224, 224)
Shape of p3 features: torch.Size([16, 128, 112, 112])
Shape of p4 features: torch.Size([16, 128, 56, 56])
Shape of p5 features: torch.Size([16, 128, 28, 28])
Shape of p6 features: torch.Size([16, 128, 14, 14])
Shape of p7 features: torch.Size([16, 128, 7, 7])
dict_keys(['p3', 'p4', 'p5', 'p6', 'p7'])
torch.Size([3, 466, 525])
torch.Size([3, 337, 231])
torch.Size([3, 635, 771])
torch.Size([3, 528, 279])
torch.Size([3, 501, 633])
torch.Size([3, 767, 308])
torch.Size([3, 896, 643])
torch.Size([3, 896, 293])
torch.Size([3, 234, 664])
torch.Size([3, 112, 204])
torch.Size([3, 157, 58])
torch.Size([3, 335, 846])
torch.Size([3, 64, 30])
torch.Size([3, 85, 211])
torch.Size([3, 382, 896])
torch.Size([3, 189, 478])
torch.Size([3, 580, 608])
torch.Size([3, 633, 307])
torch.Size([3, 525, 525])
torch.Size([3, 702, 788])
torch.Size([3, 704, 413])
torch.Size([3, 703, 408])
torch.Size([3, 302, 573])
torch.Size([3, 885, 771])
torch.Size([3, 837, 896])
