# hw2: Single-shot Multibox Object Detection
2018131605 원종빈

In [20]:
# !pip install d2l
# !git clone https://github.com/MLman/d2l-pytorch.git

%matplotlib inline
import sys, os
sys.path.insert(0, '..')
from d2l import torch as d2l
# from d2l.ssd_utils import *
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
import json
import time
from tqdm import tqdm
from PIL import Image

In [6]:
def cls_predictor(input_channels, num_anchors, num_classes):
    return nn.Conv2d(in_channels=input_channels, out_channels=num_anchors * (num_classes + 1), kernel_size=3,
                     padding=1)

In [7]:
def bbox_predictor(input_channels, num_anchors):
    return nn.Conv2d(in_channels=input_channels, out_channels=num_anchors * 4, kernel_size=3, padding=1)

In [8]:
def forward(x, block):
    return block(x)
Y1 = forward(torch.zeros((2, 8, 20, 20)), cls_predictor(8, 5, 10))
Y2 = forward(torch.zeros((2, 16, 10, 10)), cls_predictor(16, 3, 10))
Y1.shape, Y2.shape

(torch.Size([2, 55, 20, 20]), torch.Size([2, 33, 10, 10]))

In [9]:
def flatten_pred(pred):
    return torch.flatten(pred.permute(0,2,3,1), start_dim=1)

def concat_preds(preds):
    return torch.cat([flatten_pred(p) for p in preds], dim=1)

concat_preds([Y1, Y2]).shape

torch.Size([2, 25300])

In [10]:
def down_sample_blk(in_channels, out_channels):
    blk = []
    for _ in range(2):
        blk.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        blk.append(nn.BatchNorm2d(out_channels))
        blk.append(nn.ReLU())
        in_channels = out_channels

    blk.append(nn.MaxPool2d(2, 2))
    return nn.Sequential(*blk)

forward(torch.zeros((2,3,20,20)), down_sample_blk(3, 10)).shape

torch.Size([2, 10, 10, 10])

In [11]:
def base_net():
    blk = []
    num_filters = [3, 16, 32, 64]
    for i in range(len(num_filters)-1):
        blk.append(down_sample_blk(num_filters[i], num_filters[i+1]))
    return nn.Sequential(*blk)

forward(torch.zeros((2,3,256,256)), base_net()).shape

torch.Size([2, 64, 32, 32])

In [12]:
## Complete model
def get_blk(i):
    if i == 0:
        blk = base_net()
    elif i == 1:
        blk = down_sample_blk(64, 128)
    elif i == 4:
        blk = nn.AdaptiveAvgPool2d((1,1))
    else:
        blk = down_sample_blk(128, 128)
    return blk


def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
    Y = blk(X)
    anchors = d2l.multibox_prior(Y, sizes=size, ratios=ratio)
    cls_preds = cls_predictor(Y)
    bbox_preds = bbox_predictor(Y)
    return (Y, anchors, cls_preds, bbox_preds)

In [13]:
import itertools, math

def create_anchors(feature_map_sizes, steps, sizes):
    scale = 256.
    steps = [s / scale for s in steps]
    sizes = [s / scale for s in sizes]
    
    aspect_ratios = ((2,),)  ## why use tuple in tuple? for multiple aspect_ratios?
    
    num_layers = len(feature_map_sizes)
    boxes = []
    for i in range(num_layers):
        fmsize = feature_map_sizes[i]
        for h, w in itertools.product(range(fmsize), repeat=2):
            cx = (w + 0.5) * steps[i]
            cy = (h + 0.5) * steps[i]
            s = sizes[i]
            boxes.append((cx, cy, s, s))
            
            s = sizes[i+1]
            boxes.append((cx, cy, s, s))
            
            for ar in aspect_ratios[i]:
                boxes.append((cx, cy, (s * math.sqrt(ar)), (s / math.sqrt(ar))))
                boxes.append((cx, cy, (s / math.sqrt(ar)), (s * math.sqrt(ar))))
    
    return torch.Tensor(boxes)


In [14]:
sizes = [[0.2*256, 0.272*256], [0.37*256, 0.447*256], [0.54*256, 0.619*256],
         [0.71*256, 0.79*256], [0.88*256, 0.961*256]]
ratios = [[1, 2, 0.5]] * 5
num_anchors = len(sizes[0]) + len(ratios[0]) - 1

In [41]:
## Define TinySSD
class TinySSD(nn.Module):
    def __init__(self, input_channels, num_classes):
        super().__init__()
        input_channels_cls = 128
        input_channels_bbox = 128
        self.num_classes = num_classes
        
        self.blk = []
        self.cls = []
        self.bbox = []
        
        self.blk_0 = get_blk(0)
        self.blk_1 = get_blk(1)
        self.blk_2 = get_blk(2)
        self.blk_3 = get_blk(3)
        self.blk_4 = get_blk(4)
        
        self.cls_0 = cls_predictor(64, num_anchors, num_classes)
        self.cls_1 = cls_predictor(input_channels_cls, num_anchors, num_classes)
        self.cls_2 = cls_predictor(input_channels_cls, num_anchors, num_classes)
        self.cls_3 = cls_predictor(input_channels_cls, num_anchors, num_classes)
        self.cls_4 = cls_predictor(input_channels_cls, num_anchors, num_classes)
        
        self.bbox_0 = bbox_predictor(64, num_anchors)
        self.bbox_1 = bbox_predictor(input_channels_bbox, num_anchors)
        self.bbox_2 = bbox_predictor(input_channels_bbox, num_anchors)
        self.bbox_3 = bbox_predictor(input_channels_bbox, num_anchors)
        self.bbox_4 = bbox_predictor(input_channels_bbox, num_anchors)
        
    
    def forward(self, X):
        anchors, cls_preds, bbox_preds = [None] * 5, [None] * 5, [None] * 5
        
        X, anchors[0], cls_preds[0], bbox_preds[0] = blk_forward(X, self.blk_0, sizes[0], ratios[0], self.cls_0, self.bbox_0)
        X, anchors[1], cls_preds[1], bbox_preds[1] = blk_forward(X, self.blk_1, sizes[1], ratios[1], self.cls_1, self.bbox_1)
        X, anchors[2], cls_preds[2], bbox_preds[2] = blk_forward(X, self.blk_2, sizes[2], ratios[2], self.cls_2, self.bbox_2)
        X, anchors[3], cls_preds[3], bbox_preds[3] = blk_forward(X, self.blk_3, sizes[3], ratios[3], self.cls_3, self.bbox_3)
        X, anchors[4], cls_preds[4], bbox_preds[4] = blk_forward(X, self.blk_4, sizes[4], ratios[4], self.cls_4, self.bbox_4)
        
        print(anchors[0].shape)
        print(anchors[1].shape)
        print(anchors[2].shape)
        print(anchors[3].shape)
        print(anchors[4].shape)
            
        print(torch.cat(anchors, dim=0))
        print(concat_preds(cls_preds).reshape((-1, 5444, self.num_classes + 1)))
        print(concat_preds(bbox_preds))
        
        return (torch.cat(anchors, dim=0), concat_preds(cls_preds).reshape((-1, 5444, self.num_classes + 1)), concat_preds(bbox_preds))
        
    
    
        

In [42]:
anchors, cls_preds, bbox_preds = [None] * 5, [None] * 5, [None] * 5

def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)
    
net = TinySSD(3, num_classes=1)
net.apply(init_weights)

X = torch.zeros((32,3,256,256))
anchors, cls_preds, bbox_preds = net(X)

anchors.shape, cls_preds.shape, bbox_preds.shape

torch.Size([1, 4096, 4])
torch.Size([1, 1024, 4])
torch.Size([1, 256, 4])
torch.Size([1, 64, 4])
torch.Size([1, 4, 4])


RuntimeError: Sizes of tensors must match except in dimension 0. Expected size 4096 but got size 1024 for tensor number 1 in the list.