In [4]:
import torch
from torch import nn
from torchvision.models import vgg16


class SSD(nn.Module):
    """Minimal first iteration of the detector: only the VGG16 feature extractor.

    The module owns a single sub-module, ``base_network``, and ``forward``
    simply runs the input through it.
    """

    def __init__(self):
        super().__init__()
        self.base_network = self.base_net()

    def forward(self, X):
        """Return the backbone feature map computed from ``X``."""
        return self.base_network(X)

    @staticmethod
    def base_net():
        """Build a randomly initialised VGG16 and return its ``features`` part.

        ``weights=None`` is the documented way to request no pretrained
        weights; a boolean is only accepted through torchvision's deprecated
        legacy path and triggers a warning.
        """
        net = vgg16(weights=None).features
        return net

In [5]:
# Smoke test: build the backbone-only SSD and push one random batch of shape
# (1, 3, 96, 96) through it; the last expression displays the resulting tensor.
model = SSD()
X = torch.rand(1, 3, 96, 96)
model(X)

tensor([[[[0.1190, 0.0635, 0.0713],
          [0.0953, 0.0482, 0.0466],
          [0.0421, 0.0000, 0.0000]],

         [[0.0000, 0.0790, 0.0946],
          [0.0000, 0.0812, 0.0827],
          [0.0571, 0.1145, 0.1117]],

         [[0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000],
          [0.0000, 0.0415, 0.0237]],

         ...,

         [[0.0681, 0.0727, 0.0612],
          [0.1037, 0.1705, 0.1140],
          [0.1558, 0.2217, 0.1867]],

         [[0.0074, 0.0730, 0.0639],
          [0.0000, 0.0103, 0.0317],
          [0.0000, 0.0357, 0.0725]],

         [[0.0000, 0.0000, 0.0262],
          [0.0613, 0.0589, 0.0473],
          [0.1228, 0.1786, 0.2209]]]], grad_fn=<MaxPool2DWithIndicesBackward0>)

In [6]:
# Debugging aid: list every attribute name visible on the model instance.
dir(model)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_call_impl',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_backward_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_post_hooks',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_set',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_replicate_for_data_parallel',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',
 '_ver

In [7]:
import torch
from torch import nn
from torchvision.models import vgg16
from utils import get_multi_anchor_prior


class SSD(nn.Module):
    """SSD-style multi-scale detector built on VGG16 features.

    The network is a chain of blocks (``blk_0`` ... ``blk_{n-1}``, one per
    entry in ``self.sizes``). After every block the current feature map is fed
    to a class predictor (``cls_i``) and a box predictor (``bbox_i``), and
    anchors are generated for it with ``get_multi_anchor_prior``.

    Args:
        num_classes: number of classes; each anchor gets ``num_classes + 1``
            class scores.
        **kwargs: accepted for call-site compatibility and ignored.
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__()
        # The backbone is built once and reused as ``blk_0`` below, so the
        # model does not carry a second, never-used copy of VGG16.
        self.base_network = self.base_net()
        # Unused empty container, kept so the attribute remains available.
        self.net = nn.Sequential()
        self.num_classes = num_classes
        self.sizes = [[0.2, 0.272], [0.37, 0.447], [0.54, 0.619], [0.71, 0.79], [0.88, 0.961]]
        # One independent list per scale (``[[...]] * n`` would alias one list).
        self.ratios = [[1, 2, 0.5] for _ in self.sizes]
        self.num_anchors = len(self.sizes[0]) + len(self.ratios[0]) - 1
        for i in range(len(self.sizes)):
            block = self.base_network if i == 0 else self.get_blk(i)
            setattr(self, f"blk_{i}", block)
            setattr(self, f"cls_{i}", self.cls_predictor(num_classes=self.num_classes,
                                                         num_anchors=self.num_anchors))
            setattr(self, f"bbox_{i}", self.bbox_predictor(self.num_anchors))

    def forward(self, X):
        """Run every block in turn and collect the per-scale outputs.

        Returns:
            A tuple ``(anchors, cls_preds, bbox_preds)`` where ``anchors`` is
            the per-scale anchors concatenated along dim 1, ``cls_preds`` is
            reshaped to ``(batch, -1, num_classes + 1)`` and ``bbox_preds`` is
            flattened to ``(batch, -1)``.
        """
        anchors, cls_preds, bbox_preds = [], [], []
        for i in range(len(self.sizes)):
            X, scale_anchors, scale_cls, scale_bbox = self.blk_forward(
                X, getattr(self, f"blk_{i}"), self.sizes[i], self.ratios[i],
                getattr(self, f"cls_{i}"), getattr(self, f"bbox_{i}"))
            anchors.append(scale_anchors)
            cls_preds.append(scale_cls)
            bbox_preds.append(scale_bbox)

        anchors = torch.cat(anchors, dim=1)
        cls_preds = self.concat_preds(cls_preds)
        cls_preds = cls_preds.reshape(cls_preds.shape[0], -1, self.num_classes + 1)
        bbox_preds = self.concat_preds(bbox_preds)
        return anchors, cls_preds, bbox_preds

    @staticmethod
    def base_net():
        """Return the ``features`` part of a randomly initialised VGG16.

        VGG16 consists of three parts (features, avgpool, classifier); only
        the convolutional ``features`` part is used here. ``weights=None``
        replaces the deprecated ``pretrained=False`` spelling.
        """
        net = vgg16(weights=None).features
        return net

    @staticmethod
    def cls_predictor(num_anchors, num_classes):
        """3x3 conv emitting ``num_anchors * (num_classes + 1)`` channels."""
        return nn.LazyConv2d(num_anchors * (num_classes + 1), kernel_size=3, padding=1)

    @staticmethod
    def bbox_predictor(num_anchors):
        """3x3 conv emitting ``num_anchors * 4`` channels."""
        return nn.LazyConv2d(num_anchors * 4, kernel_size=3, padding=1)

    @staticmethod
    def small_forward(x, block):
        """Apply ``block`` to ``x`` and return the result."""
        return block(x)

    @staticmethod
    def flatten_pred(pred):
        """Move dim 1 of a 4-D prediction to the end, then flatten all dims after the first."""
        return torch.flatten(pred.permute(0, 2, 3, 1), start_dim=1)

    @classmethod
    def concat_preds(cls, preds):
        """Flatten each prediction with ``flatten_pred`` and concatenate along dim 1."""
        return torch.cat([cls.flatten_pred(pred) for pred in preds], dim=1)

    @staticmethod
    def down_sample_blk(out_channels):
        """Two (3x3 conv, BatchNorm, ReLU) stages followed by a 2x2 max-pool."""
        blk = []
        for _ in range(2):
            blk.append(nn.LazyConv2d(out_channels, kernel_size=3, padding=1))
            blk.append(nn.BatchNorm2d(out_channels))
            blk.append(nn.ReLU())
        blk.append(nn.MaxPool2d(kernel_size=2))
        return nn.Sequential(*blk)

    @classmethod
    def get_blk(cls, i):
        """Return a freshly built block for position ``i``.

        ``0`` gives the VGG16 backbone, ``4`` a global average pool to 1x1,
        and every other index a 128-channel down-sampling block.
        """
        if i == 0:
            return cls.base_net()
        if i == 4:
            return nn.AdaptiveAvgPool2d((1, 1))
        return cls.down_sample_blk(128)

    @staticmethod
    def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
        """Apply ``blk`` to ``X`` and compute anchors and predictions on its output.

        Returns:
            ``(Y, anchors, cls_preds, bbox_preds)`` where ``Y = blk(X)``.
        """
        Y = blk(X)
        anchors = get_multi_anchor_prior(Y, sizes=size, ratios=ratio)
        cls_preds = cls_predictor(Y)
        bbox_preds = bbox_predictor(Y)
        return Y, anchors, cls_preds, bbox_preds


In [8]:
# End-to-end check of the full SSD with num_classes=1 on a batch of 32
# all-zero 3x256x256 inputs. This first forward pass is also what lets the
# LazyConv2d layers infer their input channel counts.
net = SSD(num_classes=1)
X = torch.zeros((32, 3, 256, 256))
anchors, cls_preds, bbox_preds = net(X)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [9]:
# Anchors from all five blocks, concatenated along dim 1 by SSD.forward.
anchors

tensor([[[-0.0375, -0.0375,  0.1625,  0.1625],
         [-0.0735, -0.0735,  0.1985,  0.1985],
         [-0.0789, -0.0082,  0.2039,  0.1332],
         ...,
         [ 0.0195,  0.0195,  0.9805,  0.9805],
         [-0.1223,  0.1889,  1.1223,  0.8111],
         [ 0.1889, -0.1223,  0.8111,  1.1223]]])

In [10]:
# Class predictions, reshaped by SSD.forward to (batch, -1, num_classes + 1).
cls_preds

tensor([[[ 0.0052,  0.0053],
         [-0.0139,  0.0142],
         [-0.0073,  0.0133],
         ...,
         [ 0.0099, -0.0282],
         [ 0.0020,  0.0154],
         [-0.0234,  0.0217]],

        [[ 0.0052,  0.0053],
         [-0.0139,  0.0142],
         [-0.0073,  0.0133],
         ...,
         [ 0.0099, -0.0282],
         [ 0.0020,  0.0154],
         [-0.0234,  0.0217]],

        [[ 0.0052,  0.0053],
         [-0.0139,  0.0142],
         [-0.0073,  0.0133],
         ...,
         [ 0.0099, -0.0282],
         [ 0.0020,  0.0154],
         [-0.0234,  0.0217]],

        ...,

        [[ 0.0052,  0.0053],
         [-0.0139,  0.0142],
         [-0.0073,  0.0133],
         ...,
         [ 0.0099, -0.0282],
         [ 0.0020,  0.0154],
         [-0.0234,  0.0217]],

        [[ 0.0052,  0.0053],
         [-0.0139,  0.0142],
         [-0.0073,  0.0133],
         ...,
         [ 0.0099, -0.0282],
         [ 0.0020,  0.0154],
         [-0.0234,  0.0217]],

        [[ 0.0052,  0.0053],
       

In [11]:
# Box predictions, flattened and concatenated by SSD.forward to (batch, -1).
bbox_preds

tensor([[ 0.0131,  0.0065, -0.0021,  ..., -0.0264, -0.0070, -0.0232],
        [ 0.0131,  0.0065, -0.0021,  ..., -0.0264, -0.0070, -0.0232],
        [ 0.0131,  0.0065, -0.0021,  ..., -0.0264, -0.0070, -0.0232],
        ...,
        [ 0.0131,  0.0065, -0.0021,  ..., -0.0264, -0.0070, -0.0232],
        [ 0.0131,  0.0065, -0.0021,  ..., -0.0264, -0.0070, -0.0232],
        [ 0.0131,  0.0065, -0.0021,  ..., -0.0264, -0.0070, -0.0232]],
       grad_fn=<CatBackward0>)