# **Pytorch ResNet implemention**
- A simple implemention of ResNet with Pytorch
- Including ResNet-18, ResNet-34, ResNet-50, ResNet-101, ResNet-152
- Mainly focus on how the BasicBlock and Bottleneck work
- Notice the in_dim and out_dim means the third dim of feature matrix: The number of conv filters

---

## Import all the dependencies

In [1]:
import torch
import torch.nn as nn

---

## Implement the ***BasicBlock***
- Use in ResNet-18 and ResNet-34
- Expansion = 1: The block will not change the input dimension (number of planes)
- If the in_dim != out_dim, we have to use downsample to make the gradient flow works
    - No need of the third 1x1 conv layer as no reconstruction need to be done


In [32]:
class BasicBlock(nn.Module):
    # the in_dim = out_dim
    expansion = 1

    def __init__(self, in_dim, out_dim, stride=1, downsample=None, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        # set the norm layer
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        # 3x3 conv, transfer the number of planes
        self.conv1 = nn.Conv2d(in_dim, out_dim, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = norm_layer(out_dim)
        # the number of planes will not change inside the block
        self.conv2 = nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = norm_layer(out_dim)
        # if the dim of output is not the same as input, then a downsample is needed for the residual approach
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # keep the original input
        ori_in = x
        # forward propogation
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # transform the input if needed
        if self.downsample is not None:
            identity = self.downsample(x)
        # add the activate (residual)
        out += ori_in
        out = self.relu(out)
        return out

---

## Implement the ***Bottleneck***
- Use in ResNet-50, ResNet-101, ResNet-152
- Expansion = 4: The dimension will be expended as 4 times by the reconstruction 1x1 conv layer
- To lower computation cost, Bottleneck:
    - First: lower the dimension by a 1x1 conv layer
    - Second: do feature extraction with a 3x3 conv layer
    - Third: reconstruction the dimension with a 1x1 conv layer 
- Most of the time we expect in_dim = out_dim, if this is not hold, we have to downsample

In [42]:
class Bottleneck(nn.Module):
    # the expansion scalar
    expansion = 4

    def __init__(self, in_dim, out_dim, stride=1, downsample=None, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        # set the norm layers and dim inside the block
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        # 1x1 conv and transform the dim of the matrix
        self.conv1 = nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=stride, bias=False)
        self.bn1 = norm_layer(out_dim)
        # 3x3 conv, dim not changed
        self.conv2 = nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=stride, padding=1, bias=False, dilation=dilation)
        self.bn2 = norm_layer(out_dim)
        # 1x1 conv, raise dim to expansion target
        self.conv3 = nn.Conv2d(out_dim, out_dim * self.expansion, kernel_size=1, stride=stride, bias=False)
        self.bn2 = norm_layer(out_dim * self.expansion)
        # activation function
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
    
    def forward(self, x):
        # keep the origin input
        ori_in = x
        # forwar propagation
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        # transform the input if needed
        if self.downsample is not None:
            identity = self.downsample(x)
        # add the activate (residual)
        out += ori_in
        out = self.relu(out)
        return out

---

## Construct the ***ResNet*** Architecture
- As we do not implement more complex architecture, we do not really need to do dilation conv.
- Notice if stride = 2: This means the first two dimension of the feature matrix will be divided by 2

In [34]:
class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            self.norm_layer = nn.BatchNorm2d
        else:
            self.norm_layer = norm_layer
        # define parameters
        self.in_dim = 64
        # default not using dilation (do continous conv instead of skipping d conv [conv元素连续摘取，中间不跳过元素])
        self.dilation = 1
        # define the head part of the resnet
        self.conv1 = nn.Conv2d(3, self.in_dim, kernel_size=7, stride=2, padding=3, bias=False, dilation=self.dilation)
        self.bn1 = self.norm_layer(self.in_dim)
        # define the activation and pooling
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # define the four blocks
        self.layers1 = self._make_layers(block, 64, layers[0])
        self.layers2 = self._make_layers(block, 128, layers[1], stride=2)
        self.layers3 = self._make_layers(block, 256, layers[2], stride=2)
        self.layers4 = self._make_layers(block, 512, layers[3], stride=2)
        # average pooling layer
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # full connect layer
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layers(self, block, out_dim, block_num, stride=1, dilation=1):
        # the dim changed during this block due to the stride or the output downsample our feature matrix
        downsample = None
        if stride != 1 or self.in_dim != (out_dim * block.expansion):
            downsample = nn.Sequential(
                nn.Conv2d(self.in_dim, out_dim * block.expansion, kernel_size=1, stride=stride, bias=False),
                self.norm_layer(out_dim * block.expansion)
            )
        # construct the layers
        layers = []
        layers.append(block(self.in_dim, out_dim, stride, downsample))
        # changed the input dim to the output dim of the current block
        self.in_dim = out_dim * block.expansion 
        # for the rest of the upcoming blocks, the in_dim = out_dim
        for _ in range(1, block_num):
            layers.append(block(self.in_dim, out_dim))
        # return the layers
        return nn.Sequential(*layers)

    def forward(self, x):
        # stage head: before entering the blocks
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)
        # stage blocks: entering the four blocks
        out = self.layers1(out)
        out = self.layers2(out)
        out = self.layers3(out)
        out = self.layers4(out)
        # stage end: do the avgpooling and fc classification
        out = self.avgpool(out)
        # flat the whole matrix for the umpcoming fc layer
        out = torch.flatten(out)
        out = self.fc(out)
        return out

---

## Create different ResNet and check its structure
- Also, pretrained models can be loaded via urls below


In [35]:
import torch.utils.model_zoo as model_zoo
# Different pretrained model url path
# Retrieved from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}

---

### ResNet-18
- BasicBlock(3x3 conv layers x2), 7x7 conv x1, FC layer x1
    - 1 + 2 * (2 + 2 + 2 + 2) + 1 = 18
- A pretty shallow network
    - Have a pretty resonable performance on small object detections

In [7]:
def ResNet18(pretrained=False, **kwards):
    resNet18 = ResNet(BasicBlock, [2, 2, 2, 2], **kwards)
    if pretrained:
        resNet18.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
    return resNet18

### ResNet-34
- BasicBlock(3x3 conv layers x2), 7x7 conv x1, FC layer x1
    - 1 + 2 * (3 + 4 + 6 + 3) + 1 = 34

In [8]:
def ResNet34(pretrained=False, **kwards):
    resNet34 = ResNet(BasicBlock, [3, 4, 6, 3], **kwards)
    if pretrained:
        resNet34.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
    return resNet34

### ResNet-50
- Bottleneck(1x1 conv x2, 3x3 conv x1), 7x7 conv x1, FC layer x1
    - 1 + 3 * (3 + 4 + 6 + 3) + 1 = 50
- It's a really classic choice of backbones
    - Can serve as a baseline

In [9]:
def ResNet50(pretrained=False, **kwards):
    resNet50 = ResNet(Bottleneck, [3, 4, 6, 3], **kwards)
    if pretrained:
        resNet50.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    return resNet50

### ResNet-101
- Bottleneck(1x1 conv x2, 3x3 conv x1), 7x7 conv x1, FC layer x1
    - 1 + 3 * (3 + 4 + 23 + 3) + 1 = 101

In [10]:
def ResNet101(pretrained=False, **kwards):
    resNet101 = ResNet(Bottleneck, [3, 4, 23, 3], **kwards)
    if pretrained:
        resNet101.load_state_dict(model_zoo.load_url(model_urls['resNet101']))
    return resNet101

### ResNet-152
- Bottleneck(1x1 conv x2, 3x3 conv x1), 7x7 conv x1, FC layer x1
    - 1 + 3 * (3 + 8 + 36 + 3) + 1 = 152
- Compare with ResNet50, actually the boost in performance is not that noticeable

In [11]:
def ResNet152(pretrained=False, **kwards):
    ResNet152 = ResNet(Bottleneck, [3, 8, 36, 3], **kwards)
    if pretrained:
        ResNet152.load_state_dict(model_zoo.load_url(model_urls['ResNet152']))
    return ResNet152

---

## Check the structure of ResNet-18 and ResNet-50

In [44]:
mynet = ResNet18()
print(mynet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layers1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kern

In [45]:
mynet = ResNet50()
print(mynet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layers1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_ru