In [169]:
import torch
from torch import nn
from torchinfo import summary
import torch.nn.functional as F
from torchvision import models
from typing import Type, Union, List, Optional

![ResNet_Table.png](attachment:9f89ad55-a7e3-45a6-a207-024320d3cc10.png)

In [170]:
class BasicConv2d(nn.Module):
    """A 2-D convolution followed by batch normalisation, with no activation.

    The ReLU is deliberately left out: inside a residual unit the last
    BatchNorm is not followed by a ReLU, so the caller decides where the
    activation goes.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels (also the BatchNorm width).
        zero_initial: If True, the BatchNorm weight and bias are both set to 0.
        bias: Forwarded to ``nn.Conv2d(bias=...)``.
        **kwargs: Forwarded to ``nn.Conv2d``. ``kernel_size`` has to be given
            here because this class does not set one itself.
    """

    def __init__(self,
                 in_channels: int, out_channels: int,
                 zero_initial: bool = False,
                 bias=False, **kwargs):
        super().__init__()
        norm = nn.BatchNorm2d(out_channels)
        if zero_initial:
            # Zero both affine parameters of the normalisation layer.
            for affine_param in (norm.weight, norm.bias):
                nn.init.zeros_(affine_param)
        conv = nn.Conv2d(in_channels, out_channels, bias=bias, **kwargs)
        self.conv1 = nn.Sequential(conv, norm)

    def forward(self, x):
        """Apply convolution then batch normalisation to ``x``."""
        out = self.conv1(x)
        return out

In [171]:
class Conv3x3(nn.Module):
    """A 3x3 convolution followed by batch normalisation, with no activation.

    The ReLU is deliberately left out: inside a residual unit the last
    BatchNorm is not followed by a ReLU, so the caller decides where the
    activation goes.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels (also the BatchNorm width).
        zero_initial: If True, the BatchNorm weight and bias are both set to 0.
        bias: Forwarded to ``nn.Conv2d(bias=...)``.
        **kwargs: Extra ``nn.Conv2d`` options such as ``stride`` and
            ``padding``. The kernel size is fixed to 3 and is passed
            positionally, so ``kernel_size`` must not be supplied here.
    """

    def __init__(self,
                 in_channels: int, out_channels: int,
                 zero_initial: bool = False,
                 bias=False, **kwargs):
        super().__init__()
        norm = nn.BatchNorm2d(out_channels)
        if zero_initial:
            # Zero both affine parameters of the normalisation layer.
            for affine_param in (norm.weight, norm.bias):
                nn.init.zeros_(affine_param)
        conv = nn.Conv2d(in_channels, out_channels, 3, bias=bias, **kwargs)
        self.conv1 = nn.Sequential(conv, norm)

    def forward(self, x):
        """Apply the 3x3 convolution then batch normalisation to ``x``."""
        out = self.conv1(x)
        return out

In [172]:
class Conv1x1(nn.Module):
    """A 1x1 convolution followed by batch normalisation, with no activation.

    The ReLU is deliberately left out: inside a residual unit the last
    BatchNorm is not followed by a ReLU, so the caller decides where the
    activation goes.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels (also the BatchNorm width).
        zero_initial: If True, the BatchNorm weight and bias are both set to 0.
        bias: Forwarded to ``nn.Conv2d(bias=...)``.
        **kwargs: Extra ``nn.Conv2d`` options such as ``stride`` and
            ``padding``. The kernel size is fixed to 1 and is passed
            positionally, so ``kernel_size`` must not be supplied here.
    """

    def __init__(self,
                 in_channels: int, out_channels: int,
                 zero_initial: bool = False,
                 bias=False, **kwargs):
        super().__init__()
        norm = nn.BatchNorm2d(out_channels)
        if zero_initial:
            # Zero both affine parameters of the normalisation layer.
            for affine_param in (norm.weight, norm.bias):
                nn.init.zeros_(affine_param)
        conv = nn.Conv2d(in_channels, out_channels, 1, bias=bias, **kwargs)
        self.conv1 = nn.Sequential(conv, norm)

    def forward(self, x):
        """Apply the 1x1 convolution then batch normalisation to ``x``."""
        out = self.conv1(x)
        return out

In [173]:
class ResidualUnit(nn.Module):
    """Two-layer residual unit: 3x3 conv -> ReLU -> 3x3 conv, plus a shortcut.

    The first 3x3 convolution carries the stride; the second always uses
    stride 1. The BatchNorm after the second convolution is zero-initialised,
    so a freshly built unit outputs ``relu(shortcut(x))``.

    Args:
        out_: Number of output channels of the unit.
        stride1: Stride of the first convolution (and of the shortcut
            projection when one is used).
        in_: Number of input channels. When omitted it is inferred from
            ``stride1``: ``out_ // 2`` for stride 2, ``out_`` for stride 1.

    Raises:
        ValueError: If ``in_`` is omitted and ``stride1`` is neither 1 nor 2,
            because the input width cannot be inferred in that case.
    """

    def __init__(self,
                 out_: int,
                 stride1: int,
                 in_: Optional[int] = None):
        super().__init__()
        # Output size of a convolution: floor((h + 2p - k) / s) + 1.
        # With k = 3 and p = 1:
        #   s = 1 -> h          (spatial size unchanged)
        #   s = 2 -> h / 2      (spatial size halved, for even h)
        if not in_:
            if stride1 == 2:
                in_ = out_ // 2
            elif stride1 == 1:
                in_ = out_
            else:
                raise ValueError(
                    "in_ can only be inferred for stride1 of 1 or 2, "
                    f"got stride1={stride1!r}; pass in_ explicitly"
                )
        self.stride = stride1
        # Only the first convolution may shrink the feature map; the second
        # one always keeps the spatial size.
        self.conv = nn.Sequential(Conv3x3(in_, out_, stride=stride1, padding=1),
                                  nn.ReLU(inplace=True),
                                  Conv3x3(out_, out_, zero_initial=True, stride=1, padding=1)
                                 )
        # The identity shortcut is only valid when neither the spatial size
        # nor the channel count changes. Previously only stride 2 triggered
        # the projection, so stride 1 with in_ != out_ failed at the addition.
        self.use_projection = stride1 != 1 or in_ != out_
        if self.use_projection:
            self.skipconv = Conv1x1(in_, out_, stride=stride1, padding=0)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(residual_branch(x) + shortcut(x))``."""
        residual = self.conv(x)
        shortcut = self.skipconv(x) if self.use_projection else x
        return self.relu(residual + shortcut)

In [174]:
class Bottleneck(nn.Module):
    """Bottleneck residual unit: 1x1 reduce -> 3x3 -> 1x1 expand, plus a shortcut.

    The unit outputs ``4 * middle_out`` channels. The stride is applied on the
    3x3 convolution. The BatchNorm after the last 1x1 convolution is
    zero-initialised, so a freshly built unit outputs ``relu(shortcut(x))``.

    Args:
        middle_out: Channel width of the inner 1x1 and 3x3 convolutions.
        stride1: Stride of the 3x3 convolution (and of the shortcut
            projection when one is used).
        in_: Number of input channels. When omitted it is inferred from
            ``stride1`` so that a stage builder does not have to pass it:
            ``2 * middle_out`` for stride 2, ``4 * middle_out`` for stride 1.

    Raises:
        ValueError: If ``in_`` is omitted and ``stride1`` is neither 1 nor 2,
            because the input width cannot be inferred in that case.
    """

    def __init__(self,
                 middle_out: int,
                 stride1: int,
                 in_: Optional[int] = None):
        super().__init__()
        out_ = 4 * middle_out
        if not in_:
            if stride1 == 2:
                in_ = 2 * middle_out
            elif stride1 == 1:
                in_ = 4 * middle_out
            else:
                raise ValueError(
                    "in_ can only be inferred for stride1 of 1 or 2, "
                    f"got stride1={stride1!r}; pass in_ explicitly"
                )
        self.block = nn.Sequential(Conv1x1(in_, middle_out),
                                   nn.ReLU(inplace=True),
                                   Conv3x3(middle_out, middle_out, stride=stride1, padding=1),
                                   nn.ReLU(inplace=True),
                                   Conv1x1(middle_out, out_, zero_initial=True)
                                  )
        self.in_ = in_
        self.out_ = out_
        # The identity shortcut is only valid when neither the spatial size
        # nor the channel count changes. Previously only a channel mismatch
        # triggered the projection, so a strided unit with in_ == out_ added
        # tensors of different spatial sizes.
        self.use_projection = stride1 != 1 or in_ != out_
        if self.use_projection:
            self.skipconv = Conv1x1(in_, out_, stride=stride1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(residual_branch(x) + shortcut(x))``."""
        residual = self.block(x)
        shortcut = self.skipconv(x) if self.use_projection else x
        return self.relu(residual + shortcut)

In [175]:
def make_layers(block: Type[Union[ResidualUnit, Bottleneck]],
                num_blocks: int,
                out_or_middleout: int,
                afterconv1: bool = False,
                stem_channels: int = 64):
    """Build one ResNet stage as an ``nn.Sequential`` of ``num_blocks`` units.

    Args:
        block: Unit class to instantiate. It is called as
            ``block(width, stride1=..., in_=...)``.
        num_blocks: Number of units in the stage; must be at least 1.
        out_or_middleout: First positional argument handed to ``block``
            (``out_`` for ``ResidualUnit``, ``middle_out`` for ``Bottleneck``).
        afterconv1: True for the stage that directly follows the stem. Its
            first unit uses stride 1 and takes ``stem_channels`` inputs.
            Otherwise the first unit uses stride 2 and infers its own input
            width.
        stem_channels: Channel count produced by the stem; only used when
            ``afterconv1`` is True. Defaults to 64, the value that was
            previously hard-coded.

    Returns:
        ``nn.Sequential`` holding the units in order.

    Raises:
        ValueError: If ``num_blocks`` is smaller than 1. Such a value used to
            be silently treated as 1.
    """
    if num_blocks < 1:
        raise ValueError(f"num_blocks must be >= 1, got {num_blocks!r}")

    # Only the first unit of a stage differs: it either adapts the stem width
    # (stride 1) or halves the spatial size (stride 2).
    if afterconv1:
        first = block(out_or_middleout, stride1=1, in_=stem_channels)
    else:
        first = block(out_or_middleout, stride1=2)

    layers = [first]
    # The remaining units keep both the spatial size and the channel width.
    layers.extend(block(out_or_middleout, stride1=1) for _ in range(num_blocks - 1))

    return nn.Sequential(*layers)

In [176]:
class ResNet(nn.Module):
    """ResNet built from a stem, four stages, global average pooling and a classifier.

    Args:
        block: Unit class used in every stage, ``ResidualUnit`` or
            ``Bottleneck`` (subclasses are accepted).
        layers: Number of units in each of the four stages; entries 0..3
            are read.
        num_class: Number of outputs of the final linear layer.

    Raises:
        ValueError: If ``block`` is neither ``ResidualUnit`` nor
            ``Bottleneck`` (nor a subclass). This used to surface as an
            ``UnboundLocalError``.
    """

    def __init__(self,
                 block: Type[Union[ResidualUnit, Bottleneck]],
                 layers: List[int],
                 num_class: int):
        super().__init__()
        # Stem. Output size of a convolution is floor((h + 2p - k) / s) + 1;
        # with k = 7, s = 2, p = 3 a 224-pixel side gives
        # floor((224 + 6 - 7) / 2) + 1 = 112, i.e. the input size is halved.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(3, 2, padding=1)

        self.layer1 = make_layers(block, layers[0], 64, True)
        self.layer2 = make_layers(block, layers[1], 128, False)
        self.layer3 = make_layers(block, layers[2], 256, False)
        self.layer4 = make_layers(block, layers[3], 512, False)

        # Width of the last stage: ResidualUnit keeps 512 channels, while
        # Bottleneck expands its width by 4 (4 * 512 = 2048).
        is_class = isinstance(block, type)
        if is_class and issubclass(block, ResidualUnit):
            in_linear = 512
        elif is_class and issubclass(block, Bottleneck):
            in_linear = 2048
        else:
            raise ValueError(
                f"block must be ResidualUnit or Bottleneck, got {block!r}"
            )
        self.in_linear = in_linear
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.linear = nn.Linear(in_linear, num_class)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for the batch ``x``."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        x = self.layer4(self.layer3(self.layer2(self.layer1(x))))
        x = self.avgpool(x)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, C), this can never reinterpret the batch size.
        x = self.linear(torch.flatten(x, 1))
        # NOTE(review): the model returns probabilities, not logits. Do not
        # pair this output with nn.CrossEntropyLoss, which applies
        # log-softmax itself. Kept as-is to preserve the existing output.
        x = F.softmax(x, dim=1)
        return x

In [177]:
# Hand-written network: Bottleneck units, stage depths [3, 4, 6, 3], 1000 outputs.
mynet = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_class=1000)
# torchvision's ResNet-50, built with default arguments, as the comparison model.
net = models.resnet50()

In [178]:
# Display the module tree of the hand-written network.
mynet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (block): Sequential(
        (0): Conv1x1(
          (conv1): Sequential(
            (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (1): ReLU(inplace=True)
        (2): Conv3x3(
          (conv1): Sequential(
            (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (3): ReLU(inplace=True)
        (4): Conv1x1(
          (conv1): Seq

In [179]:
# Display the module tree of the torchvision model for side-by-side comparison.
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [180]:
# Per-stage output shapes and parameter counts of the hand-written network
# for a batch of 10 RGB images of size 224x224, computed on the CPU.
summary(
    mynet,
    input_size=[10, 3, 224, 224],
    depth=1,
    device="cpu",
)

Layer (type:depth-idx)                             Output Shape              Param #
ResNet                                             [10, 1000]                --
├─Conv2d: 1-1                                      [10, 64, 112, 112]        9,408
├─BatchNorm2d: 1-2                                 [10, 64, 112, 112]        128
├─ReLU: 1-3                                        [10, 64, 112, 112]        --
├─MaxPool2d: 1-4                                   [10, 64, 56, 56]          --
├─Sequential: 1-5                                  [10, 256, 56, 56]         215,808
├─Sequential: 1-6                                  [10, 512, 28, 28]         1,219,584
├─Sequential: 1-7                                  [10, 1024, 14, 14]        7,098,368
├─Sequential: 1-8                                  [10, 2048, 7, 7]          14,964,736
├─AdaptiveAvgPool2d: 1-9                           [10, 2048, 1, 1]          --
├─Linear: 1-10                                     [10, 1000]                2,049,0

In [181]:
# Same summary for the torchvision model, so the two tables can be compared
# row by row.
summary(
    net,
    input_size=[10, 3, 224, 224],
    depth=1,
    device="cpu",
)

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [10, 1000]                --
├─Conv2d: 1-1                            [10, 64, 112, 112]        9,408
├─BatchNorm2d: 1-2                       [10, 64, 112, 112]        128
├─ReLU: 1-3                              [10, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [10, 64, 56, 56]          --
├─Sequential: 1-5                        [10, 256, 56, 56]         215,808
├─Sequential: 1-6                        [10, 512, 28, 28]         1,219,584
├─Sequential: 1-7                        [10, 1024, 14, 14]        7,098,368
├─Sequential: 1-8                        [10, 2048, 7, 7]          14,964,736
├─AdaptiveAvgPool2d: 1-9                 [10, 2048, 1, 1]          --
├─Linear: 1-10                           [10, 1000]                2,049,000
Total params: 25,557,032
Trainable params: 25,557,032
Non-trainable params: 0
Total mult-adds (G): 40.89
Input size (