<a href="https://colab.research.google.com/github/GzpTez0514/-/blob/main/Pytorch%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A010_%E6%9E%84%E7%AD%91%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 卷积层
import torch
import torch.nn as nn

data = torch.ones(size=(10, 3, 28, 28)) # 10张尺寸为28*28的，拥有3个通道的图像
conv1 = nn.Conv2d(in_channels=3, 
          out_channels=6, # 全部通道的扫描值被合并，6个卷积核形成6个feature map
          kernel_size=3 # 表示3x3的卷积核
          )

conv2 = nn.Conv2d(in_channels=6, # 对下一层网络来说，输入的是上层生成的6个feature map
          out_channels=4, # 全部特征图的扫描值被合并，4个卷积核形成4个新的feature map
          kernel_size=3)

print(conv1(data).shape)
print(conv2(conv1(data)).shape)


torch.Size([10, 6, 26, 26])
torch.Size([10, 4, 24, 24])


In [None]:
# 特征图的尺寸 （H + 2P -K）/ S + 1
data = torch.ones(size=(10, 3, 28, 28))
conv1 = nn.Conv2d(3, 6, 3)
conv2 = nn.Conv2d(6, 4, 3)
conv3 = nn.Conv2d(4, 16, 5, stride=2, padding=1) 
conv4 = nn.Conv2d(16, 3, 5, stride=3, padding=2)


In [None]:
# VGG16的复现
# (卷积x2 + 池化) x2 -> (卷积x3 + 池化) x3 —> FC层x3 每组卷积+池化算一个block
!pip install torchinfo
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchinfo import summary

class VGG16(nn.Module):
  def __init__(self):
    super().__init__()
    # block1
    self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
    self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
    self.pool1 = nn.MaxPool2d(2, 2)

    # block2
    self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
    self.conv4 = nn.Conv2d(128, 128, 3, padding=1)
    self.pool2 = nn.MaxPool2d(2, 2)

    # block3
    self.conv5 = nn.Conv2d(128, 256, 3, padding=1)
    self.conv6 = nn.Conv2d(256, 256, 3, padding=1)
    self.conv7 = nn.Conv2d(256, 256, 3, padding=1)
    self.pool3 = nn.MaxPool2d(2, 2)

    # block4
    self.conv8 = nn.Conv2d(256, 512, 3, padding=1)
    self.conv9 = nn.Conv2d(512, 512, 3, padding=1)
    self.conv10 = nn.Conv2d(512, 512, 3, padding=1)
    self.pool4 = nn.MaxPool2d(2, 2)

    # block5
    self.conv11 = nn.Conv2d(512, 512, 3, padding=1)
    self.conv12 = nn.Conv2d(512, 512, 3, padding=1)
    self.conv13 = nn.Conv2d(512, 512, 3, padding=1)
    self.pool5 = nn.MaxPool2d(2,2)

    # FC层
    self.linear1 = nn.Linear(512*7*7, 4096)
    self.linear2 = nn.Linear(4096, 4096)
    self.linear3 = nn.Linear(4096, 10)

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = self.pool1(x)

    x = F.relu(self.conv3(x))
    x = F.relu(self.conv4(x))
    x = self.pool2(x)

    x = F.relu(self.conv5(x))
    x = F.relu(self.conv6(x))
    x = F.relu(self.conv7(x))
    x = self.pool3(x)

    x = F.relu(self.conv8(x))
    x = F.relu(self.conv9(x))
    x = F.relu(self.conv10(x))
    x = self.pool4(x)

    x = F.relu(self.conv11(x))
    x = F.relu(self.conv12(x))
    x = F.relu(self.conv13(x))
    x = self.pool5(x)
    
    x = x.view(-1, 512*7*7)

    x = F.relu(self.linear1(F.dropout(x, p=0.5)))
    x = F.relu(self.linear2(F.dropout(x, p=0.5)))

    output = F.softmax(self.linear3(x), dim=1)
    return output

vgg = VGG16()
summary(vgg, input_size=(10, 3, 224, 224), device='cpu')


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Layer (type:depth-idx)                   Output Shape              Param #
VGG16                                    [10, 10]                  --
├─Conv2d: 1-1                            [10, 64, 224, 224]        1,792
├─Conv2d: 1-2                            [10, 64, 224, 224]        36,928
├─MaxPool2d: 1-3                         [10, 64, 112, 112]        --
├─Conv2d: 1-4                            [10, 128, 112, 112]       73,856
├─Conv2d: 1-5                            [10, 128, 112, 112]       147,584
├─MaxPool2d: 1-6                         [10, 128, 56, 56]         --
├─Conv2d: 1-7                            [10, 256, 56, 56]         295,168
├─Conv2d: 1-8                            [10, 256, 56, 56]         590,080
├─Conv2d: 1-9                            [10, 256, 56, 56]         590,080
├─MaxPool2d: 1-10                        [10, 256, 28, 28]         --
├─Conv2d: 1-11                           [10, 512, 28, 28]         1,180,160
├─Conv2d: 1-12                           [10, 5

In [None]:
# 计算LeNet5以及AlexNet各层的感受野的大小
!pip install torchinfo
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchinfo import summary

data = torch.ones(size=(10, 1, 32, 32))

class LeNet5(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 6, 5)
    self.pool1 = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.pool2 = nn.MaxPool2d(2, 2)
    self.fc1 = nn.Linear(16*5*5, 120)
    self.fc2 = nn.Linear(120, 84)

  def forward(self, x):
    x = self.tanh(self.conv1(x))
    x = self.pool1(x)
    x = self.tanh(self.conv2(x))
    x = self.pool2(x)
    x = x.view(-1, 16*5*5)
    x = F.tanh(self.fc1(x))
    output = F.softmax(self.fc2(x), dim=1)
    output = F.softmax(x.view(-1, 16*5*5), dim=1)
    
net = LeNet5()

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# 计算卷积层中参数的数量
conv1 = nn.Conv2d(3, 6, 3)
conv2 = nn.Conv2d(6, 4, 3)

print(conv1.weight.numel())
print(conv1.bias.numel())
print(conv2.weight.numel())
print(conv2.bias.numel())

conv3 = nn.Conv2d(4, 16, 5, stride=2, padding=1) #(5*5*4)*16+16
conv4 = nn.Conv2d(16, 3, 5, stride=3, padding=2) #(5*5*16)*3+3
print(conv3.weight.numel())
print(conv4.weight.numel())

162
6
216
4
1600
1200


In [None]:
# 使用nn.Sequential
import torch
import torch.nn as nn
data = torch.ones(size=(10, 3, 229, 229))
# 不使用类，直接将需要串联的网络、函数等信息写在一个‘序列'里面
net = nn.Sequential(nn.Conv2d(3, 6, 3), nn.ReLU(inplace=True),
           nn.Conv2d(6, 4, 3), nn.ReLU(inplace=True),
           nn.MaxPool2d(2),
           nn.Conv2d(4, 16, 5, stride=2, padding=1), nn.ReLU(inplace=True),
           nn.Conv2d(16, 3, 5, stride=3, padding=2), nn.ReLU(inplace=True),
           nn.MaxPool2d(2))

net(data).shape

torch.Size([10, 3, 9, 9])

In [None]:
# 使用nn.Sequential复现VGG16
!pip install torchinfo
import torch
import torch.nn as nn
from torchinfo import summary
class VGG16(nn.Module):
  def __init__(self):
    super().__init__()
    self.features_ = nn.Sequential(nn.Conv2d(3, 64, 3, padding=1), nn.ReLU(inplace=True),
                    nn.Conv2d(64, 64, 3, padding=1), nn.ReLU(inplace=True),
                    nn.MaxPool2d(2),
                    
                    nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(inplace=True),
                    nn.Conv2d(128, 128, 3, padding=1), nn.ReLU(inplace=True),
                    nn.MaxPool2d(2),
                    
                    nn.Conv2d(128, 256, 3, padding=1), nn.ReLU(inplace=True),
                    nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True),
                    nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True),
                    nn.MaxPool2d(2),
                    
                    nn.Conv2d(256, 512, 3, padding=1), nn.ReLU(inplace=True),
                    nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(inplace=True),
                    nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(inplace=True),
                    nn.MaxPool2d(2),
                    
                    nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(inplace=True),
                    nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(inplace=True),
                    nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(inplace=True),
                    nn.MaxPool2d(2))
    
    self.clf_ = nn.Sequential(nn.Dropout(0.5),
                  nn.Linear(512*7*7, 4096), nn.ReLU(inplace=True),
                  nn.Dropout(0.5),
                  nn.Linear(4096, 4096), nn.ReLU(inplace=True),
                  nn.Linear(4096, 1000), nn.Softmax(dim=1))
  
  def forward(self, x):
    x = self.features_(x)
    x = x.view(-1, 512*7*7)
    output = self.clf_(x)
    return output

vgg = VGG16()
summary(vgg, input_size=(10, 3, 224, 224), device='cpu')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Layer (type:depth-idx)                   Output Shape              Param #
VGG16                                    [10, 1000]                --
├─Sequential: 1-1                        [10, 512, 7, 7]           --
│    └─Conv2d: 2-1                       [10, 64, 224, 224]        1,792
│    └─ReLU: 2-2                         [10, 64, 224, 224]        --
│    └─Conv2d: 2-3                       [10, 64, 224, 224]        36,928
│    └─ReLU: 2-4                         [10, 64, 224, 224]        --
│    └─MaxPool2d: 2-5                    [10, 64, 112, 112]        --
│    └─Conv2d: 2-6                       [10, 128, 112, 112]       73,856
│    └─ReLU: 2-7                         [10, 128, 112, 112]       --
│    └─Conv2d: 2-8                       [10, 128, 112, 112]       147,584
│    └─ReLU: 2-9                         [10, 128, 112, 112]       --
│    └─MaxPool2d: 2-10                   [10, 128, 56, 56]         --
│    └─Conv2d: 2-11                      [10, 256, 56, 56]         29

In [None]:
# NiN网络的复现
!pip install torchinfo
import torch 
import torch.nn as nn
from torchinfo import summary

data = torch.ones(size=(10, 3, 32, 32))

class NiN(nn.Module):
  def __init__(self):
    super().__init__()
    self.block1 = nn.Sequential(nn.Conv2d(3, 192, 5, padding=2), nn.ReLU(inplace=True),
                   nn.Conv2d(192, 160, 1), nn.ReLU(inplace=True),
                   nn.Conv2d(160, 96, 1), nn.ReLU(inplace=True),
                   nn.MaxPool2d(3, stride=2),
                   nn.Dropout(0.25))
    
    self.block2 = nn.Sequential(nn.Conv2d(96, 192, 5, padding=2), nn.ReLU(inplace=True),
                   nn.Conv2d(192, 192, 1), nn.ReLU(inplace=True),
                   nn.Conv2d(192, 192, 1), nn.ReLU(inplace=True),
                   nn.MaxPool2d(3, stride=2), 
                   nn.Dropout(0.25))
    
    self.block3 = nn.Sequential(nn.Conv2d(192, 192, 3, padding=1), nn.ReLU(inplace=True),
                   nn.Conv2d(192, 192, 1), nn.ReLU(inplace=True),
                   nn.Conv2d(192, 10, 1), nn.ReLU(inplace=True),
                   nn.AvgPool2d(7, stride=1),
                   nn.Softmax(dim=1))
    
  def forward(self, x):
    output = self.block3(self.block2(self.block1(x)))
    return output

net = NiN()
net(data).shape
summary(net, input_size=(10, 3, 32, 32))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Layer (type:depth-idx)                   Output Shape              Param #
NiN                                      [10, 10, 1, 1]            --
├─Sequential: 1-1                        [10, 96, 15, 15]          --
│    └─Conv2d: 2-1                       [10, 192, 32, 32]         14,592
│    └─ReLU: 2-2                         [10, 192, 32, 32]         --
│    └─Conv2d: 2-3                       [10, 160, 32, 32]         30,880
│    └─ReLU: 2-4                         [10, 160, 32, 32]         --
│    └─Conv2d: 2-5                       [10, 96, 32, 32]          15,456
│    └─ReLU: 2-6                         [10, 96, 32, 32]          --
│    └─MaxPool2d: 2-7                    [10, 96, 15, 15]          --
│    └─Dropout: 2-8                      [10, 96, 15, 15]          --
├─Sequential: 1-2                        [10, 192, 7, 7]           --
│    └─Conv2d: 2-9                       [10, 192, 15, 15]         460,992
│    └─ReLU: 2-10                        [10, 192, 15, 15]         -

In [None]:
# GoogleNet的复现
!pip install torchinfo
import torch
from torch import nn
from torchinfo import summary

class BasicConv2d(nn.Module):
  def __init__(self,in_channels, out_channels, **kwargs):
    super().__init__()
    self.conv = nn.Sequential(nn.Conv2d(in_channels, out_channels, bias=False, **kwargs), 
                  nn.BatchNorm2d(out_channels), 
                  nn.ReLU(inplace=True))
    
  def forward(self, x):
    x = self.conv(x)
    return x

# 测试
BasicConv2d(2, 10, kernel_size=3)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


BasicConv2d(
  (conv): Sequential(
    (0): Conv2d(2, 10, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
)

In [None]:
from torch.nn.modules.container import Sequential
# 接下来，我们需要定义Inception块，由于Inception块是并联结构，存在4个branchs，所以不能够使用nn.Sequential进行打包
class Inception(nn.Module):
  def __init__(self, in_channels: int,
             ch1x1: int, 
             ch3x3red: int,
             ch3x3: int,
             ch5x5red: int,
             ch5x5: int,
             pool_proj: int):
    super().__init__()
    self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)
    self.branch2 = nn.Sequential(BasicConv2d(in_channels, ch3x3red, kernel_size=1), 
                   BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1))

    self.branch3 = nn.Sequential(BasicConv2d(in_channels, ch5x5red, kernel_size=1), 
                   BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2))

    self.branch4 = nn.Sequential(nn.MaxPool2d(kernel_size=3, stride=1), 
                   BasicConv2d(in_channels, pool_proj, kernel_size=1))
  
  def forward(self, x):
    branch1 = self.branch1(x)
    branch2 = self.branch2(x)
    branch3 = self.branch3(x)
    branch4 = self.branch4(x)
    outputs = [branch1, branch2, branch3, branch4]
    return torch.cat(outputs, dim=1)

# 测试
Inception(256, 64, 96, 128, 16, 32, 32) #这是inception3a的参数

Inception(
  (branch1): BasicConv2d(
    (conv): Sequential(
      (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (branch2): Sequential(
    (0): BasicConv2d(
      (conv): Sequential(
        (0): Conv2d(256, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
    (1): BasicConv2d(
      (conv): Sequential(
        (0): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
  )
  (branch3): Sequential(
    (0): BasicConv2d(
      (conv): Sequential(
        (0): Conv2d(256, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): 

In [None]:
# 辅助分类器
class AuxClf(nn.Module):
  def __init__(self,in_channels, num_classes, **kwargs):
    super().__init__()
    self.features_ = nn.Sequential(nn.AvgPool2d(kernel_size=5, stride=3), 
                    BasicConv2d(in_channels, 128, kernel_size=1))
    
    self.clf_ = nn.Sequential(nn.Linear(4*4*128, 1024), 
                  nn.ReLU(inplace=True),
                  nn.Dropout(0.7),
                  nn.Linear(1024, num_classes))
    
  def forward(self, x):
    x = self.features_(x)
    x = x.view(-1, 4*4*128)
    x = self.clf_(x)
    return x

# 测试
AuxClf(512, 1000)

AuxClf(
  (features_): Sequential(
    (0): AvgPool2d(kernel_size=5, stride=3, padding=0)
    (1): BasicConv2d(
      (conv): Sequential(
        (0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
  )
  (clf_): Sequential(
    (0): Linear(in_features=2048, out_features=1024, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.7, inplace=False)
    (3): Linear(in_features=1024, out_features=1000, bias=True)
  )
)

In [None]:
# 定义好三个单独的类之后，我们再实现GoogleNet的完整架构
class GoogleNet(nn.Module):
  def __init__(self, num_classes=1000, blocks=None):
    super().__init__()
    if blocks is None:
      blocks = [BasicConv2d, Inception, AuxClf]
    conv_block = blocks[0]
    inception_block = blocks[1]
    aux_clf_block = blocks[2]

    # block1
    self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
    self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

    # block2
    self.conv2 = conv_block(64, 64, kernel_size=1)
    self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
    self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

    # block3
    self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32)
    self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64)
    self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

    # block4
    self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64)
    self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64)
    self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64)
    self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64)
    self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128)
    self.maxpool4 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

    # block5
    self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128)
    self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128)

    # auxclf
    self.aux1 = aux_clf_block(512, num_classes)
    self.aux2 = aux_clf_block(528, num_classes)

    # clf
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    # 自适应平局池化， 可以自动为我们输出(1, 1)尺寸的特征图
    # 在这里就相当于全局平均池化了
    self.dropout = nn.Dropout(0.4)
    self.fc = nn.Linear(1024, num_classes)

  def forward(self, x):
    # block1
    x = self.conv1(x)
    x = self.maxpool1(x)

    # block2
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.maxpool2(x)

    # block3
    x = self.inception3a(x)
    x = self.inception3b(x)
    x = self.maxpool3(x)

    # block4
    x = self.inception4a(x)
    aux1 = self.aux1(x)

    x = self.inception4b(x)
    x = self.inception4c(x)
    x = self.inception4d(x)
    aux2 = self.aux2(x)

    x = self.inception4e(x)
    x = self.maxpool4(x)

    # block5
    x = self.inception5a(x)
    x = self.inception5b(x)

    # clf
    x = self.avgpool(x)
    x = torch.view(-1, )
    x = self.dropout(x)
    x = self.fc(x)

    return x, aux2, aux1

In [3]:
# 复现残差网络(3x3卷积层和1x1卷积层)，每个卷积层后都要跟上BN层，而BN层上可以完成参数初始化
!pip install torchinfo
import torch
import torch.nn as nn
from typing import Type, Union, List, Optional
from torchinfo import summary

def conv3x3(in_, out_, stride=1, initialzero=False):
  bn = nn.BatchNorm2d(out_)
  if initialzero == True:
    nn.init.constant_(bn.weight, 0)
  return nn.Sequential(nn.Conv2d(in_, out_, kernel_size=3, stride=stride, padding=1, bias=False), bn)
  # kernel_size一定是3，搭配的padding一定是1，我们将这些参数写死，确保这些参数无法进行修改

def conv1x1(in_, out_, stride=1, initialzero=False):
  bn = nn.BatchNorm2d(out_)
  if initialzero == False:
    nn.init.constant_(bn.weight, 0)
  return nn.Sequential(nn.Conv2d(in_, out_, kernel_size=1, stride=stride, padding=0, bias=False), bn)
  # kernel_size一定是1，搭配的padding一定是0

# 查看函数返回的结果，虽然我们定义的是函数，但实际上最后返回的是一个nn.Sequential类
conv1x1(2, 10, True)

# 测试，initialzero参数是否有效
# 随意设置的参数值
conv1x1(2, 10, 1, True)[1].weight
conv1x1(2, 10, 1, False)[1].weight

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [4]:
# 定义基础残差单元类：一个残差单元中只包含两个卷积层和一个加和功能
class ResidualUnit_draft(nn.Module):
  def __init__(self, in_:int, out_:int): 
    super().__init__()
    
    # 拟合部分，输出F(x)
    self.fit = nn.Sequential(conv3x3(in_, out_), 
                 nn.ReLU(inplace=True),
                 conv3x3(out_, out_))
    
    # 最后的H(x)需要使用的ReLU函数
    self.relu = nn.ReLU(inplace=True)
  
  def forward(self, x):
    fx = self.fit_(x) #拟合结果
    x = x # 跳跃连接
    hx = self.relu(fx + x)
    return hx

In [None]:
# 考虑初始化和步长
class ResidualUnit_draft(nn.Module):
  def __init__(self, in_:int, out_:int, stride1:int=1):
    super().__init__()
    self.stride1 = stride1

    # 拟合部分，输出F(x)
    self.fit_ = nn.Sequential(conv3x3(in_, out_, stride=stride1),
                  nn.ReLU(inplace=True),
                  conv3x3(in_, out_, initialzero=True))
    # 跳跃连接，输出x
    self.skipconv = conv1x1(in_, out_, stride=stride1)
    # 最后的H(x)需要使用的ReLU函数
    self.relu = nn.ReLU(inplace=True)

  def forward(self, x):
    fx = self.fit_(x) # 拟合结果
    if self.stride1 != 1:
      x = self.skipconv(x) # 跳跃连接
    hx = self.relu(fx + x) 
    return hx

In [9]:
# 特征图尺寸的变化，消去参数in_，保留out_，就可以将现在写的残差单元类和之后要写的瓶颈架构类打包在一个类中
class ResidualUnit(nn.Module):
  # 根据是否能将特征图折半，执行不同流程的残差单元
  # 如果将特征特尺寸折半，则将输出特征图尺寸翻倍，并在skipconnection上放置1x1卷积网络
  # 反之，则不改变特征图尺寸，也不使用任何1x1卷积网络
  def __init__(self, out_, stride1:int=1, in_:Optional[int]=None):
    super().__init__()
    self.stride1 = stride1 # stride1:第一个卷积层/跳跃层中1x1卷积的步长
    if stride1 != 1:
      in_ = int(out_ / 2)
    else:
      in_ = out_
    
    self.fit_ = nn.Sequential(conv3x3(in_, out_, stride1),
                  nn.ReLU(inplace=True),
                  conv3x3(out_, out_, initialzero=True))
    
    self.skipconv = conv1x1(in_, out_, stride1)
    self.relu = nn.ReLU(inplace=True)

  def forward(self, x):
    fx = self.fit_(x)
    if self.stride1 != 1:
      x = self.skipconv(x)
    hx = self.relu(fx + x)
    return hx

# 测试
data = torch.ones(size=(10, 64, 56, 56))
conv3_x_18_0 = ResidualUnit(128, stride1=2) # 特征图尺寸折半，特征图数量加倍
print(conv3_x_18_0(data).shape)

conv2_x_18_0 = ResidualUnit(64) # 特征图尺寸不变，特征图数量也不变
print(conv2_x_18_0(data).shape)

torch.Size([10, 128, 28, 28])
torch.Size([10, 64, 56, 56])


In [18]:
# 瓶颈架构的基础架构复现
class Bottleneck(nn.Module):
  def __init__(self, middle_out:int, stride1:int=1, in_:Optional[int]=None):
    '''
    in_: 输入瓶颈结构的特征图的数量，仅在conv1之后紧跟的瓶颈结构才进行填写，其他时候不填写
    stride1: 第一个卷积层/跳跃连接中1x1卷积层的步长

    '''

    super().__init__()
    self.stride1 = stride1
    
    # 最终的输出量 = 中间输出量的4倍
    out_ = middle_out * 4

    if in_ == None:
      # 不是conv1后紧跟的第一个瓶颈结构
      # 需要缩小特征图：输入量 = 中间量 * 2
      # 不需要缩小特征图，输入量 = 中间输出量 * 4
      if stride1 != 1:
        in_ = middle_out * 2
      else:
        in_ = middle_out * 4

    self.fit_ = nn.Sequential(conv1x1(in_, middle_out, stride1),
                  nn.ReLU(inplace=True),
                  conv3x3(middle_out, middle_out),
                  nn.ReLU(inplace=True),
                  conv1x1(middle_out, out_, initialzero=True))
                  # 最后一个1x1卷积层的输出一定是输入的4倍
   
    self.skipconv = conv1x1(in_, out_, stride1)
    self.relu = nn.ReLU(inplace=True)

  def forward(self, x):
    fx = self.fit_(x)
    # 对瓶颈架构而言，输入x和输出F(x)的特征图的数量一定不一致，因此x必须要经过1x1卷积层的池化
    x = self.skipconv(x)
    hx = self.relu(x + fx)
    return hx

# 测试
data1 = torch.ones(10, 64, 56, 56)
# 是conv1后紧跟的第一个瓶颈结构
conv2_x_101_0 = Bottleneck(in_=64, middle_out=64)
print(conv2_x_101_0(data1).shape)

data2 = torch.ones(10, 256, 56, 56)
# 不是conv1后紧跟的第一个瓶颈结构，但是需要缩小特征图尺寸
conv3_x_101_0 = Bottleneck(middle_out=128, stride1=2)
print(conv3_x_101_0(data2).shape) # 输出翻2倍并缩小特征图尺寸至一半

data3 = torch.ones(10, 512, 28, 28)
# 不是conv1后紧跟的第一个瓶颈结构，也不需要缩小特征图尺寸
conv3_x_101_1 = Bottleneck(128)
print(conv3_x_101_1(data3).shape)

torch.Size([10, 256, 56, 56])
torch.Size([10, 512, 28, 28])
torch.Size([10, 512, 28, 28])
