# resnet 残差网络
随着我们设计的网络越来越深，深刻理解“新添加的层如何提升神经网络的性能”变得至关重要。更重要的是要具备设计这样一类网络的能力：在其中添加层会使网络更具表现力。

何恺明等人提出的ResNet在2015年的ImageNet图像识别挑战赛中夺魁，并深刻影响了后来深度神经网络的设计。

残差网络的核心思想是：每个附加层都应该更容易地包含原始函数作为其元素之一。于是残差块（ResBlock）便诞生了。

In [5]:
import torch as t
import torch.nn as nn
from torch.nn.functional import relu

In [6]:
class Residual(nn.Module):
    """Basic residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    The main branch is ``conv1 -> bn1 -> relu -> conv2 -> bn2``. Its result is
    added to the shortcut branch and passed through a final ReLU. The shortcut
    is the unchanged input unless ``use_1x1conv`` is set, in which case a 1x1
    convolution with the same stride as ``conv1`` transforms the input first.

    Args:
        input_channels: Number of channels of the incoming tensor.
        num_channels: Number of channels produced by the block.
        use_1x1conv: Whether to put a 1x1 convolution on the shortcut. Enable
            it when ``input_channels != num_channels`` or ``strides != 1``;
            without it the two branches generally no longer have matching
            shapes for the addition in ``forward``.
        strides: Stride of the first 3x3 convolution and of the 1x1 shortcut.
    """

    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1) -> None:
        super().__init__()

        # Only the first convolution may change the spatial size; the second
        # always uses stride 1.
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)

        # Optional 1x1 convolution that adapts the shortcut's channel count
        # and stride to those of the main branch.
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None

        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X: t.Tensor) -> t.Tensor:
        """Return ``relu(main_branch(X) + shortcut(X))``."""
        Y = relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Explicit None check instead of relying on the truthiness of a module.
        if self.conv3 is not None:
            X = self.conv3(X)
        # The residual connection: add the shortcut before the last activation.
        return relu(Y + X)

In [7]:
# Typical usage: with equal channel counts and the default stride of 1 the
# block returns a tensor of the same shape as its input.
blk = Residual(3, 3)
X = t.rand(4, 3, 6, 6)
# Call the module itself rather than ``forward`` so that any registered hooks run.
Y = blk(X)
Y.shape

torch.Size([4, 3, 6, 6])

In [8]:
# Shrink height and width: stride 2 together with a 1x1 convolution on the
# shortcut, while changing the channel count from 3 to 6.
blk = Residual(3, 6, use_1x1conv=True, strides=2)
# Call the module itself rather than ``forward`` so that any registered hooks run.
blk(X).shape


torch.Size([4, 6, 3, 3])

# 定义ResNet模型
resnet 18

In [9]:
# Stem of the network: a strided 7x7 convolution on a single-channel input,
# batch normalisation, ReLU, and a strided 3x3 max-pool.
b1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

def res_block(input_channels, num_channels, num_residuals, first_block=False) -> nn.Sequential:
    """Build one stage made of ``num_residuals`` consecutive ``Residual`` blocks.

    Args:
        input_channels: Number of channels entering the stage.
        num_channels: Number of channels produced by every block of the stage.
        num_residuals: How many ``Residual`` blocks to stack.
        first_block: When true, the first block keeps stride 1 instead of
            down-sampling with stride 2.

    Returns:
        An ``nn.Sequential`` holding the blocks in order.
    """
    blk = []
    for i in range(num_residuals):
        if i > 0:
            # Every block after the first already sees ``num_channels`` inputs.
            blk.append(Residual(num_channels, num_channels))
        elif first_block:
            # No down-sampling here. Honour ``input_channels`` and add a 1x1
            # shortcut only when the channel count changes; with equal counts
            # this is a plain ``Residual(num_channels, num_channels)``.
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=input_channels != num_channels))
        else:
            # Stride-2 block with a 1x1 convolution on the shortcut.
            blk.append(Residual(input_channels, num_channels, use_1x1conv=True, strides=2))
    return nn.Sequential(*blk)

# Four stages of two residual blocks each. Only the first stage is built with
# ``first_block=True``; the others go from 64 to 128, 256 and 512 channels.
b2, b3, b4, b5 = (
    res_block(64, 64, 2, first_block=True),
    res_block(64, 128, 2),
    res_block(128, 256, 2),
    res_block(256, 512, 2),
)

# Full model: stem, the four stages, global average pooling to 1x1,
# flattening, and a linear layer with 10 outputs.
net = nn.Sequential(
    b1,
    b2,
    b3,
    b4,
    b5,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(512, 10),
)

In [11]:
# Print the nested module structure of the assembled network.
print(net)

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (0): Residual(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Residual(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(64, eps=1e-

In [10]:
# Push one random single-channel 224x224 input through the network, one
# top-level module at a time, printing the output shape after each.
X = t.rand(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(type(layer).__name__, 'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 128, 28, 28])
Sequential output shape:	 torch.Size([1, 256, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 512, 1, 1])
Flatten output shape:	 torch.Size([1, 512])
Linear output shape:	 torch.Size([1, 10])


```
Sequential(
  (0): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (0): Residual(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Residual(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): Sequential(
    (0): Residual(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Residual(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (3): Sequential(
    (0): Residual(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Residual(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (4): Sequential(
    (0): Residual(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Residual(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (5): AdaptiveAvgPool2d(output_size=(1, 1))
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=512, out_features=10, bias=True)
)

```

# torchvision 中的resnet

In [None]:
import torch as t
from torch.nn import Flatten,Linear,Conv2d,Module,BatchNorm2d,ReLU,MaxPool2d,Sequential,AdaptiveAvgPool2d
from typing import List,Optional
from torch import Tensor
class Bottleneck(Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The first 1x1 convolution maps ``in_channels`` to ``channels``, the 3x3
    convolution (which carries the stride) keeps ``channels``, and the last
    1x1 convolution expands to ``channels * expansion``. Each convolution is
    followed by batch normalisation. The shortcut is added before the final ReLU.

    Args:
        in_channels: Number of channels of the incoming tensor.
        channels: Width of the inner 3x3 convolution.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to produce the
            shortcut. When ``None`` the input itself is used, so it must
            already have the same shape as the main branch's output.
    """

    # Ratio between the block's output channels and ``channels``.
    expansion = 4

    def __init__(self, in_channels, channels, stride=1, downsample: Optional[Module] = None):
        super().__init__()
        self.conv1 = Conv2d(in_channels, channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = BatchNorm2d(channels)
        self.conv2 = Conv2d(channels, channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = BatchNorm2d(channels)
        self.conv3 = Conv2d(channels, channels * self.expansion, kernel_size=1, stride=1, bias=False)
        self.bn3 = BatchNorm2d(channels * self.expansion)
        self.relu = ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x: Tensor) -> Tensor:
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        # Chain on ``out``: feeding ``x`` here would discard conv1/bn1 and
        # fail whenever ``in_channels != channels``.
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        identity = x if self.downsample is None else self.downsample(x)

        out += identity
        return self.relu(out)

In [None]:
class ResNet(Module):
    """ResNet assembled from ``Bottleneck`` blocks.

    The network is a stem (7x7 convolution, batch norm, ReLU, max-pool), four
    residual stages with inner widths 64, 128, 256 and 512, global average
    pooling, and a fully connected layer.

    NOTE(review): the attribute names (``conv1``, ``bn1``, ``layer1`` ...
    ``layer4``, ``fc``) are presumably chosen to match the checkpoints loaded
    elsewhere in this file; confirm before renaming any of them.

    Args:
        layers: Number of bottleneck blocks in each of the four stages.
        num_classes: Number of outputs of the final fully connected layer.
    """

    def __init__(self, layers: List[int], num_classes=1000) -> None:
        super().__init__()
        # Channel count entering the next block; updated by ``_make_layer``.
        self.in_channels = 64
        self.bottleneck = Bottleneck
        # The following layers define stage 1 (before the residual blocks).
        self.conv1 = Conv2d(3, self.in_channels, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = BatchNorm2d(self.in_channels)
        self.relu = ReLU()
        self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1)
        # The following layers define stages 2-5 (the residual blocks).
        self.layer1 = self._make_layer(64, layers[0])
        self.layer2 = self._make_layer(128, layers[1], stride=2)
        self.layer3 = self._make_layer(256, layers[2], stride=2)
        self.layer4 = self._make_layer(512, layers[3], stride=2)
        self.avgpool = AdaptiveAvgPool2d((1, 1))
        self.fc = Linear(512 * self.bottleneck.expansion, num_classes)

    def _make_layer(self, channels, num_bottleneck, stride=1):
        """Build one stage and advance ``self.in_channels`` past it.

        The first block always gets a 1x1 convolution + batch norm on its
        shortcut and carries ``stride``. The remaining ``num_bottleneck - 1``
        blocks use the plain input as their shortcut.
        """
        out_channels = channels * self.bottleneck.expansion
        downsample = Sequential(
            Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
            BatchNorm2d(out_channels),
        )
        layers = [self.bottleneck(self.in_channels, channels, stride, downsample)]
        # Every later block sees the expanded output of its predecessor. Set
        # this directly instead of deriving it from the previous value, which
        # only worked for the exact 64/128/256/512 progression.
        self.in_channels = out_channels
        layers.extend(
            self.bottleneck(self.in_channels, channels)
            for _ in range(1, num_bottleneck)
        )
        return Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Run the stem, the four stages, pooling and the classifier on ``x``."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling, then flatten everything but the batch dim.
        out = self.avgpool(out)
        out = t.flatten(out, 1)
        return self.fc(out)

    # The factories take no ``self``; ``@staticmethod`` keeps
    # ``ResNet.ResNet50(n)`` working and also allows calls on an instance.
    @staticmethod
    def ResNet50(num_classess):
        """Return a ResNet with stage depths [3, 4, 6, 3]."""
        return ResNet([3, 4, 6, 3], num_classess)

    @staticmethod
    def ResNet101(num_classess):
        """Return a ResNet with stage depths [3, 4, 23, 3]."""
        return ResNet([3, 4, 23, 3], num_classess)

    @staticmethod
    def ResNet152(num_classess):
        """Return a ResNet with stage depths [3, 8, 36, 3]."""
        return ResNet([3, 8, 36, 3], num_classess)

In [None]:
if __name__ == '__main__':
    num_classess = 1000
    # Build the model structure.
    resnet152 = ResNet.ResNet152(num_classess)
    # Load parameters from a pretrained model.
    pretraind_model_urls = {
        'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
        'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
        'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', }
    # ``torch.hub`` is the public home of ``load_state_dict_from_url``; the
    # ``torchvision.models.utils`` path is not available in recent
    # torchvision releases.
    state_dict = t.hub.load_state_dict_from_url(
        pretraind_model_urls['resnet152'],
        progress=True)
    # ``load_state_dict`` updates the model in place and returns a report of
    # missing/unexpected keys. Do not rebind ``resnet152`` to that report.
    resnet152.load_state_dict(state_dict)
