# 卷积神经网络

 ## 1 简单的卷积网络
 ### 1.1 卷积模块

In [1]:
import torch
import math
import numpy as np
from torch import nn, optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

In [2]:
# 定义简单的卷积网络模型
class SimpleCNN(nn.Module):
    def __init__(self):            # 定义网络结构
        super().__init__()         # 输入 [batch_size,3,32,32] 3表示深度
        layer1 = nn.Sequential()   # 叠加第1个网络层，卷积层
        # (in_channels, out_channels, kernel_size,stride,padding)
        layer1.add_module('conv1', nn.Conv2d(32, 3, 1, padding=1))
        # get [batch_size, 32, 32, 32]
        layer1.add_module('relu1', nn.ReLU(True))
        layer1.add_module('pool1', nn.MaxPool2d(2,2))
        # get [batch_size, 32, 16, 16]
        self.layer1 = layer1
        
        layer2 = nn.Sequential()   # 定义第2个网络层
        layer2.add_module('conv2', nn.Conv2d(32, 64, 3, 1, padding=1))
        # get [batch_size, 64, 16, 16] 
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(2,2))
        # get [batch_size, 64, 8, 8]
        self.layer2 = layer2
        
        layer3 = nn.Sequential()   # 定义第3个网络层
        layer3.add_module('conv3', nn.Conv2d(64, 128, 3, 1, padding=1))
        # get [batch_size, 128, 8, 8]
        layer3.add_module('relu3', nn.ReLU(True))
        layer3.add_module('pool3', nn.MaxPool2d(2,2))
        # get [batch_size, 128, 4, 4]
        self.layer3 = layer3
        
        layer4 = nn.Sequential()   # 定义第4个网络层，全连接层
        layer4.add_module('fc1', nn.Linear(128*4*4, 512))
        # get [batch_size, 512]
        layer4.add_module('fc_relu1', nn.ReLU(True))
        layer4.add_module('fc2', nn.Linear(512, 64))
        # get [batch_size, 64]
        layer4.add_module('fc2_relu2', nn.ReLU(True))
        layer4.add_module('fc3', nn.Linear(64, 10))
        # get [batch_size, 10]
        self.layer4 = layer4
    def forward(self, x):
        conv1 = self.layer1(x)   # 前3层为卷积层
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        # 注意：全连接前要把数据的维度将为两维，-1维度长*宽*深度
        fc_input = conv3.view(conv3.size(0), -1)  # reshape为 [batch_size, -1]
        fc_out = self.layer4(fc_input)
        return fc_out
model = SimpleCNN()
print(model)

SimpleCNN(
  (layer1): Sequential(
    (conv1): Conv2d(32, 3, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace)
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer4): Sequential(
    (fc1): Linear(in_features=2048, out_features=512, bias=True)
    (fc_relu1): ReLU(inplace)
    (fc2): Linear(in_features=512, out_features=64, bias=True)
    (fc2_relu2): ReLU(inplace)
    (fc3): Linear(in_features=64, out_features=10, bias=True)
  )
)


小结:
- 模型层顺序：CNN+BN+ReLU+Pooling，先激活再池化;FC+ReLU+Dropout
- 输出层不用激活函数

### 1.2 提取模型的层结构

|nn.Module属性|功能|实例|
|-|-|-|
|children()|返回下一级迭代器|self.layer1|
|modules()|返回所有模块迭代器|self.layer1.conv1|
|named_children()|返回模块的名称|其他功能同上|
|named_modules()|返回模块的名称|其他功能同上|

In [3]:
# 提取已有模型中的结构
new_model = nn.Sequential(*list(model.children())[:2])   # *代表使用可变参数
# print(new_model)
# list(model.children())[0]
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        print(m, m.kernel_size, m.out_channels)
        print('-----')

Conv2d(32, 3, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1)) (1, 1) 3
-----
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (3, 3) 64
-----
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (3, 3) 128
-----


In [4]:
# 提取模型中所有的卷积层
conv_model = nn.Sequential()
for layer in model.named_modules():    # layer[0]层名称, layer[1]层类型
#     print(layer)

    if isinstance(layer[1], nn.Conv2d):  
        print(layer)
        print('-'*50)
        conv_model.add_module(layer[0].split('.')[-1], layer[1])
print('1.新网络的结构:')
print(conv_model)

('layer1.conv1', Conv2d(32, 3, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1)))
--------------------------------------------------
('layer2.conv2', Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)))
--------------------------------------------------
('layer3.conv3', Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)))
--------------------------------------------------
1.新网络的结构:
Sequential(
  (conv1): Conv2d(32, 3, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)


### 1.3 提取参数及初始化

|nn.Module属性|功能|
|-|-|
|parameters()|返回全部参数的迭代器|
|named_parameters()|返回模块的名称，其他功能同上|

In [5]:
# 提取模型的所有参数
for param in model.named_parameters():
    # param[0]:层名称.weight/bias  param[1]:参数值，权重是Variable
    print(param[0])
    print((param[1]).data.shape)
    print('-'*50)

layer1.conv1.weight
torch.Size([3, 32, 1, 1])
--------------------------------------------------
layer1.conv1.bias
torch.Size([3])
--------------------------------------------------
layer2.conv2.weight
torch.Size([64, 32, 3, 3])
--------------------------------------------------
layer2.conv2.bias
torch.Size([64])
--------------------------------------------------
layer3.conv3.weight
torch.Size([128, 64, 3, 3])
--------------------------------------------------
layer3.conv3.bias
torch.Size([128])
--------------------------------------------------
layer4.fc1.weight
torch.Size([512, 2048])
--------------------------------------------------
layer4.fc1.bias
torch.Size([512])
--------------------------------------------------
layer4.fc2.weight
torch.Size([64, 512])
--------------------------------------------------
layer4.fc2.bias
torch.Size([64])
--------------------------------------------------
layer4.fc3.weight
torch.Size([10, 64])
--------------------------------------------------
layer

In [6]:
# 提取参数并初始化????????
init = nn.init
for m in model.modules():  # 访问所有模块
    if isinstance(m, nn.Conv2d):
        init.normal_(m.weight.data)
        init.xavier_normal_(m.weight.data)
        init.kaiming_normal_(m.weight.data)
        m.bias.data.fill_(0)
    elif isinstance(m, nn.Linear):
        m.weight.data.normal_()

## 2 卷积神经网络实例
### 2.1 LeNet网络

conv层参数设置：[in_channels,out_channels,kernel_size,stride,padding]
$$W_{conv} = \frac{W_{input}-kernel\_size+2*padding}{stride}+1$$

maxpooling层参数设置：[kernel_size,stride,padding]

fc层参数设置：[in_channels,out_channels]

- 表格参数设置：[卷积核数/宽/高/深度] [池化宽/高] s:stride p:padding

|结构|参数设置|数据维度|
|-|-|-|
|input||[batch_size,3,32,32]|
|conv1|$[6\times5\times5\times3]$<br>s=1 p=0|[batch_size,6,28,28]|
|pool1|$[2\times2]$<br>s=2 p=0|[batch_size,6,14,14]|
|conv2|$[16\times5\times5\times6]$<br>s=1 p=0 |[batch_size,16,10,10]|
|pool2|$[2\times2]$<br>s=2 p=0|[batch_size,16,5,5]|
|fc1|$[(16*5*5)\times120]$|[batch_size,120]|
|fc2|$[120\times84]$|[batch_size,84]|
|fc3|$[84\times10]$|[batch_size,10]|
|--------|-------------------------------|--------------------------------|

In [7]:
# 定义网络结构
class LeNet(nn.Module):
    def __init__(self):
        super().__init__()  # 输入维度[b,3,32,32]
        
        # 第一个卷积、池化模块
        layer1 = nn.Sequential()
        layer1.add_module('conv1', nn.Conv2d(3,6,5,1,padding=0))
        # get [b,6,28,28]
        layer1.add_module('pool1', nn.MaxPool2d(2,2))
        # get [b,6,14,14]
        self.layer1 = layer1
        
        # 第二个卷积、池化模块
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(6,16,5,padding=0))
        # get [b,16,10,10]
        layer2.add_module('pool2', nn.MaxPool2d(2,2))
        # get [b,16,5,5]
        self.layer2 = layer2
        
        # 第三个全连接模块
        # 先将卷积的feature map reshape为[b,16*5*5]的形状
        layer3 = nn.Sequential()
        layer3.add_module('fc1', nn.Linear(16*5*5, 120))
        # get [b,120]
        layer3.add_module('fc2', nn.Linear(120, 84))
        # get [b, 84]
        layer3.add_module('fc3', nn.Linear(84, 10))
        # get [b, 10]
        self.layer3 = layer3
    def forward(self, x):
        conv1 = self.layer1(x)
        conv2 = self.layer2(conv1)
        fc_input = conv2.view(conv2.size(0), -1)
        fc_output = self.layer3(fc_input)
        return fc_output

In [8]:
%%time
LeNet_model = LeNet()
data_random = torch.Tensor(range(3*32*32))
input = data_random.reshape([1,3,32,32])
print('1.随机输入的数据:', input.shape)
output = LeNet_model(Variable(input))
print('2.模型输出:', output.shape)

1.随机输入的数据: torch.Size([1, 3, 32, 32])
2.模型输出: torch.Size([1, 10])
Wall time: 295 ms


### 2.2 AlexNet
2012年ILSVRC冠军网络(8层)

表格参数设置(与函数对应)：
- 卷积 [in_channels,out_channels,kernel_size,stride,padding]

- 池化 [kernel_size,stride,padding]

|结构|参数设置|数据维度|
|-|-|-|
|input||[batch_size,3,227,227]|
|conv1|[3,96,11,4,padding=0]  |[batch_size,96,55,55]|
|conv2|[96,256,5,1,padding=2] |[batch_size,256,55,55]|
|MaxPool|[3,2]                |[batch_size,256,27,27]|
|conv3|[256,384,3,1,padding=1]|[batch_size,384,27,27]|
|MaxPool|[3,2]                |[batch_size,384,13,13]|
|conv4|[384,384,3,1,padding=1]|[batch_size,384,13,13]|
|conv5|[384,256,3,1,padding=1]|[batch_size,256,13,13]|
|MaxPool|[3,2]                |[batch_size,256,6,6]|
|dense1|[256\*6\*6,4096]      |[batch_size,4096]|
|dense2|[4096,4096]           |[batch_size,4096]|
|dense3|[4096,1000]           |[batch_size,1000]|

In [9]:
# 定义AlexNet网络
class AlexNet(nn.Module):
    def __init__(self):
        super().__init__()    # 输入 [b,3,227,227]
        # 定义第1个卷积层
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 96, 11, 4, 0),    # get [b,96,55,55]
            nn.ReLU(True)
        )
        # 定义第2个卷积层
        self.conv2 = nn.Sequential(
            nn.Conv2d(96, 256, 5, 1, 2),   # get [b,256,55,55]
            nn.ReLU(True),
            nn.MaxPool2d(3,2)              # get [b,256,27,27]
        )
        # 定义第3个卷积层
        self.conv3 = nn.Sequential(
            nn.Conv2d(256, 384, 3, 1, 1),  # get [b,384,27,27]
            nn.ReLU(True),
            nn.MaxPool2d(3, 2)             # get [b,384,13,13]
        )
        # 定义第4个卷积层
        self.conv4 = nn.Sequential(
            nn.Conv2d(384, 384, 3, 1, 1),  # get [b,384,13,13]
            nn.ReLU(True),
        )
        # 定义第5个卷积层
        self.conv5 = nn.Sequential(
            nn.Conv2d(384, 256, 3, 1, 1),  # get [b,256,13,13]
            nn.ReLU(True),
            nn.MaxPool2d(3, 2)             # get [b,256,6,6]
        )
        # 定义6,7,8层全连接层
        self.dense = nn.Sequential(
            nn.Linear(256*6*6, 4096),      # get [b,4096]，需先reshape[b,9216]
            nn.ReLU(True),
            nn.Dropout(0.5),               # dropout减少正则化
            nn.Linear(4096, 4096),         # get [b,4096]
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, 1000)          # get [b,1000]
        )
    def forward(self, x):
        conv1_out = self.conv1(x)
        conv2_out = self.conv2(conv1_out)
        conv3_out = self.conv3(conv2_out)
        conv4_out = self.conv4(conv3_out)
        conv5_out = self.conv5(conv4_out)
        dense_input = conv5_out.view(conv5_out.size(0), -1)  # reshape 为一维
        out = self.dense(dense_input)
        return out

In [10]:
%%time
AlexNet_model = AlexNet()
# print(model)
data_random = torch.Tensor(range(3*227*227))
input = data_random.reshape([1,3,227,227])
print('1.随机输入的数据:', input.shape)
output = AlexNet_model(Variable(input))
print('2.模型输出:', output.shape)

1.随机输入的数据: torch.Size([1, 3, 227, 227])
2.模型输出: torch.Size([1, 1000])
Wall time: 644 ms


### 2.3 VGGNet
ImageNet 2014年亚军，使用小滤波器和更深层网络

VGG16(16层)网络表格参数设置(与函数对应)：
- 卷积 [in_channels,out_channels,kernel_size,stride,padding]

- 池化 [kernel_size,stride,padding]

|结构|参数设置|数据维度|
|-|-|-|
|input||[batch_size,3,224,224]|
|conv1|[3,64,3,1,padding=1]   |[batch_size,64,224,224]|
|conv2|[64,64,3,1,padding=1]  |[batch_size,64,224,224]|
|MaxPool|[2,2]                |[batch_size,64,112,112]|
|conv3|[64,128,3,1,padding=1] |[batch_size,128,112,112]|
|conv4|[128,128,3,1,padding=1]|[batch_size,128,112,112]|
|MaxPool|[2,2]                |[batch_size,128,56,56]|
|conv5|[128,256,3,1,padding=1]|[batch_size,256,56,56]|
|conv6|[256,256,3,1,padding=1]|[batch_size,256,56,56]|
|conv7|[256,256,3,1,padding=1]|[batch_size,256,56,56]|
|MaxPool|[2,2]                |[batch_size,256,28,28]|
|conv8|[256,512,3,1,padding=1]|[batch_size,512,28,28]|
|conv9|[512,512,3,1,padding=1]|[batch_size,512,28,28]|
|conv10|[512,512,3,1,padding=1]|[batch_size,512,28,28]|
|MaxPool|[2,2]                 |[batch_size,512,14,14]|
|conv11|[512,512,3,1,padding=1]|[batch_size,512,14,14]|
|conv12|[512,512,3,1,padding=1]|[batch_size,512,14,14]|
|conv13|[512,512,3,1,padding=1]|[batch_size,512,14,14]|
|MaxPool|[2,2]                |[batch_size,512,7,7]|
|dense1|[512\*7\*7,4096]      |[batch_size,4096]|
|dense2|[4096,4096]           |[batch_size,4096]|
|dense3|[4096,1000]           |[batch_size,1000]|

小结:
- VGGNet中使用$3\times3$的卷积核，stride=1，padding=1，所以卷积完后图像的宽和高不变，仅通道数改变
- VGGNet中使用$2\times2$的最大池化核,stride=2，所以池化后图像的宽和高会减为一半，但通道数不会改变

In [11]:
# 定义VGG-16的网络结构
# 方法一，笨办法，逐层定义
class VGG(nn.Module):
    def __init__(self):
        super().__init__()                 # 输入 [b,3,224,224]
        # conv参数 [in_channels,out_channels,kernel_size,stride,padding]
        # maxpool参数 [kernel_size, stride]
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),     # get [b,64,224,224]
            nn.ReLU(True),
            nn.Conv2d(64, 64, 3, 1, 1),    # get [b,64,224,224]
            nn.ReLU(True),
            
            nn.MaxPool2d(2, 2),            # get [b,64,112,112]
            
            nn.Conv2d(64, 128, 3, 1, 1),   # get [b,128,112,112]
            nn.ReLU(True),
            nn.Conv2d(128, 128, 3, 1, 1),  # get [b,128,112,112]
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),            # get [b,128,56,56]
            
            nn.Conv2d(128, 256, 3, 1, 1),  # get [b,256,56,56]
            nn.ReLU(True),
            nn.Conv2d(256, 256, 3, 1, 1),  # get [b,256,56,56]
            nn.ReLU(True),
            nn.Conv2d(256, 256, 3, 1, 1),  # get [b,256,56,56]
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),            # get [b,256,28,28]
            
            nn.Conv2d(256, 512, 3, 1, 1),  # get [b,512,28,28]
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, 1, 1),  # get [b,512,28,28]
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, 1, 1),  # get [b,512,28,28]
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),            # get [b,512,14,14]
            
            nn.Conv2d(512, 512, 3, 1, 1),  # get [b,512,14,14]
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, 1, 1),  # get [b,512,14,14]
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, 1, 1),  # get [b,512,14,14]
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)            # get [b,512,7,7]    
        )
        # 将卷积层reshape为一维向量
        self.classifier = nn.Sequential(
            nn.Linear(512*7*7, 4096),     # get [b,4096]
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),         # get [b,4096]
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, 1000)         # get [b,1000]
        )
    def forward(self, x):
        conv_features = self.features(x)   # 卷积层得到特征
        # reshape 为一维向量
        tense_input = conv_features.view(conv_features.size(0), -1)
        output = self.classifier(tense_input)
        return output

In [12]:
%%time
VGG16_model = VGG()
# print(model)
data_random = torch.Tensor(range(3*224*224))
input = data_random.reshape([1,3,224,224])
print('1.随机输入的数据:', input.shape)
output = VGG16_model(Variable(input))
print('2.模型输出:', output.shape)

1.随机输入的数据: torch.Size([1, 3, 224, 224])
2.模型输出: torch.Size([1, 1000])
Wall time: 1.66 s


In [13]:
# 方法二，完整的定义VGG网络
# 定义VGGNet的4种结构
cfg = {
    'VGG11':[64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13':[64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16':[64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512,\
             512, 512, 'M'],
    'VGG19':[64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, \
             'M', 512, 512, 512, 512, 'M']    
}
class VGG(nn.Module):
    def __init__(self, vgg_name, num_classes=10):
        super().__init__()    # 输入 [b,3,224,224]
        self.features = self._make_layers(cfg[vgg_name])    # 卷积层提取特征
        self.classifier = nn.Sequential(
            # fc6
            nn.Linear(512*7*7, 4096),        # get [b,4096]
            nn.ReLU(True),
            nn.Dropout(),
            # fc7
            nn.Linear(4096, 4096),           # get [b,4096]
            nn.ReLU(True),
            nn.Dropout(),
            # fc8
            nn.Linear(4096, num_classes)     # get [b,num_classes]          
        )
        self._initialize_weights()           # 初始化权重！！！
    def forward(self, x):
        conv_features = self.features(x)     # 卷积提取特征 
        tense_input = conv_features.view(conv_features.size(0), -1)   # reshape为一维向量
        output = self.classifier(tense_input)
        return output
    
    # 生成网络的层信息
    def _make_layers(self, net_cfg):         
        layers = []    # 将网络的结构写入到列表中
        in_channels = 3
        for x in net_cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]    # 将结构append到列表中
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, stride=1, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x                # 令下一层的输入为上层的输出
        # 有了均值池化层，可以减少全连接层的个数！！！！！！
#         layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 
        return nn.Sequential(*layers)          # *代表可变参数，列表用*，字典用**
    
    # 初始化网络权重
    def _initialize_weights(self):
        for m in self.modules():               # 访问网络的各个模块
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0]*m.kernel_size[1]*m.out_channels
                m.weight.data.normal_(0, math.sqrt(2.0 /n ))    # 卷积权重初始化方法
                if m.bias is not None:
                    m.bias.data.zero_()                         # 卷积偏置初始为0
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)              # BN层权重初始为1
                m.bias.data.zero_()                 # BN层偏置初始为0
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)      # fc层权重初始为高斯分布
                m.bias.data.zero_()                 # fc层偏置初始为0                

In [14]:
%%time
vgg16_net = VGG('VGG16')
# print(vgg16_net)
input = torch.randn([1,3,224,224])
output = vgg16_net(Variable(input))
print('1.网络的输出:', output.shape)

1.网络的输出: torch.Size([1, 10])
Wall time: 2.73 s


### 2.4 GoogLeNet
又称InceptionNet(22层)，2014年ImageNet冠军，参数比AlenNet少12倍

<center>GoogLeNet(Inception_v1)网络配置</center>
<img src="./image/googlenet_cfg.png" width="90%" height="90%">
<img src="./image/Inception_module.png" width="70%" height="70%">

- 使用多尺度卷积，将相关性强的特征汇聚到一起进行融合，增强了特征的表达能力，减少了单卷积核造成的稀疏性，该方法相当于对稀疏的矩阵分解为稠密的子矩阵进行运算，提高了计算的效率
- 多种卷积核提取特征，得到的feature map宽和高相同且**与输入一致**，但深度不同，最后各卷积核的结果在**深度**方向上融合，**尺寸不变，深度增加**
- $3\times3$和$5\times5$卷积前先使用$1\times1$卷积核对输入数据进行降维(只有深度上变化)，可以减少网络参数，并不影响性能
- 在Inception_v2中使用$n\times1$与$1\times n$代替$n\times n$的卷积核，但该分解在中度大小的feature map(12~20)效果会很好，在网络前面的层尽量少用
- 在网络的前几层，尽量得到较大的特征图，使深度逐渐加深，避免表达瓶颈(数据维度急剧降低，损失有效信息)
- **完整的卷积公式**：$w_{out}=floor[(w_{input}-filter+2*padding)\ /\ stride] + 1 $，Inception输入维度为$224\times224\times3$，第一个卷积核为7，步长为2，padding为3，根据该公式卷积后得到的feature map大小为$floor(111.5)+1=112$

In [15]:
# 定义Inception_v1网络结构
# 1.定义Inception模块部分
# 卷积函数默认stride=1，padding=0
class Inception(nn.Module):
    def __init__(self, in_channels, n1x1, n3x3_r, n3x3, n5x5_r, n5x5, pool3x3):
        super().__init__()    # 输入深度为 in_channels, 剩下参数为各分支的输出深度，宽和高不变
        # 1x1 conv branch
        self.b1 = nn.Sequential(
            nn.Conv2d(in_channels, n1x1, kernel_size=1, stride=1, padding=0),    # get depth n1x1
            nn.BatchNorm2d(n1x1),
            nn.ReLU(True)
        )
        
        # 1x1 conv -> 3x3 conv branch
        self.b2 = nn.Sequential(
            nn.Conv2d(in_channels, n3x3_r, kernel_size=1, stride=1, padding=0),  # get depth n3x3_r
            nn.BatchNorm2d(n3x3_r),
            nn.ReLU(True),
            nn.Conv2d(n3x3_r, n3x3, kernel_size=3, stride=1, padding=1),         # get depth n3x3
            nn.BatchNorm2d(n3x3),
            nn.ReLU(True)
        )
        
        # 1x1 conv -> 5x5 conv branch，用两个3x3的代替5x5卷积核
        self.b3 = nn.Sequential(
            nn.Conv2d(in_channels, n5x5_r, kernel_size=1, stride=1, padding=0),   # get depth n5x5_r
            nn.BatchNorm2d(n5x5_r),
            nn.ReLU(True),
            nn.Conv2d(n5x5_r, n5x5, kernel_size=3, stride=1, padding=1),          # get depth n5x5
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
            nn.Conv2d(n5x5, n5x5, kernel_size=3, stride=1, padding=1),            # get depth n5x5
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
        )
        
        # 3x3 pool -> 1x1 conv
        self.b4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),               # get depth in_channels
            nn.Conv2d(in_channels, pool3x3, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(pool3x3),
            nn.ReLU(True)                                                   # get depth pool3x3
        )
    def forward(self, x):
        branch1 = self.b1(x)
        branch2 = self.b2(x)
        branch3 = self.b3(x)
        branch4 = self.b4(x)
#         print(branch1.shape, branch2.shape, branch3.shape, branch4.shape,)
        return torch.cat([branch1, branch2, branch3, branch4], 1)

#定义GoogLeNet的网络结构
class GoogLeNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()                         # 输入 [b,3,224,224]
        # 先设置Inception前的卷积层，共 3 层   
        self.pre_layers = nn.Sequential(
            # 7x7 conv
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),     # 向下取整 get [b,64,112,112]
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),         # get [b,64,56,56]
            
            # 1x1 conv -> 3x3 conv
            nn.Conv2d(64, 64, kernel_size=1, stride=1, padding=0),    # get [b,64,56,56]
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1),   # get [b,192,56,56]
            nn.BatchNorm2d(192),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)          # get [b,192,28,28]
        )
        
        # Inception模块部分 参数：[in_channels, n1x1, n3x3_r, n3x3, n5x5_r, n5x5, pool3x3]
        self.a3 = Inception(192, 64,  96,  128,  16, 32, 32)             # get [b,256,28,28]
        self.b3 = Inception(256, 128, 128, 192,  32, 96, 64)             # get [b,480,28,28]   
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)    # get [b,480,14,14]
        
        self.a4 = Inception(480, 192, 96,  208,  16, 48,  64)             # get [b,512,14,14]
        self.b4 = Inception(512, 160, 112, 224,  24, 64,  64)             # get [b,512,14,14]
        self.c4 = Inception(512, 128, 128, 256,  24, 64,  64)             # get [b,512,14,14]
        self.d4 = Inception(512, 112, 144, 288,  32, 64,  64)             # get [b,528,14,14]
        self.e4 = Inception(528, 256, 160, 320,  32, 128, 128)            # get [b,832,14,14]
        self.pool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)     # get [b,832,7,7]
        
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)             # get [b,832,7,7]
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)             # get [b,1024,7,7]
        self.pool5 = nn.AvgPool2d(kernel_size=7, stride=1, padding=0)     # get [b,1024,1,1]
        
        self.dropout = nn.Dropout(0.4)                             
        self.classifier = nn.Linear(1024, num_classes)                    # get [b,num_classes]
    def forward(self, x):   # Inception_v1共22层，这里没有添加a4与d4后的辅助损失，无自定义权重初始化
        x = self.pre_layers(x)       # 3层
        x = self.a3(x)               # 4层
        x = self.b3(x)
        x = self.pool3(x)
        
        x = self.a4(x)               # 10层
        x = self.b4(x)
        x = self.c4(x)
        x = self.d4(x)
        x = self.e4(x)
        x = self.pool4(x)
        
        x = self.a5(x)               # 4层
        x = self.b5(x)
        x = self.pool5(x)
        x = self.dropout(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)       # 1层
        return x

In [16]:
%%time
input = torch.randn([1,3,224,224])
inception_module = Inception(3, 64, 96, 128, 16, 32, 32)
output = inception_module(Variable(input))
print('1.Inception模块的输出:', output.shape)
inception_v1 = GoogLeNet()
output = inception_v1(Variable(input))
print('2.Inception_v1模型的输出:', output.shape)

1.Inception模块的输出: torch.Size([1, 256, 224, 224])
2.Inception_v1模型的输出: torch.Size([1, 10])
Wall time: 802 ms


小结:
- Inception模块本身只改变图像的深度，不改变尺寸
- 每次在池化后，图像的尺寸会缩小为原来的一半
- 网络的最后几层中使用Global Average Pooling，而减少了全连接层的使用，减少参数及过拟合的可能性

### 2.5 ReNet
2015年ImageNet冠军，残差网络解决了准确率Degradation问题及梯度消失问题，可以使网络变得更深

<center>ResNet网络配置</center>

<img src="./image/resnet_cfg.jpg" width=85% height=85%>


- 按照表格中的信息，在bottleneck中1x1的卷积核里，如果使用stride=2就会忽略某些数据，caffe定义的ResNet是这样实现的，但PyTorch中是将第二层3x3卷积核步长设置为2来减小尺寸，两种方法都行吗？？？
[PyTorch的实现](https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py) 和
[caffe的实现](https://github.com/SnailTyan/caffe-model-zoo/blob/master/ResNet-50/train_val.prototxt)

<img src="./image/residual_block.jpg" width=40% height=40%/> 


<img src="./image/bottleneck.jpg" width=60% height=60%/>

- 使用ship connection连接(或短路连接)的方式构建残差，可以解决degradation问题及梯度消失问题
- bottleneck连接的方式可以减少参数
- 对于Indentity map部分，如果输入好输出维度一致，直接相加即可，如果不一致有两种方法：(1) 使用zero-padding增加维度，但要做一个pooling保证参数不变；(2) 采用新的映射(projection shortcut)，一般使用1x1的卷积，但会增加计算量和参数

In [17]:
# 定义ResNet的网络结构
# 定义基本的Block用于18、34层的ResNet网络
class BasicBlock(nn.Module):
    expansion = 1   # 在输入维度和输出维度相同的块，输入和输出深度相差的倍数
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()             # 输入 [b, in_channels,xx,xx]
        # get [b,out_channels,xx,xx] 输入尺寸与stride有关：1 尺寸不变，2 缩为一半
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # gwt [b,out_channels,xx,xx] 输入尺寸不变
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.residual = nn.Sequential()
        if stride != 1 or in_channels != self.expansion*out_channels:
            self.residual = nn.Sequential(
                # 该句使skip connection的输入输出维度变得一致 self.expansion*out_channels与stride很关键
                nn.Conv2d(in_channels, self.expansion*out_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(self.expansion*out_channels)
            )
    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)          # 残差块的第一层输出，需要激活
        out = self.conv2(out)
        out = self.bn2(out)        # 残差块的第二层，没有激活
        out += self.residual(x)    # 与残差的identity map即skip connection部分叠加
        out = F.relu(out)          # 原输出与skip connection叠加的输出同时做激活
        return out
# 定义Bottleneck模块
class Bottleneck(nn.Module):
    expansion = 4                  # 同一组中输入和输出维度的相差倍数
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # stride=2 尺寸变为原来一半，stride=1 尺寸保持不变 get [b,out_channels,xx,xx]
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # 保证尺寸不变，仅改变输出通道 get [b,out_channels,xx,xx]
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # 保证尺寸不变，深度变为expansion倍 get [b,expansion*out_channels,xx,xx]
        self.conv3 = nn.Conv2d(out_channels, self.expansion*out_channels,kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*out_channels)
        
        # skip connection部分
        self.residual = nn.Sequential()
        if stride != 1 or in_channels != self.expansion*out_channels:
            self.residual = nn.Sequential(
                # 统一深度与尺寸 get [b, self.expansion*out_channels,xx,xx]
                nn.Conv2d(in_channels, self.expansion*out_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(self.expansion*out_channels)
            )
    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)           # 第一层卷积的输出
        out = self.conv2(out)
        out = self.bn2(out)
        out = F.relu(out)           #第二层卷积的输出
        out = self.conv3(out)
        out = self.bn3(out)         # 第三层卷积的输出
        out += self.residual(x)     # skip connection是直接从输入得到的，所以参数为 x
        out = F.relu(out)           # 第三层卷积与residual的叠加结果再激活
        return out
    
# 定义ResNet的结构
class ResNet(nn.Module):
    # 残差块的类别；每组残差块的个数列表，输出的类别数
    def __init__(self, block, num_blocks: list, class_nums=10):
        super().__init__()                                                      # 输入 [b,3,224,224]
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),   # get [b,64,112,112]
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)                    # get [b,64,56,56]
        )
        
        # 堆叠残差模块
        self.in_channels = 64
        self.layer1 = self._make_layer(block, 64,  num_blocks[0], stride=1) # get [b,64*expansion,56,56]
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) # get [b,128*expansion,28,28]
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) # get [b,256*expansion,14,14]
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) # get [b,512*expansion,7,7]
        
        self.avg_pool = nn.AvgPool2d(kernel_size=7, stride=1, padding=0)    # get [b,512*expansion,1,1]
        self.classifier = nn.Linear(512*block.expansion, class_nums)        # get [b,class_nums]
        
        # 权重初始化
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
                
    def _make_layer(self, block, out_channels, num_blocks, stride):
        layers = []      # 保存网络层为列表
        strides = [stride] + [1]*(num_blocks-1)         #一组残差块只有第一个需要改变尺寸，即stride=2
        for stride in strides:
            layers += [block(self.in_channels, out_channels, stride=stride)]   # 构建残差模块
            self.in_channels = block.expansion * out_channels                  # 随着模块堆叠而改变
        return nn.Sequential(*layers)                                          # *表示可变参数
    
    def forward(self, x):
        out = self.pre_layers(x)           # 网络首层卷积的结果
        out = self.layer1(out)             # 4组残差块的输出
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)           # 全局池化
        out = out.view(out.size(0), -1)    # reshap为一维向量
        out = self.classifier(out)         # 进行全连接
        return out

def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])   # 列表数字代表各组残差块的个数

def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3])

def ResNet50():
    return ResNet(Bottleneck, [3,4,6,3])

def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3])

def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])

In [18]:
%%time
input = Variable(torch.randn([1,3,224,224]))
block_module = BasicBlock(3, 64, 2)
output = block_module(input)
print('1.BlasicBlock的输出:', output.shape)
bottleneck_module = Bottleneck(3, 64, 2)
output = bottleneck_module(input)
print('2.Bottleneck的输出:', output.shape)
resnet18 = ResNet18()
output = resnet18(input)
print('3.ResNet18的输出:', output.shape)
resnet50 = ResNet50()
output = resnet50(input)
print('3.ResNet50的输出:', output.shape)

1.BlasicBlock的输出: torch.Size([1, 64, 112, 112])
2.Bottleneck的输出: torch.Size([1, 256, 112, 112])
3.ResNet18的输出: torch.Size([1, 10])
3.ResNet50的输出: torch.Size([1, 10])
Wall time: 1.1 s


小结:
- 对于残差块的skip connection的输入输出维度，指尺寸与深度完全一致才表示维度相同
- 在残差块中，最后一层卷积的结果与skip connection结果**叠加**后再进行激活
- ResNet网络中，图像的宽和高缩减为原来的一半不是使用pooling，而是将卷积核的stride设置为2
- 一组残差块中，仅第一个残差块需要将stride设置为2来降维，组内的其他块stride=1