# 含并行连结的网络(GoogleNet)

# Inception 块

In [1]:
import time 
import torch
from torch import nn, optim
import torch.nn.functional as F
import sys
sys.path.append("..")
import library.d2lzh_pytorch as d2l
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Inception 块
class Inception(nn.Module):
    # c1 - c4 为每条线里的层的输出通道数
    def __init__(self, in_c, c1, c2, c3, c4):
        super(Inception, self).__init__()
        # 线路一
        self.p1_1 = nn.Conv2d(in_channels=in_c, out_channels=c1, kernel_size=1)
        # 线路2
        self.p2_1 = nn.Conv2d(in_channels=in_c, out_channels=c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(in_channels=c2[0], out_channels=c2[1], kernel_size=3, padding=1)
        # 线路3
        self.p3_1 = nn.Conv2d(in_channels=in_c, out_channels=c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(in_channels=c3[0], out_channels=c3[1], kernel_size=5, padding=2)
        # 线路4
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels=in_c, out_channels=c4, kernel_size=1)
        
    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2((self.p4_1(x))))
        return torch.cat([p1, p2, p3, p4], dim=1) # 在通道维上输出

# GoogleNet 模型建立
GoogLeNet跟VGG一样,在主体卷积部分中使用用5个模块(block),每个模块之间使用步幅为2的 3 * 3 最大池化层来减小输出高宽。第一模块使用一个64通道的 7 * 7 卷积层。

In [3]:
b1 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
)

In [4]:
b2 = nn.Sequential(
    nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1),
    nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, padding=1),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
)

In [5]:
b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),  
                   Inception(256, 128, (128, 192), (32, 96), 64),  
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

In [6]:
b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),  
                   Inception(512, 160, (112, 224), (24, 64), 64),  
                   Inception(512, 128, (128, 256), (24, 64), 64),  
                   Inception(512, 112, (144, 288), (32, 64), 64),  
                   Inception(528, 256, (160, 320), (32, 128), 128),  
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

In [7]:
b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),  
                   Inception(832, 384, (192, 384), (48, 128), 128),  
                   d2l.GlobalAvgPool2d())

In [8]:
net = nn.Sequential(b1, b2, b3, b4, b5,  d2l.FlattenLayer(), nn.Linear(1024, 10))

In [9]:
X = torch.rand(1, 1, 96, 96)
for blk in net.children():  
    X = blk(X)  
    print('output shape: ', X.shape)

output shape:  torch.Size([1, 64, 24, 24])
output shape:  torch.Size([1, 192, 12, 12])
output shape:  torch.Size([1, 480, 6, 6])
output shape:  torch.Size([1, 832, 3, 3])
output shape:  torch.Size([1, 1024, 1, 1])
output shape:  torch.Size([1, 1024])
output shape:  torch.Size([1, 10])


In [10]:
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist_in_ch5(batch_size, resize=96)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 1.0334, train acc 0.602, test acc 0.817,  time 53.0 sec
epoch 2, loss 0.2122, train acc 0.843, test acc 0.851,  time 56.5 sec
epoch 3, loss 0.1166, train acc 0.870, test acc 0.881,  time 67.9 sec
epoch 4, loss 0.0759, train acc 0.889, test acc 0.879,  time 75.4 sec
epoch 5, loss 0.0552, train acc 0.898, test acc 0.892,  time 97.8 sec
