### ex11_1 Inception Model
本模型是 GoogLeNet 的一个简化版实现，同样是针对 MNIST 数据集的卷积神经网络。
其核心是封装一个 Inception 模块。该模块的直观思想是：并行使用多个不同结构的网络分支，在训练过程中通过参数学习自动进行选择，从而选出最优的分支。
此模型比上一个两层卷积的神经网络性能更优：经过 10 个 epoch 的训练，在测试集上的准确率可达 98.9%。

#### 初始化基本设置并建立数据集

In [17]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

# 初始化并固定随机种子


def setup_seed(seed):
    """Seed every random number generator used in this notebook.

    The same seed is handed to PyTorch (``manual_seed`` and
    ``cuda.manual_seed_all``), NumPy and Python's ``random`` module, and the
    cuDNN ``deterministic`` flag is switched on.

    Args:
        seed: Integer seed passed to each generator.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


# Fix all random seeds before anything random happens
setup_seed(1012)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"The current computing device is {device.type} ")
if device.type == "cuda":
    print(f'The current GPU is :{torch.cuda.get_device_name(0)}')

# Prepare the MNIST datasets and their loaders

batch_size = 64

# Image preprocessing: convert each PIL image to a tensor, then normalise it
# with 0.1307 and 0.3081, the mean and standard deviation of this dataset.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split, served in shuffled mini-batches of `batch_size`
train_dataset = datasets.MNIST(
    root='./dataset/mnist/', transform=transform, train=True, download=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split, served unshuffled as one single batch holding the whole set
test_dataset = datasets.MNIST(
    root='./dataset/mnist/', transform=transform, train=False, download=True)
test_loader = DataLoader(
    test_dataset, batch_size=len(test_dataset), shuffle=False)


print(f'[size of train_set/test_set]:{len(train_dataset)},{len(test_dataset)}')


The current computing device is cpu 
[size of train_set/test_set]:60000,10000


#### 定义模型

In [12]:
class Inception(nn.Module):
    """Inception block: four parallel convolutional branches joined by channel.

    The input is laid out as [batch_size, channels, width, height]
    (b, c, w, h); ``in_channels`` has to be given at construction time.

    Every branch uses stride 1 and a padding that keeps the spatial size, so
    only the channel count changes. The output has 88 = 24 + 16 + 24 + 24
    channels: the four branches concatenated along dim 1 (channels).
    """

    def __init__(self, in_channels) -> None:
        super().__init__()
        # NOTE: the branches are created in the same order in which forward()
        # concatenates them.

        # 3x3 average pooling, then a 1x1 convolution -> 24 channels
        pool_stages = [
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 24, kernel_size=1),
        ]
        self.branch_pool = nn.Sequential(*pool_stages)

        # A single 1x1 convolution -> 16 channels
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        # 1x1 convolution, then a 5x5 convolution -> 24 channels
        five_stages = [
            nn.Conv2d(in_channels, 16, kernel_size=1),
            nn.Conv2d(16, 24, kernel_size=5, padding=2),
        ]
        self.branch5x5 = nn.Sequential(*five_stages)

        # 1x1 convolution, then two stacked 3x3 convolutions -> 24 channels
        three_stages = [
            nn.Conv2d(in_channels, 16, kernel_size=1),
            nn.Conv2d(16, 24, kernel_size=3, padding=1),
            nn.Conv2d(24, 24, kernel_size=3, padding=1),
        ]
        self.branch3x3 = nn.Sequential(*three_stages)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate the results on dim 1."""
        branches = (
            self.branch_pool,
            self.branch1x1,
            self.branch5x5,
            self.branch3x3,
        )
        # [b,c,w,h] with c = 88
        return torch.cat([branch(x) for branch in branches], dim=1)


class Net(nn.Module):
    """Small CNN for MNIST built from two conv/pool/ReLU stages.

    Each stage is followed by an ``Inception`` block; a single linear layer
    maps the flattened features to 10 outputs. Input is [b,1,28,28] and the
    result is [b,10].
    """

    def __init__(self) -> None:
        super().__init__()
        feature_stages = [
            # input [b,1,28,28]
            nn.Conv2d(1, 10, kernel_size=5),   # [b,10,24,24]
            nn.MaxPool2d(2),                   # [b,10,12,12]
            nn.ReLU(),
            Inception(10),                     # [b,88,12,12]
            nn.Conv2d(88, 20, kernel_size=5),  # [b,20,8,8]
            nn.MaxPool2d(2),                   # [b,20,4,4]
            nn.ReLU(),
            Inception(20),                     # [b,88,4,4]
        ]
        self.layers = nn.Sequential(*feature_stages)
        # 88 channels * 4 * 4 positions = 1408 features per sample
        self.fc = nn.Linear(1408, 10)

    def forward(self, x):
        """Run the feature stages, flatten per sample and apply the classifier."""
        features = self.layers(x)
        flat = features.view(x.shape[0], -1)  # [b,1408]
        return self.fc(flat)  # [b,10]
    
# Instantiate the network and move it to the selected device
model = Net()
model.to(device)

Net(
  (layers): Sequential(
    (0): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
    (3): Inception(
      (branch_pool): Sequential(
        (0): AvgPool2d(kernel_size=3, stride=1, padding=1)
        (1): Conv2d(10, 24, kernel_size=(1, 1), stride=(1, 1))
      )
      (branch1x1): Conv2d(10, 16, kernel_size=(1, 1), stride=(1, 1))
      (branch5x5): Sequential(
        (0): Conv2d(10, 16, kernel_size=(1, 1), stride=(1, 1))
        (1): Conv2d(16, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      )
      (branch3x3): Sequential(
        (0): Conv2d(10, 16, kernel_size=(1, 1), stride=(1, 1))
        (1): Conv2d(16, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (2): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
    )
    (4): Conv2d(88, 20, kernel_size=(5, 5), stride=(1, 1))
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, 

#### 设置损失函数和优化器

In [13]:
# Construct the loss function and the optimizer

# CrossEntropyLoss already contains the softmax step and works directly on
# integer class labels (LongTensor), including the multi-class case, so no
# explicit one-hot encoding is needed.
criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.SGD(
    params=model.parameters(),
    momentum=0.5,
    lr=0.01,
)

#### 设置训练和测试函数

In [14]:
# training and test

# Histories filled during the run: train() appends one average loss per
# epoch, test() appends one accuracy per evaluation.
loss_list, accuracy_list = [], []


def train(epoch):
    """Train the global ``model`` for one epoch over ``train_loader``.

    For every batch the inputs and labels are moved to ``device``, the loss
    is computed with ``criterion`` and ``optimizer`` performs one update
    step. The mean of the per-batch losses is appended to ``loss_list`` and
    printed.

    Args:
        epoch: Epoch number; only used in the printed progress message.
    """
    # Always train in training mode, whatever mode the model was left in
    # before (matters for mode-dependent layers such as dropout/batch norm).
    model.train()

    batch_losses = []  # loss of every batch seen in this epoch
    for X, y_label in train_loader:
        X, y_label = X.to(device), y_label.to(device)

        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred, y_label)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    average_loss = sum(batch_losses) / len(batch_losses)
    loss_list.append(average_loss)
    print(f'[epoch]:{epoch},  [average_loss]: {average_loss}')


def test():
    """Measure the accuracy of the global ``model`` on the whole test set.

    Iterates over ``test_loader`` without gradient tracking, takes the argmax
    over dim 1 as the predicted class and counts the predictions that equal
    the labels. The count divided by ``len(test_dataset)`` is appended to
    ``accuracy_list`` and printed.
    """
    # Evaluate in eval mode, then put the model back into whatever mode it
    # was in so that a following training epoch is not affected.
    was_training = model.training
    model.eval()
    correct_num = 0
    try:
        with torch.no_grad():
            for X, y in test_loader:
                X, y = X.to(device), y.to(device)
                predicted = torch.argmax(model(X), dim=1)
                correct_num += torch.sum(predicted == y).item()
    finally:
        model.train(was_training)

    accuracy = correct_num / len(test_dataset)
    accuracy_list.append(accuracy)
    print(f'Current accuracy on the test set is {accuracy}')




#### 训练与测试

In [15]:
# start training now!


num_epochs = 10  # number of passes over train_loader


# Epochs are numbered from 1; after each training epoch the model is
# evaluated once with test()
for epoch in range(1, num_epochs+1):
    train(epoch)
    test()

[epoch]:1,  [average_loss]: 0.369774169503038
Current accuracy on the whole set is 0.9661


KeyboardInterrupt: 