## 13 NiN模型的PyTorch实现

In [8]:
import os
import sys

import numpy as np
import torch
import torchvision
from torch import nn, optim
from torch.nn import init
from tqdm import tqdm  # progress bar, to make training progress easy to follow

device = "cpu"  # device used for training and evaluation (CPU or GPU)

### 获取数据集（仍然使用Fashion-MNIST）

为了与原论文的输入尺寸保持一致，将输入图像 resize 为 224×224。

In [9]:
def load_data_fashion_mnist(batch_size, resize=None, root='Datasets/FashionMNIST'):
    """Return ``(train_iter, test_iter)`` DataLoaders over Fashion-MNIST.

    Both splits are requested from torchvision with ``download=True`` and
    stored under ``root``.

    Args:
        batch_size: Number of samples per batch for both loaders.
        resize: When truthy, a ``Resize(size=resize)`` transform is applied
            before the image is converted to a tensor.
        root: Directory that holds (or will receive) the dataset files.

    Returns:
        A ``(train_iter, test_iter)`` tuple. The training loader shuffles;
        the test loader keeps the dataset order.
    """
    transforms = torchvision.transforms
    steps = [transforms.Resize(size=resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    def _dataset(is_train):
        return torchvision.datasets.FashionMNIST(
            root=root, train=is_train, download=True, transform=pipeline
        )

    train_set = _dataset(True)
    test_set = _dataset(False)

    # 0 means no extra worker processes are used for loading (Windows);
    # every other platform gets four workers.
    workers = 0 if sys.platform.startswith('win') else 4

    def _loader(dataset, shuffle):
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle, num_workers=workers
        )

    return _loader(train_set, True), _loader(test_set, False)

In [10]:
# 128 samples per mini-batch; every image is resized to 224 before batching.
batch_size, img_resize = 128, 224
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size, resize=img_resize)

### 定义模型网络等

In [11]:
# Flatten
class FlattenLayer(nn.Module):
    """Collapse every dimension after the first into a single one.

    An input of shape ``(N, d1, d2, ...)`` is returned with shape
    ``(N, d1 * d2 * ...)``; the first dimension is left untouched.
    """

    def forward(self, x):
        # reshape() instead of view(): view() raises on non-contiguous input
        # (e.g. the result of transpose/permute), whereas reshape() returns a
        # view when it can and copies only when it has to.
        return x.reshape(x.shape[0], -1)

# Global average pooling
class GlobalAvgPool(nn.Module):
    """Average-pool with a window equal to the input's trailing dimensions.

    For a 4-D input ``(N, C, H, W)`` the pooling window is ``(H, W)``, so the
    result has shape ``(N, C, 1, 1)``: one averaged value per channel.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Everything after the batch and channel dimensions is the window.
        window = x.shape[2:]
        return torch.nn.functional.avg_pool2d(x, kernel_size=window)

# NiN block (the returned Sequential can itself be nested inside another one)
def nin_block(in_ch, out_ch, kernel_size, stride, padding):
    """Build one NiN block: a convolution followed by two 1x1 convolutions.

    Args:
        in_ch: Number of input channels of the first convolution.
        out_ch: Number of output channels of every convolution in the block.
        kernel_size: Kernel size of the first convolution.
        stride: Stride of the first convolution.
        padding: Padding of the first convolution.

    Returns:
        An ``nn.Sequential`` of six modules: Conv2d, ReLU, then two
        (1x1 Conv2d, ReLU) pairs that keep the channel count at ``out_ch``.
    """
    layers = [nn.Conv2d(in_ch, out_ch, kernel_size, stride, padding), nn.ReLU()]
    # Two identical 1x1 stages; created in order so parameter initialisation
    # happens in the same sequence as writing them out by hand.
    for _ in range(2):
        layers.append(nn.Conv2d(out_ch, out_ch, 1))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)

# Each nin_block is itself a Sequential, so the model is a Sequential of
# Sequentials interleaved with pooling layers. The finished model is moved
# to `device` as part of the same expression.
net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, stride=4, padding=0),
    nn.MaxPool2d(3, stride=2),
    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(3, stride=2),
    nin_block(256, 384, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(3, stride=2),
    nn.Dropout(p=0.5),
    # There are 10 label classes, hence 10 output channels
    nin_block(384, 10, kernel_size=3, stride=1, padding=1),
    GlobalAvgPool(),
    # Turn the 4-D output into a 2-D one of shape (batch size, 10)
    FlattenLayer(),
).to(device)
print(net)

loss = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(params=net.parameters(), lr=2e-3)

def test_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    Each batch is moved to the module-level ``device``; the predicted class
    is the argmax of ``net(X)`` along dimension 1.

    Evaluation runs under ``torch.no_grad()`` so no autograd graph is built.
    If ``net`` is an ``nn.Module`` it is switched to eval mode for the
    duration of the call (so layers such as Dropout are inactive) and its
    previous training mode is restored afterwards.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches.
        net: Callable model invoked as ``net(X)``.

    Returns:
        Number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, nn.Module)
    was_training = is_module and net.training
    if is_module:
        net.eval()

    correct, seen = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                X = X.to(device)
                y = y.to(device)
                # Sum (not mean) per batch; the division by the total sample
                # count happens once at the end.
                correct += (net(X).argmax(dim=1) == y).float().sum().item()
                seen += y.shape[0]
    finally:
        # Leave the model in the mode the caller had it in.
        if was_training:
            net.train()
    return correct / seen

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU()
    (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
    (5): ReLU()
  )
  (1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU()
    (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (5): ReLU()
  )
  (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU()
    (4): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
    (5): ReLU()
  )
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  ...（此处输出被截断）

### 训练

In [12]:
num_epochs = 5  # number of passes over the training set

def train(net, train_iter, test_iter, loss, num_epochs):
    """Train ``net`` and print loss/accuracy figures after every epoch.

    The parameter update uses the module-level ``optimizer`` and batches are
    moved to the module-level ``device``; neither is passed in, so both must
    already be set up for ``net`` before calling this function.

    Args:
        net: Model to train; called as ``net(X)``.
        train_iter: Iterable of ``(X, y)`` training batches (wrapped in tqdm).
        test_iter: Iterable of ``(X, y)`` batches passed to ``test_accuracy``.
        loss: Loss callable invoked as ``loss(y_hat, y)``. If it exposes a
            ``reduction`` attribute equal to ``"mean"`` (or has no such
            attribute, which is treated as mean), each batch loss is weighted
            by the batch size so the reported value is a per-sample average.
        num_epochs: Number of passes over ``train_iter``.

    Returns:
        None. One summary line per epoch is printed.
    """
    # A mean-reduced loss yields one averaged value per batch. Adding those
    # up and dividing by the sample count would understate the loss by
    # roughly the batch size, so such losses are re-weighted below.
    mean_reduced = getattr(loss, "reduction", "mean") == "mean"

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        net.train()
        for X, y in tqdm(train_iter):
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            # .sum() is a no-op for scalar losses and collapses per-sample
            # losses (reduction="none") into a scalar for backward().
            batch_loss = loss(y_hat, y).sum()
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            batch_n = y.shape[0]
            loss_value = batch_loss.item()
            train_l_sum += loss_value * batch_n if mean_reduced else loss_value
            train_acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
            n += batch_n
        net.eval()
        # No gradients are needed for evaluation.
        with torch.no_grad():
            test_acc = test_accuracy(test_iter, net)
        print("epoch %d, train_loss %.3f, train_acc %.3f, test_acc %.3f"% (epoch+1, train_l_sum / n, train_acc_sum / n, test_acc))
    

In [None]:
# Create the checkpoint directory before training starts: torch.save() does
# not create missing parent directories, and failing only after all epochs
# have run would throw the trained weights away.
os.makedirs('Model_Save', exist_ok=True)
train(net, train_iter, test_iter, loss, num_epochs)
torch.save(net.state_dict(), 'Model_Save/13_net_param.pt')