## 12 CNN网络-VGG模型的Pytorch实现

In [1]:
import torch
import torchvision
from torch import nn, optim
from torch.nn import init
import numpy as np
import sys
from tqdm import tqdm  # 加入进度条库，方便查看训练进度

device="cuda:2"   # 选择训练设备（CPU或GPU）

### 获取数据集（仍然使用Fashion-MNIST）

保持与原论文相同的输入，将输入图像数据resize为224×224

In [2]:
def load_data_fashion_mnist(batch_size, resize=None, root='Datasets/FashionMNIST'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0表示不用额外的进程来加速读取数据
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

In [3]:
batch_size = 128
img_resize = 224
train_iter, test_iter = load_data_fashion_mnist(batch_size, img_resize)

### 定义模型网络等

In [4]:
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):
        return x.view(x.shape[0],-1)

def VGG_block(num_convs, in_ch, out_ch):
    blk=[]
    for i in range(num_convs):
        if i==0: 
            blk.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
        else: #如有多次conv,第一次in_ch->out_ch，之后通道数不变
            blk.append(nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))
    blk.append(nn.ReLU())
    blk.append(nn.MaxPool2d(2,2))
    return nn.Sequential(*blk)  # *号将列表解构为独立的参数

conv_arch = ((1, 1, 8), (1, 8, 16), (2, 16, 32), (2, 32, 64), (2, 64, 64))

net=nn.Sequential()

# 加入VGG卷积块
for i, (num_convs, in_ch, out_ch) in enumerate(conv_arch):  #enumerate将conv_arch加上索引一同进入循环
    net.add_module("VGG_block_" + str(i+1), VGG_block(num_convs, in_ch, out_ch))

# 加入全连接层
net.add_module("fc", nn.Sequential(FlattenLayer(),
                                 nn.Linear(3136, 512),
                                 nn.ReLU(),
                                 nn.Dropout(0.5),
                                 nn.Linear(512, 256),
                                 nn.ReLU(),
                                 nn.Dropout(0.5),
                                 nn.Linear(256, 10)
                                ))

net=net.to(device)
print(net)

loss=nn.CrossEntropyLoss().to(device)
optimizer=optim.Adam(net.parameters(), lr=0.001)

def test_accuracy(data_iter,net):
    acc_sum, n=0.0, 0
    for X,y in data_iter:
        X=X.to(device)
        y=y.to(device)
        acc_sum+=(net(X).argmax(dim=1)==y).float().sum().item()  #注意这里是sum不是mean 之后会÷n
        n+=y.shape[0]
    return acc_sum / n

Sequential(
  (VGG_block_1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (VGG_block_2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (VGG_block_3): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (VGG_block_4): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (VGG_bl

### 训练

In [5]:
num_epochs=5

def train(net,train_iter,test_iter,loss,num_epochs):

    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n=0.0,0.0,0
        net.train()
        for X,y in tqdm(train_iter):
            X=X.to(device)
            y=y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y).sum()
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum+=l.item()
            train_acc_sum+=(y_hat.argmax(dim=1)==y).float().sum().item()
            n+=y.shape[0]
        net.eval()
        test_acc=test_accuracy(test_iter,net)
        print("epoch %d, train_loss %.3f, train_acc %.3f, test_acc %.3f"% (epoch+1, train_l_sum / n, train_acc_sum / n, test_acc))
    

In [6]:
train(net, train_iter, test_iter, loss, num_epochs)
torch.save(net.state_dict(),'Model_Save/12_net_param.pt')

100%|██████████| 469/469 [00:15<00:00, 30.16it/s]


epoch 1, train_loss 0.005, train_acc 0.759, test_acc 0.859


100%|██████████| 469/469 [00:14<00:00, 31.44it/s]


epoch 2, train_loss 0.003, train_acc 0.868, test_acc 0.888


100%|██████████| 469/469 [00:14<00:00, 31.99it/s]


epoch 3, train_loss 0.003, train_acc 0.883, test_acc 0.891


100%|██████████| 469/469 [00:14<00:00, 31.34it/s]


epoch 4, train_loss 0.002, train_acc 0.894, test_acc 0.891


100%|██████████| 469/469 [00:14<00:00, 31.68it/s]


epoch 5, train_loss 0.002, train_acc 0.901, test_acc 0.909
