In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets,transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Hyperparameters shared by the cells below.
num_classes = 10   # number of label classes (digits 0-9)
input_size = 28    # native MNIST image side length (images are 28x28)

batch_size = 32
epochs = 5
# NOTE(review): 0.01 is high for Adam on a deep network without batch norm;
# if the loss does not decrease, a smaller value may be needed - confirm by running.
lr = 0.01

In [None]:
# The VGG16 defined in this notebook halves the spatial size five times and
# expects a 2x2 feature map at the end, so upscale MNIST from 28x28 to 64x64.
custom_transform = transforms.Compose([
    transforms.Resize([64, 64]),
    transforms.ToTensor(),
])

# Training split; downloaded into ./ on first use.
train_ds = datasets.MNIST(root="./",
                          train=True,
                          transform=custom_transform,
                          download=True)

# Test split. download=True makes this statement work on its own instead of
# relying on the training split having been fetched first.
test_ds = datasets.MNIST(root="./",
                         train=False,
                         transform=custom_transform,
                         download=True)

# Batch the data. Only the training loader is shuffled; evaluation does not
# benefit from shuffling and a fixed order keeps it deterministic.
train_dl = DataLoader(dataset=train_ds,
                      batch_size=batch_size,
                      shuffle=True)
test_dl = DataLoader(dataset=test_ds,
                     batch_size=batch_size,
                     shuffle=False)

# 卷积网络模块构建

In [None]:
class VGG16(nn.Module):
    """VGG16-style CNN: five convolutional blocks followed by a 3-layer classifier.

    Every block is a stack of 3x3, stride-1, padding-1 convolutions (each
    followed by ReLU) closed by a 2x2 max-pool that halves height and width.
    The classifier's first layer takes ``512 * 2 * 2`` features, so inputs
    must be 64x64 spatially (64 halved five times is 2).

    Args:
        num_classes: Number of output classes (width of the final layer).
        in_channels: Number of channels of the input images. Defaults to 1
            (grayscale), which matches the original hard-coded value.
    """

    # Number of features produced by block_5 for a 64x64 input.
    _FLAT_FEATURES = 512 * 2 * 2

    def __init__(self, num_classes, in_channels=1):
        super().__init__()
        # Attribute names and per-block layer order are kept unchanged so that
        # state_dict keys (e.g. "block_1.0.weight") stay the same.
        self.block_1 = self._make_block(in_channels, 64, num_convs=2)
        self.block_2 = self._make_block(64, 128, num_convs=2)
        self.block_3 = self._make_block(128, 256, num_convs=3)
        self.block_4 = self._make_block(256, 512, num_convs=3)
        self.block_5 = self._make_block(512, 512, num_convs=3)

        self.fc = nn.Sequential(
            nn.Linear(self._FLAT_FEATURES, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes)
        )

    @staticmethod
    def _make_block(in_channels, out_channels, num_convs):
        """Build ``num_convs`` x (3x3 conv + ReLU) followed by a 2x2 max-pool."""
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2d(in_channels=in_channels,
                                    out_channels=out_channels,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1))
            layers.append(nn.ReLU())
            # Only the first conv of a block changes the channel count.
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape (batch, in_channels, 64, 64).

        Returns:
            Tuple ``(logits, prob)``, each of shape (batch, num_classes);
            ``prob`` is the softmax of ``logits`` over the class dimension.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to 2x2,
                because the flattened features no longer match the classifier.
        """
        for block in (self.block_1, self.block_2, self.block_3,
                      self.block_4, self.block_5):
            x = block(x)
        # Flatten per sample. Unlike view(-1, 512 * 2 * 2), this never folds
        # extra or missing spatial elements into the batch dimension, so a
        # wrong input size fails loudly in the first Linear layer.
        logits = self.fc(torch.flatten(x, start_dim=1))
        prob = F.softmax(logits, dim=1)
        return logits, prob

In [None]:
# Build the network with the class count from the hyperparameter cell rather
# than a second hard-coded 10, so the two cannot drift apart.
model = VGG16(num_classes)

In [None]:
# Pick the device: first CUDA GPU when available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Move the model first, then build the optimizer from the moved parameters;
# this is the conventional, always-safe order.
model = model.to(device)

# Optimizer (the loss itself is computed with F.cross_entropy in the training loop).
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
def compute_acc_and_loss(model, data_loader, device):
    """Evaluate ``model`` over every batch yielded by ``data_loader``.

    The model's train/eval mode is not changed here; the caller is expected
    to set it. Gradient tracking is disabled inside the function.

    Args:
        model: Callable returning ``(logits, probabilities)`` for a batch of
            features.
        data_loader: Iterable of ``(features, targets)`` tensor pairs.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, total_loss)``: ``accuracy`` is the fraction of
        correctly classified samples as a float; ``total_loss`` is the sum of
        the per-batch mean cross-entropy losses (not divided by the number of
        batches). An empty loader yields ``(0.0, 0.0)``.
    """
    correct_pred, nums = 0, 0
    total_loss = 0.
    # Evaluation never needs gradients; disabling them here keeps the function
    # cheap even when the caller forgets to.
    with torch.no_grad():
        for features, targets in data_loader:
            # Move the batch to the target device.
            features = features.to(device)
            targets = targets.to(device)

            # Forward pass: raw scores and their softmax probabilities.
            logits, probas = model(features)

            # Accumulate this batch's mean loss.
            total_loss += F.cross_entropy(logits, targets).item()

            # Predicted label = class with the highest probability.
            _, pred_labels = torch.max(probas, 1)
            # .item() keeps the running count a plain Python int.
            correct_pred += (pred_labels == targets).sum().item()

            # Running count of evaluated samples.
            nums += targets.size(0)

    if nums == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0, total_loss
    return correct_pred / nums, total_loss

In [None]:
# Per-epoch metric history. Created once, before the loop, so that the values
# of every epoch are kept (creating them inside the loop discards all but the
# last epoch).
train_acc_list, train_loss_list = [], []
test_acc_list, test_loss_list = [], []

for epoch in range(epochs):
    # ---- Training phase ----
    model.train()
    for i, (features, targets) in enumerate(train_dl):
        # Move the batch to the selected device.
        features = features.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass. Call the module itself (not .forward) so registered
        # hooks run; the softmax output is not needed for the loss.
        logits, _ = model(features)

        # Cross-entropy is computed from the raw logits.
        loss = F.cross_entropy(logits, targets)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Progress report every 100 iterations; .item() prints the plain
        # number instead of a tensor repr.
        if i % 100 == 0:
            print("epoch:{}/{}, iter:{}/{}, loss:{}"
                  .format(epoch+1, epochs, i, len(train_dl),
                          loss.item()))

    # ---- Evaluation phase ----
    model.eval()
    with torch.no_grad():
        train_acc, train_loss = compute_acc_and_loss(model, train_dl, device)
        test_acc, test_loss = compute_acc_and_loss(model, test_dl, device)

    train_acc_list.append(train_acc)
    train_loss_list.append(train_loss)
    test_acc_list.append(test_acc)
    test_loss_list.append(test_loss)

    # One placeholder per value, so each label matches the number it prints.
    print("train acc:{:.4f}, train loss:{:.4f}, test acc:{:.4f}, test loss:{:.4f}"
          .format(train_acc, train_loss, test_acc, test_loss))
