In [1]:
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
import argparse
import matplotlib as plt

# training settings
batch_size = 64 
lr = 0.001 
n_epoch = 5

# 定义是否使用GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# MNSIT dataset
train_dataset = datasets.MNIST(root='./data/', 
                               train=True,
                               transform=transforms.ToTensor(),
                               download=False
                              )

test_dataset = datasets.MNIST(root='./data/',
                              train=False,
                              transform=transforms.ToTensor()
                             )

# data loader (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True
                                          )

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=batch_size,
                                           shuffle=False
                                          )

In [2]:
# 定义网络结构
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(  # input_size=(1*28*28)
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),   # input_size=(6*28*28)
            nn.MaxPool2d(kernel_size=2, stride=2),  # output_size=(6*14*14)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(),  # input_szie=(16*10*10)
            nn.MaxPool2d(2, 2),  # output_szie=(16*5*5)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU(),
        )
        self.fc3 = nn.Linear(84, 10) 
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x) 
        # nn.Linear()的输入输出都是维度为一的值，所以要把多维度的tensor展平成一维 
        x = x.view(x.size()[0], -1) 
        x = self.fc1(x)
        x = self.fc2(x) 
        x = self.fc3(x)
        return x
    

In [9]:
# # 命令行参数设置
# parser = argparse.ArgumentParser()
# parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints') #模型保存路径
# parser.add_argument('--net', default='./model/net.pth', help="path to netG (to continue training)")  #模型加载路径
# opt = parser.parse_args()

usage: ipykernel_launcher.py [-h] [--outf OUTF] [--net NET]
ipykernel_launcher.py: error: unrecognized arguments: -f /run/user/1005/jupyter/kernel-6bbe65c2-9a45-464d-8c2e-29ab11a5c8cd.json


SystemExit: 2

In [3]:
# 定义损失函数loss function 和优化方式（采用SGD）
net = LeNet().to(device)
criterion = nn.CrossEntropyLoss()  # 交叉熵损失函数，通常用于多分类问题上
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)

In [4]:
# train
if __name__ == '__main__':
    for epoch in range(n_epoch):
        sum_loss = 0.0
        for idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            # forward + backward
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            # 每训练100个batch打印一次平均loss
            sum_loss += loss.item()
            if idx % 100 == 0: 
                print('epoch:{} - {}/{}\tLoss:{:.6f}'.format(epoch, idx, len(train_loader), sum_loss / 100))
                sum_loss = 0
        
        # 每跑完一次epoch测试一下准确率
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = net.forward(images)
                # 取得分最高的类
                _, pred = torch.max(outputs.data, 1) 
                total += labels.size(0)
                correct += (pred == labels).sum()
        
        print('第{}个epoch的识别准确率为：{:.3f}%'.format(epoch, 100.0 * correct / total))
    torch.save(net.state_dict(), '{}/net_{}.pth'.format('./models', epoch))
    

epoch:0 - 0/938	Loss:0.023034
epoch:0 - 100/938	Loss:2.301380
epoch:0 - 200/938	Loss:2.297725
epoch:0 - 300/938	Loss:2.292021
epoch:0 - 400/938	Loss:2.285759
epoch:0 - 500/938	Loss:2.273999
epoch:0 - 600/938	Loss:2.248425
epoch:0 - 700/938	Loss:2.175310
epoch:0 - 800/938	Loss:1.893731
epoch:0 - 900/938	Loss:1.113367
第0个epoch的识别准确率为：80.000%
epoch:1 - 0/938	Loss:0.007317
epoch:1 - 100/938	Loss:0.613155
epoch:1 - 200/938	Loss:0.499569
epoch:1 - 300/938	Loss:0.419155
epoch:1 - 400/938	Loss:0.410327
epoch:1 - 500/938	Loss:0.373323
epoch:1 - 600/938	Loss:0.345462
epoch:1 - 700/938	Loss:0.337054
epoch:1 - 800/938	Loss:0.301597
epoch:1 - 900/938	Loss:0.290528
第1个epoch的识别准确率为：91.000%
epoch:2 - 0/938	Loss:0.001744
epoch:2 - 100/938	Loss:0.261486
epoch:2 - 200/938	Loss:0.275710
epoch:2 - 300/938	Loss:0.241693
epoch:2 - 400/938	Loss:0.242607
epoch:2 - 500/938	Loss:0.224495
epoch:2 - 600/938	Loss:0.203622
epoch:2 - 700/938	Loss:0.218823
epoch:2 - 800/938	Loss:0.196999
epoch:2 - 900/938	Loss:0.21136