In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as f
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
#Hyperparameters and run configuration
SEED = 42 #fixed RNG seed so weight initialisation and data shuffling repeat across runs
torch.manual_seed(SEED) #seeds the default torch generator (CPU and CUDA)
BATCH_SIZE = 16 #number of samples per mini-batch
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")#train on the GPU when one is available, otherwise on the CPU
EPOCHS = 10 #number of full passes over the training set

In [3]:
#Preprocessing pipeline applied to every image before it reaches the model
#NOTE(review): 0.1307 / 0.3081 look like the usual MNIST mean / std - confirm
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)
preprocessing_steps = [
  #convert the input image to a tensor
  transforms.ToTensor(),
  #standardise the single channel: (x - mean) / std (input normalisation, not regularisation)
  transforms.Normalize(MNIST_MEAN,MNIST_STD),
]
pipeline = transforms.Compose(preprocessing_steps)

In [4]:
from torch.utils.data import DataLoader

#MNIST train / test splits; files are downloaded into the "data" directory when missing
train_set = datasets.MNIST("data",train=True,download=True,transform=pipeline)

test_set = datasets.MNIST("data",train=False,download=True,transform=pipeline)

#Shuffle the training samples each epoch
train_loader = DataLoader(train_set,batch_size=BATCH_SIZE,shuffle=True)

#Evaluation metrics do not depend on sample order, so keep the test set in a
#fixed order (deterministic and avoids a needless permutation every epoch)
test_loader = DataLoader(test_set,batch_size=BATCH_SIZE,shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [17]:
class Digit(nn.Module):
  """Small CNN classifier producing 10 class log-probabilities per image.

  Layer sizes fix the expected input to a single-channel 28x28 image:
  conv 5x5 -> 24x24, 2x2 max-pool -> 12x12, conv 3x3 -> 10x10, which gives
  the 20*10*10 = 2000 features consumed by the first linear layer.
  """

  def __init__(self):
    super().__init__()
    # 1 input channel (grayscale) -> 10 feature maps, 5x5 kernel
    self.conv1 = nn.Conv2d(in_channels=1,out_channels=10,kernel_size=5)
    # 10 feature maps -> 20 feature maps, 3x3 kernel
    self.conv2 = nn.Conv2d(in_channels=10,out_channels=20,kernel_size=3)
    # flattened 20x10x10 feature volume -> 500 hidden units
    self.fc1 = nn.Linear(in_features=20*10*10,out_features=500)
    # 500 hidden units -> 10 class scores
    self.fc2 = nn.Linear(in_features=500,out_features=10)

  def forward(self,x):
    """Map a (batch, 1, 28, 28) tensor to (batch, 10) log-probabilities."""
    # conv1 + ReLU: (batch, 10, 24, 24); 2x2 max-pool with stride 2: (batch, 10, 12, 12)
    features = f.max_pool2d(f.relu(self.conv1(x)),2,2)

    # conv2 + ReLU: (batch, 20, 10, 10)
    features = f.relu(self.conv2(features))

    # flatten everything except the batch dimension: (batch, 2000)
    flat = features.view(x.size(0),-1)

    # hidden layer with ReLU: (batch, 500)
    hidden = f.relu(self.fc1(flat))

    # class scores: (batch, 10)
    scores = self.fc2(hidden)

    # log-softmax over the class dimension (log-probabilities, not probabilities)
    return f.log_softmax(scores,dim=1)

In [19]:
#Build the network, place it on the selected device, and create its optimizer
model = Digit()
model = model.to(DEVICE)

#Adam over all model parameters, using the optimizer's default settings
optimizer = optim.Adam(params=model.parameters())

In [20]:
#Training routine
def train_model(model,device,train_loader,optimizer,epoch,log_interval=3000):
  """Train `model` for one pass over `train_loader`.

  Args:
    model: network to optimise; it is switched to training mode.
    device: device each batch is moved to before the forward pass.
    train_loader: iterable yielding (data, target) batches.
    optimizer: optimizer whose `zero_grad` / `step` drive the update.
    epoch: epoch number, used only in the progress message.
    log_interval: print the current batch loss every `log_interval` batches
      (starting with batch 0). A falsy value (0 / None) disables logging.
  """
  model.train()
  for batch_index,(data,target) in enumerate(train_loader):
    #move the batch to the target device
    data,target = data.to(device),target.to(device)
    #clear gradients accumulated by the previous step
    optimizer.zero_grad()
    #forward pass
    output = model(data)
    #cross_entropy applies log_softmax itself; if the model already returns
    #log-probabilities the value is unchanged because log_softmax is idempotent
    loss = f.cross_entropy(output,target)
    #backward pass
    loss.backward()
    #parameter update
    optimizer.step()
    if log_interval and batch_index % log_interval == 0:
      print("Train Epoch : {} \t Loss : {:.6f}".format(epoch,loss.item()))


In [25]:
#定义测试方法
def test_model(model,device,test_loader):
  #模型验证
  model.eval()
  #正确率
  correct = 0.0
  #测试损失
  test_loss = 0.0
  with torch.no_grad():#不会计算梯度，也不会反向传播
    for data,target in test_loader:
      #部署到device上
      data,target = data.to(device),target.to(device)
      #测试数据
      output = model(data)
      #计算测试损失
      test_loss += f.cross_entropy(output,target).item()
      #找到概率值最大的下标
      pred = output.max(1,keepdim=True)[1]#值，索引
      #pred = torch.max(output,dim=1)
      #pred = output.argmax(dim=1)
      #累计正确的值
      correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print("Test -- Average loss : {:.4f},Accuracy : {:.3f}\n".format(test_loss,100.0*correct/len(test_loader.dataset)))

In [26]:
#Alternate one training pass and one evaluation pass for each epoch (1..EPOCHS)
for epoch in range(1,EPOCHS+1):
  train_model(model=model,device=DEVICE,train_loader=train_loader,optimizer=optimizer,epoch=epoch)
  test_model(model=model,device=DEVICE,test_loader=test_loader)

Train Epoch : 1 	 Loss : 0.046062
Train Epoch : 1 	 Loss : 0.000484
Test -- Average loss : 0.0023,Accuracy : 98.820

Train Epoch : 2 	 Loss : 0.008116
Train Epoch : 2 	 Loss : 0.015171
Test -- Average loss : 0.0024,Accuracy : 98.980

Train Epoch : 3 	 Loss : 0.000134
Train Epoch : 3 	 Loss : 0.000110
Test -- Average loss : 0.0029,Accuracy : 98.950

Train Epoch : 4 	 Loss : 0.000003
Train Epoch : 4 	 Loss : 0.003493
Test -- Average loss : 0.0036,Accuracy : 98.680

Train Epoch : 5 	 Loss : 0.000092
Train Epoch : 5 	 Loss : 0.000013
Test -- Average loss : 0.0035,Accuracy : 98.900

Train Epoch : 6 	 Loss : 0.000000
Train Epoch : 6 	 Loss : 0.000821
Test -- Average loss : 0.0034,Accuracy : 99.010

Train Epoch : 7 	 Loss : 0.000000
Train Epoch : 7 	 Loss : 0.000011
Test -- Average loss : 0.0036,Accuracy : 98.940

Train Epoch : 8 	 Loss : 0.000000
Train Epoch : 8 	 Loss : 0.000020
Test -- Average loss : 0.0048,Accuracy : 98.810

Train Epoch : 9 	 Loss : 0.000010
Train Epoch : 9 	 Loss : 0.000