In [1]:
#加载必要的库
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision  import datasets, transforms

In [2]:
#定义超参数
BATCH_SIZE = 32 #每批处理的数据
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu" ) #是否用GPU还是CPU训练
EPOCHS = 10 #进行10轮训练数据集

In [3]:
#构建pipeline，对图像做处理
pipeline = transforms.Compose([
    transforms.ToTensor(),# 将图片转化成tensor
    transforms.Normalize((0.1307,),(0.3081,)) # 正则化：降低模型复杂度
])

In [4]:
#下载。加载数据
from torch.utils.data import DataLoader
#下载数据集
train_set = datasets.MNIST("data",train=True,download=True,transform=pipeline)
test_set = datasets.MNIST("data",train=True,download=True,transform=pipeline)
#加载数据
train_loader = DataLoader(train_set,batch_size=BATCH_SIZE,shuffle=True) # 打乱图片，提高精确度
test_loader = DataLoader(test_set,batch_size=BATCH_SIZE,shuffle=True)


In [5]:
#构建网络模型
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        # 定义卷积层
        self.conv1 = nn.Conv2d(1 , 10 , 5) # 1为灰度图片的通道，10 为输出通道，5 为卷积核kernel
        self.conv2 = nn.Conv2d(10 , 20 ,3)
        #定义全连接层
        self.fc1 = nn.Linear(20*10*10,500)
        self.fc2 = nn.Linear(500 ,10) 
   # 前向传播
    def forward(self,x):
        input_size = x.size(0) # 只拿batch_size
        
        x = self.conv1(x) # 输入：batch*1*28*28，输出：batch*10*24*24
        x = F.relu(x) #让表达能力更强,保持shape不变
        x = F.max_pool2d(x, 2, 2) #运用最大池化层，找到关键信息 提升运算速率 2*2kernel 输出：batch*10*12*12
        
        x = self.conv2(x) #输出：batch*20*10*10
        x = F.relu(x)
        x = x.view(input_size,-1) # 将图片的立体模型拉平，-1自动计算维度，2000
        x = self.fc1(x) # 输入：batch*2000  输出：batch*500
        x = F.relu(x)# 激活函数，保持shape不变
        x = self.fc2(x) #输入：batch*500  输出：batch*10
        output = F.log_softmax(x,dim=1) # 返回最适配模型概率，softmax损失函数 按行来计算，,取log值 ：计算分类后，每个数字的概率值
        return output

In [6]:
#定义优化器
model = CNN().to(DEVICE)
optimizer = optim.Adam(model.parameters())

In [7]:
#创建模型，定义训练函数
def train_model(model,device,train_loader,optimizer,epoch):
    # 模型训练
    model.train()
    for batch_index,(data,target) in enumerate(train_loader):
        # 部署到DEVICE上
        data,traget = data.to(device),target.to(device)
        # 梯度初始化为0
        optimizer.zero_grad()
        # 训练结果
        output = model(data)
        # 计算损失，累计差距
        loss = F.cross_entropy(output, target) #交叉熵损失，处理多分类问题
        # 找到概率值最大的下标
        pred = output.max(1,keepdim=True) # 1：横轴，也可写成pred = output.argmax(dim=1)
        # 反向传播
        loss.backward()
        optimizer.step()
        if batch_index % 3000 == 0:
            print("Train Epoch : {} \t Loss :{:.6f}".format(epoch, loss.item()))
    


In [8]:
#定义测试方法
def test_model(model,device,test_loader):
    # 模型验证
    model.eval()
    #正确率
    correct = 0.0
    # 测试损失
    test_loss = 0.0
    with torch.no_grad(): # 不会计算梯度，也不会进行反向传播
        for data,target in test_loader:
            # 部署到device上
            data,target = data.to(device),target.to(device)
            # 测试数据
            output = model(data)
            # 计算测试损失
            test_loss +=F.cross_entropy(output,target).item()
            # 找到概率最大的下标
            pred = output.max(1,keepdim=True)[1] # 找到：值 索引
            # 累计正确的值
            correct +=pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(test_loader.dataset)
        print("Test_Average loss : {:.4f},Accuracy : {:.3f}\n".format(
            test_loss,100.0*correct / len(test_loader.dataset)))
            
    

In [None]:
#调用方法
for epoch in range(1,EPOCHS+1):
    train_model(model,DEVICE,train_loader,optimizer,epoch)
    test_model(model,DEVICE,test_loader)

Train Epoch : 1 	 Loss :2.308388
Test_Average loss : 0.0015,Accuracy : 98.533

Train Epoch : 2 	 Loss :0.174327
Test_Average loss : 0.0010,Accuracy : 99.048

Train Epoch : 3 	 Loss :0.004843
