# 数字识别-模型训练
## 1.导入所需模块

In [None]:
import numpy as np
import cv2
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
import time
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import StepLR
import net 
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

## 2.数据预处理
加载手写数字数据集并进行图片预处理

In [None]:
# Data preprocessing: convert each image to a tensor, then normalise it with
# the mean / std pair (0.1307, 0.3081) given to Normalize.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Both splits are stored under (and, if missing, downloaded to) the same root.
mnist_root = '../../../models/local/datasets/mnist'

# Training split, wrapped in a shuffling loader.
trainset = torchvision.datasets.MNIST(root=mnist_root, train=True, download=True, transform=transform)
# num_workers stays at 0: the original author notes that a non-zero value is
# buggy on Windows.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=0)

# Test split, iterated in a fixed order.
testset = torchvision.datasets.MNIST(root=mnist_root, train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False, num_workers=0)

# Preview the first raw training image with its label as the title.
# `data` / `targets` are used instead of the deprecated `train_data` /
# `train_labels` aliases, whose deprecation warnings would otherwise be hidden
# by the UserWarning filter installed in the import cell.
plt.imshow(trainset.data[0].numpy(), cmap='gray')
plt.title('%i' % int(trainset.targets[0]))
plt.show()

## 3.参数设置

In [None]:
# Build the network.
# The class is imported by name instead of being reached through `net.Net()`:
# this cell rebinds the global `net` from the imported module to the model
# instance, so `net.Net()` would raise AttributeError if the cell were executed
# a second time. `from net import Net` resolves the module through the import
# system and therefore keeps working after the rebinding.
from net import Net
net = Net()

# Device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

net.to(device)

num_epochs = 50  # number of training epochs

# Loss function: cross-entropy.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum.
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9)

# Step decay (not a moving average): the learning rate is multiplied by
# gamma=0.7 after every step_size=10 calls to scheduler.step().
scheduler = StepLR(optimizer, step_size=10, gamma=0.7)

## 4.定义模型训练函数

In [None]:
def train():
    """Train the global ``net`` and keep the checkpoint with the best test accuracy.

    Uses the module-level ``net``, ``device``, ``num_epochs``, ``criterion``,
    ``optimizer``, ``scheduler``, ``trainloader`` and ``testloader``.

    For every epoch the function:

    1. runs one optimisation pass over ``trainloader`` and steps the scheduler;
    2. prints the mean training loss of the epoch;
    3. evaluates on ``testloader`` without gradients and prints the mean test
       loss and the accuracy in percent;
    4. saves the whole model object to ``studens_models/MNIST_student.pth``
       whenever the accuracy is strictly higher than the best one seen so far.

    Returns:
        None. Progress is reported through ``print`` and the checkpoint file.
    """
    import os

    checkpoint_path = 'studens_models/MNIST_student.pth'
    # torch.save does not create missing parent directories, so make sure the
    # target directory exists before the first checkpoint is written.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    print("开始训练")
    best_accuracy = 0
    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        train_loss_sum = 0.0
        train_samples = 0
        for inputs, labels in trainloader:
            # Each batch holds the transformed images and their labels; the
            # loader cell sets no target transform, so the labels are not
            # one-hot encoded here.
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by the batch size so the epoch figure is a
            # per-sample mean even when the last batch is smaller.
            # NOTE(review): assumes `criterion` averages over the batch, as the
            # default nn.CrossEntropyLoss() created in the setup cell does.
            batch_size = labels.size(0)
            train_loss_sum += loss.item() * batch_size
            train_samples += batch_size
        scheduler.step()

        # Report the epoch mean instead of the loss of the final mini-batch.
        print('epoch：%d，训练数据集：loss:%.4f' % (epoch + 1, train_loss_sum / max(train_samples, 1)))

        # ---- evaluation pass ----
        net.eval()
        correct = 0
        total = 0
        valid_loss_sum = 0.0
        with torch.no_grad():
            for images, labels in testloader:
                images, labels = images.to(device), labels.to(device)
                out = net(images)
                batch_size = labels.size(0)
                valid_loss_sum += criterion(out, labels).item() * batch_size
                # Index of the largest output per sample is the predicted class.
                _, predicted = torch.max(out, 1)
                total += batch_size
                # Count the correctly classified images.
                correct += (predicted == labels).sum().item()

        # Guard against an empty test loader instead of dividing by zero.
        accuracy = 100 * correct / total if total else 0.0
        print('测试数据集：    loss:%.4f' % (valid_loss_sum / max(total, 1)))
        print('测试数据集准确率为：{}%'.format(accuracy))

        # Keep only the best-performing model so far. The whole model object is
        # saved (not just its state_dict), matching the original file format.
        if accuracy > best_accuracy:
            torch.save(net, checkpoint_path)
            best_accuracy = accuracy
            print("模型已保存")

## 5.开始训练
由于该数据集图片数量较多，在dachbot上进行训练花费时间较长：
预计每个epoch约需3分钟，按上文设置的 num_epochs = 50 计算，完整训练约需150分钟。

In [None]:
# Start training by calling the train() function defined in section 4.
train()