In [20]:
import torch
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms,datasets
from torch import nn,optim
from torchvision.datasets import MNIST

import matplotlib.pyplot as plt
print(torch.cuda.is_available())  # 应该返回True
print(torch.cuda.device_count())  # 返回可用的GPU数量

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')
# torch.cuda.amp


True
1
Using device: cuda


In [21]:
# 载入训练集数据，同时把数据转换为tensor，同时下载数据
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)

# 载入测试集数据，同时把数据转换为tensor，同时下载数据
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# 文件分别为训练集测试集他们分别的数据和标签

In [22]:
# 批次大小
batch_size = 64
# 装载数据集,dataloader为数据的装载器，数据来源于dataset=train_dataset, 大小为batch_size=batch_size，方式为随机打乱
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=8)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=8)
# 展示数据
for i,data in enumerate(train_loader):
    inputs, labels = data
    print(inputs.shape)
    print(labels.shape)
    break

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [23]:
Dop = 0.2 # 0.1的神经元不工作
class LSTM(nn.Module):
    # 初始化，定义网络结构
    def __init__(self):
        # 初始化nnModule
        super(LSTM, self).__init__()
        # input：输入特征的大小
        # hidden：LSTM模块的数量
        # layers：隐藏层的层数
        # lstm默认input(seq_len, batch, feature)
        # batchfirst后，变成(feature, seq_len, batch)
        self.lstm = nn.LSTM(
            input_size=28,
            hidden_size=64,
            num_layers=1,
            batch_first=True
        )
        self.fc = nn.Linear(64, 10)
        self.softmax = nn.Softmax(dim=1)
        
        
        
    # 前向计算，定义网络计算
    def forward(self, x):
        x = x.view(-1, 28, 28)
        # 三个输出
        # output:[batch,seq_len,hidden_size]，包含每个序列的输出结果
        # seq_len取27时，就是最后一个结果
        # 虽然LSTW的batch_first为True，但是h_n,c_n的第0个维度还是numlayers
        # h_n:[num_layers, batch, hidden_size]，只包含最后一个序列的输出结果hidden
        # c_n:[num_layers, batch, hidden_size]，只包含最后一个序列的输出结果cell
        output, (h_n,c_n) = self.lstm(x)
        #output_last = self.fc(output[:, -1, :])  #只要最后一个结果
        output_last = h_n[-1, :, :]  #只要最后一个结果
        x = self.fc(output_last)
        outs = self.softmax(x)
        
        return outs

In [24]:
LR = 0.001
# 定义模型
model = LSTM()
model.to(device)
# 定义损失函数,交叉熵损失
cross_entropy = nn.CrossEntropyLoss()
# 优化器，设置L2正则化
optimizer = optim.Adam(model.parameters(), lr=LR ,weight_decay=0.0001)

In [25]:
def train():
    # 训练状态
    model.train()
    for i,data in enumerate(train_loader):
        # 获得一个批次的数据和标签
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        out = model(inputs)
        # 交叉熵无需使用独热编码
        loss = cross_entropy(out, labels)
        # 梯度清零，计算梯度，修改权值
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def test():
    # 测试状态
    model.eval()
    # 计算测试集的准确率
    correct = 0
    for i, data in enumerate(test_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        out = model(inputs)
        # 计算out中，最大值所在位置
        _, predicted = torch.max(out.data, 1)
        
        correct += (predicted == labels).sum()
    print("Test acc:{0}".format(correct.item() / len(test_loader.dataset)))
    
    
    # 计算训练集的准确率
    correct = 0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        out = model(inputs)
        # 计算out中，最大值所在位置
        _, predicted = torch.max(out.data, 1)
        
        correct += (predicted == labels).sum()
    print("Train acc:{0}".format(correct.item() / len(train_loader.dataset)))
        

In [26]:
for epoch in range(10):
    
    train()
    print('epoch:', epoch,' train over')
    
    test()
    
    if LR >= 0.00001:
        LR = LR * 0.3
    
    
# 一共1w张测试集

epoch: 0  train over
Test acc:0.8269
Train acc:0.8154666666666667
epoch: 1  train over
Test acc:0.8467
Train acc:0.8456666666666667
epoch: 2  train over
Test acc:0.9222
Train acc:0.9228333333333333
epoch: 3  train over
Test acc:0.9429
Train acc:0.9381833333333334
epoch: 4  train over
Test acc:0.9385
Train acc:0.9405833333333333
epoch: 5  train over
Test acc:0.9588
Train acc:0.96035
epoch: 6  train over
Test acc:0.9597
Train acc:0.9622333333333334
epoch: 7  train over
Test acc:0.9602
Train acc:0.96325
epoch: 8  train over
Test acc:0.9586
Train acc:0.9597833333333333
epoch: 9  train over
Test acc:0.9638
Train acc:0.9645333333333334


# 保存模型

In [28]:
torch.save(model.state_dict(), 'modle/LSTMnum.pth')