In [11]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision
import numpy as np
import pandas as pd
import platform

### 1. 加载数据集(https://blog.csdn.net/qq_52643100/article/details/140360548)

In [4]:
# Load the MNIST train and test splits, converting each image to a tensor
# (reference: https://blog.csdn.net/qq_52643100/article/details/140360548)
train_dataset = datasets.MNIST(
    root="./data/",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root="./data/",
    train=False,
    transform=transforms.ToTensor(),
)

In [5]:
class_names = train_dataset.classes # class/label names exposed by the dataset
print(class_names)

['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']


In [6]:
# Inspect the shape of the raw training image tensor
print(train_dataset.data.shape)

torch.Size([60000, 28, 28])


In [7]:
# Inspect the shape of the training label tensor
print(train_dataset.targets.shape)

torch.Size([60000])


In [8]:
# Inspect the shape of the raw test image tensor
print(test_dataset.data.shape)

torch.Size([10000, 28, 28])


In [9]:
# Inspect the shape of the test label tensor
print(test_dataset.targets.shape)

torch.Size([10000])


### 定义超参数

In [10]:
BATCH_SIZE = 64 # number of samples read per batch
EPOCHS = 10 # train for 10 epochs

### 创建数据集的可迭代对象，即一个batch一个batch的读取数据

In [12]:
# Batched iterators over the datasets. Only the training split is shuffled:
# shuffling the evaluation split does not change the accuracy and would make
# the batch preview below differ on every run.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [14]:
# Pull a single batch from the test loader to inspect its contents
images, labels = next(iter(test_loader))

In [15]:
# Shape of the image batch fetched above
print(images.shape)

torch.Size([64, 1, 28, 28])


In [16]:
# Shape of the label batch fetched above
print(labels.shape)

torch.Size([64])


### 定义RNN模型

In [19]:
class RNN_Model(nn.Module):
    """Stacked ReLU RNN followed by a linear layer applied to the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden state of each RNN layer.
        layer_dim: Number of stacked RNN layers.
        output_dim: Number of units produced by the final linear layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
        # Fully connected layer mapping the last hidden output to the final outputs
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the RNN over ``x`` and project the last time step's output.

        Args:
            x: Input tensor laid out as (batch, seq_len, input_dim), since the
                RNN is built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Zero initial hidden state, allocated on the same device and dtype as
        # the input so the module does not depend on a global `device` variable.
        # A freshly created tensor carries no autograd history, so no
        # requires_grad_()/detach() round-trip is needed.
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Keep only the output of the final time step for the prediction
        out = self.fc(out[:, -1, :])
        return out

### 初始化模型

In [21]:
input_dim = 28 # input features per time step
hidden_dim = 100 # hidden-state size
layer_dim = 2 # number of stacked RNN layers
output_dim = 10 # number of outputs

# Pick an available accelerator instead of hard-coding one, falling back to CPU
# so the notebook also runs on machines without a GPU / Apple-silicon backend.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Move the parameters to the chosen device before any optimizer is built on them
model = RNN_Model(input_dim, hidden_dim, layer_dim, output_dim).to(device)

### 定义损失函数

In [22]:
# Cross-entropy loss used as the training objective
criterion = nn.CrossEntropyLoss()

### 定义优化器

In [23]:
# Plain SGD over all model parameters with a fixed learning rate
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

### 输出模型参数信息

In [24]:
# Number of parameter tensors registered on the model (counted lazily,
# without materialising a list)
print(sum(1 for _ in model.parameters()))

10


### 模型训练

In [25]:
sequence_dim = 28 # sequence length (number of time steps)

In [None]:
loss_list = [] # holds loss values
accuracy_list = [] # holds accuracy values
iteration_list = [] # holds iteration counts

# NOTE(review): `iter` shadows Python's built-in iter(), which an earlier cell
# calls as iter(test_loader); re-running that cell after this one would fail.
iter = 0
for epoch in range(EPOCHS):
    for i, (images, labels) in enumerate(train_loader):
        model.train() # 声明训练
        # 一个batch的数据转换为RNN的输入维度
        image = images.view(-1, sequence_dim, input_dim).requires_grad_().to(device)
        labels = labels.to(device)
        # 梯度清零
        optimizer.zero_grad()
        # 前向传播
        outputs = model(images)
        # 计算损失
        loss = criterion(outputs, labels)
        # 反向传播
        loss.backward()
        # 更新参数
        optimizer.step()
        # 计数器自动加1
        iter += 1
        # 模型验证
        if iter % 500 == 0:
            model.eval() # 声明
            # 计算验证的accuracy
            correct = 0.0
            total = 0.0
            # 迭代测试集，获取数据，预测
            for images, labels in test_loader:
                image = images.view(-1, sequence_dim, input_dim).to(device)
                # 模型预测
                outputs = model(image)
                # 获取预测概率最大值的下标
                predict = torch.max(outputs.data, 1)[1]
                # 统计测试集的大小
                total += labels.size(0)
                # 统计判断/预测正确的数量
                