In [2]:
import torch
import numpy as np 
from torch.autograd import Variable
import torch.nn.functional as fun
import matplotlib.pyplot as plt
import torch.utils.data as Data
import torch.nn as nn
import torchvision 
import torchvision.datasets as dsets

In [3]:
# Hyperparameters and run configuration
SEED = 1                # fixed seed for PyTorch's global RNG
# Seed once, up front, so weight initialization and batch shuffling are
# repeatable after Restart Kernel -> Run All.
torch.manual_seed(SEED)

BATCH_SIZE = 64         # samples per training mini-batch
EPOCH = 1               # number of passes over the training set
TIME_STEP = 28          # sequence length fed to the LSTM (one step per image row)
INPUT_SIZE = 28         # features per time step (pixels in one image row)
LR = 0.01               # learning rate handed to the optimizer
DOWNLOAD_MNIST = False  # set to True if ./mnist does not hold the dataset yet

In [4]:
# Load the MNIST training and test sets
train_data = dsets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
test_data = dsets.MNIST(
    root='./mnist/',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,  # same download switch as the training split
)

# Fixed evaluation subset: the first 2000 test images and their labels.
# `test_data.data` is read directly rather than through the dataset's
# transform, so the pixel values are converted to float and divided by 255
# here. Slicing before the conversion avoids casting the whole test set.
test_x = test_data.data[:2000].float() / 255
test_y = test_data.targets[:2000]

In [5]:
class RNN(nn.Module):
    """LSTM sequence classifier: an LSTM followed by a linear read-out layer.

    The input is a batch of sequences shaped (batch, time_step, input_size).
    Only the LSTM output at the final time step is passed to the linear
    layer, which produces one score per class.

    Args:
        input_size: Number of features per time step. ``None`` (the default)
            falls back to the notebook-level ``INPUT_SIZE`` constant.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size=None, hidden_size=64, num_layers=1, num_classes=10):
        super().__init__()
        if input_size is None:
            # Looked up at call time so `RNN()` keeps using the notebook's
            # configured feature size.
            input_size = INPUT_SIZE
        self.rnn = nn.LSTM(
            input_size=input_size,    # features per time step
            hidden_size=hidden_size,  # hidden state width
            num_layers=num_layers,    # stacked LSTM layers
            batch_first=True,         # tensors are (batch, time_step, feature)
        )
        # The read-out width must match the LSTM hidden size, so both are
        # derived from the same argument.
        self.out = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for input ``x``.

        ``x`` is shaped (batch, time_step, input_size).
        """
        # No initial state is passed, so the LSTM starts from zeros. The
        # returned final (h_n, c_n) state is not needed here.
        r_out, _ = self.rnn(x)
        # r_out is (batch, time_step, hidden_size); classify from the last
        # time step only.
        return self.out(r_out[:, -1, :])

In [6]:
# Build the network and print its layer summary
rnn = RNN()
print(rnn)

RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
)


In [7]:
# Loss function and optimizer
# Cross-entropy between the network's class scores and the integer labels.
loss_func = nn.CrossEntropyLoss()
# Adam updates every parameter of the network at learning rate LR.
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=LR)

In [8]:
# Training: optimize on mini-batches; every 50 steps report the current
# training loss and the accuracy on the fixed test subset.
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):
        # Reshape each batch to (batch, time_step, input_size) for the LSTM.
        b_x = x.view(-1, TIME_STEP, INPUT_SIZE)
        b_y = y
        output = rnn(b_x)
        loss = loss_func(output, b_y)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

        if step % 50 == 0:
            # Evaluation only: disable autograd so the forward pass over the
            # test subset does not build a graph that is never back-propagated.
            with torch.no_grad():
                test_output = rnn(test_x)
            pred_y = test_output.argmax(dim=1)  # index of the highest score per sample
            accurancy = (pred_y == test_y).sum().item() / test_y.size(0)
            print('Epoch:',epoch,'|train loss:%.4f'%loss.item(),'|test accurancy:',accurancy)

Epoch: 0 |train loss:2.3035 |test accurancy: 0.101
Epoch: 0 |train loss:1.0986 |test accurancy: 0.5395
Epoch: 0 |train loss:0.9550 |test accurancy: 0.729
Epoch: 0 |train loss:0.4884 |test accurancy: 0.8215
Epoch: 0 |train loss:0.4959 |test accurancy: 0.841
Epoch: 0 |train loss:0.2516 |test accurancy: 0.889
Epoch: 0 |train loss:0.1963 |test accurancy: 0.899
Epoch: 0 |train loss:0.2865 |test accurancy: 0.8895
Epoch: 0 |train loss:0.3893 |test accurancy: 0.9115
Epoch: 0 |train loss:0.1440 |test accurancy: 0.9345
Epoch: 0 |train loss:0.0991 |test accurancy: 0.9425
Epoch: 0 |train loss:0.0992 |test accurancy: 0.946
Epoch: 0 |train loss:0.3270 |test accurancy: 0.9405
Epoch: 0 |train loss:0.2118 |test accurancy: 0.9415
Epoch: 0 |train loss:0.1426 |test accurancy: 0.935
Epoch: 0 |train loss:0.1260 |test accurancy: 0.954
Epoch: 0 |train loss:0.0981 |test accurancy: 0.9545
Epoch: 0 |train loss:0.0850 |test accurancy: 0.952
Epoch: 0 |train loss:0.2127 |test accurancy: 0.954


In [9]:
# Compare predictions with the true labels for the first 10 test images
# Inference only: run without autograd so the result can be converted to
# NumPy directly.
with torch.no_grad():
    test_output = rnn(test_x[:10])
pred_y = test_output.argmax(dim=1).numpy()  # predicted class index per image
print(pred_y,'prediction number')
print(test_y[:10].numpy(),'real number')

[7 2 1 6 4 1 4 9 5 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number
