In [5]:
# import some packge
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.utils.data as Data
from torch import nn

In [2]:
torch.manual_seed(1)#reproducible

<torch._C.Generator at 0x7f156013c5f0>

In [3]:
#Hyper Parameters
EPOCH = 5
BATCH_SIZE = 64
TIME_STEP = 28 # height of image
INPUT_SIZE = 28 # width of image
LR = 0.01
DOWNLOAD_MINST = True

In [7]:
# download/get the MNIST dataset
train_data = dsets.MNIST(
    root='./mnist/',
    train=True,
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MINST,
)
# 
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

test_data = dsets.MNIST(
    root='./mnist/',
    train=False,
)
test_x = torch.unsqueeze(test_data.data,dim=1).type(torch.FloatTensor)/255
test_y = test_data.targets

In [18]:
test_y

tensor([7, 2, 1,  ..., 4, 5, 6])

In [11]:
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.LSTM(     # LSTM 效果要比 nn.RNN() 好多了
            input_size=28,      # 图片每行的数据像素点
            hidden_size=64,     # rnn hidden unit
            num_layers=2,       # 有几层 RNN layers
            batch_first=True,   # input & output 会是以 batch size 为第一维度的特征集 e.g. (batch, time_step, input_size)
        )

        self.out = nn.Linear(64, 10)    # 输出层

    def forward(self, x):
        # x shape (batch, time_step, input_size)
        # r_out shape (batch, time_step, output_size)
        # h_n shape (n_layers, batch, hidden_size)   LSTM 有两个 hidden states, h_n 是分线, h_c 是主线
        # h_c shape (n_layers, batch, hidden_size)
        r_out, (h_n, h_c) = self.rnn(x, None)   # None 表示 hidden state 会用全0的 state

        # 选取最后一个时间点的 r_out 输出
        # 这里 r_out[:, -1, :] 的值也是 h_n 的值
        out = self.out(r_out[:, -1, :])
        return out

rnn = RNN().to('cuda')
rnn

RNN(
  (rnn): LSTM(28, 64, num_layers=2, batch_first=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
)

In [12]:
optimizer = torch.optim.Adam(rnn.parameters(),lr=LR)
loss_func = nn.CrossEntropyLoss()

In [None]:
for epoch in range(EPOCH):
    for step,(x,y) in enumerate(train_loader):
        x = x.view(-1,28,28).to('cuda')
        y = y.to('cuda')
        
        output = rnn(x)
        loss = loss_func(output,y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if step%100 == 0:
            test_out = rnn(test_x.view(-1,28,28).to('cuda')).to('cpu')
            y_pre = torch.max(test_out,1)[1].data.numpy().squeeze()
            acc = sum(y_pre==test_y.numpy())/test_y.size()
            print('EPOCH-',epoch,' | step-',step,' | the loss is ',loss.data.numpy,' ,accuracy is ',acc)

EPOCH- 0  | step- 0  | the loss is  tensor(0.0024, device='cuda:0')  ,accuracy is  [0.9824]
EPOCH- 0  | step- 100  | the loss is  tensor(0.0746, device='cuda:0')  ,accuracy is  [0.9778]
EPOCH- 0  | step- 200  | the loss is  tensor(0.0075, device='cuda:0')  ,accuracy is  [0.9836]
EPOCH- 0  | step- 300  | the loss is  tensor(0.1200, device='cuda:0')  ,accuracy is  [0.9778]
EPOCH- 0  | step- 400  | the loss is  tensor(0.0794, device='cuda:0')  ,accuracy is  [0.9832]
EPOCH- 1  | step- 0  | the loss is  tensor(0.0607, device='cuda:0')  ,accuracy is  [0.9774]
EPOCH- 1  | step- 100  | the loss is  tensor(0.0953, device='cuda:0')  ,accuracy is  [0.9803]
EPOCH- 1  | step- 200  | the loss is  tensor(0.0258, device='cuda:0')  ,accuracy is  [0.9834]
EPOCH- 1  | step- 300  | the loss is  tensor(0.0213, device='cuda:0')  ,accuracy is  [0.9813]
