In [7]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

import os

In [8]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [9]:
# Hyper parameters

# Input layout: MNIST images are 28x28 pixels.
# NOTE(review): presumably each image row is one time step of 28 features -
# confirm against the model's forward pass.
input_size = 28
sequence_length = 28

# Model dimensions: hidden_size and num_layers configure nn.LSTM,
# num_classes sizes the final linear layer.
hidden_size = 128
num_layers = 2
num_classes = 10

# Training settings.
batch_size = 100
num_epochs = 5
learning_rate = 0.01

In [12]:
# MNIST train and test splits, stored under ../data (downloaded on first use).
# ToTensor converts each image to a tensor.
train_dataset = torchvision.datasets.MNIST(
    root='../data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = torchvision.datasets.MNIST(
    root='../data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

In [11]:
# Training batches are reshuffled at every epoch.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation keeps the dataset order: shuffling cannot change aggregate
# metrics and only makes per-batch results differ from run to run.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Recurrent neural network
class RNN(nn.Module):
    """Stacked LSTM followed by a linear classification layer.

    Every size argument is optional. An argument left as ``None`` falls back
    to the notebook-level hyperparameter ``input_size``, ``hidden_size``,
    ``num_layers`` or ``num_classes`` respectively. The fallback is looked up
    when the model is instantiated, so a bare ``RNN()`` builds the same
    layers as before.

    Args:
        n_inputs: Number of features per time step fed to the LSTM.
        n_hidden: Number of features in the LSTM hidden state.
        n_layers: Number of stacked LSTM layers.
        n_classes: Number of output features of the final linear layer.
    """

    def __init__(self, n_inputs=None, n_hidden=None, n_layers=None, n_classes=None):
        super().__init__()
        # Resolve omitted sizes from the notebook-level hyperparameters.
        n_inputs = input_size if n_inputs is None else n_inputs
        n_hidden = hidden_size if n_hidden is None else n_hidden
        n_layers = num_layers if n_layers is None else n_layers
        n_classes = num_classes if n_classes is None else n_classes

        self.hidden_size = n_hidden
        self.num_layers = n_layers

        # batch_first=True: LSTM inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(n_inputs, n_hidden, n_layers, batch_first=True)
        self.fc = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Score each sequence in the batch from its final LSTM output.

        Args:
            x: Batched input tensor of shape (batch, seq_len, n_inputs).

        Returns:
            Tensor of shape (batch, n_classes) holding the raw outputs of
            the linear layer (no softmax is applied).
        """
        # No initial state is passed, so nn.LSTM starts from zero hidden and
        # cell states.
        outputs, _ = self.lstm(x)
        # Only the last time step of the top LSTM layer feeds the classifier.
        return self.fc(outputs[:, -1, :])

Parameters of `torch.nn.LSTM` (from the PyTorch documentation):

**input_size** – The number of expected features in the input x

**hidden_size** – The number of features in the hidden state h

**num_layers** – Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results. Default: 1

**bias** – If False, then the layer does not use bias weights b_ih and b_hh. Default: True

**batch_first** – If True, then the input and output tensors are provided as (batch, seq, feature) instead of (seq, batch, feature). Note that this does not apply to hidden or cell states. See the Inputs/Outputs sections of the `torch.nn.LSTM` documentation for details. Default: False

**dropout** – If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to `dropout`. Default: 0

**bidirectional** – If True, becomes a bidirectional LSTM. Default: False

**proj_size** – If > 0, will use LSTM with projections of corresponding size. Default: 0