In [1]:
import torch
import torchvision
from torch import nn
from torchvision.transforms import ToTensor

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Seed PyTorch's global RNG before any weights are initialised or any loader is
# shuffled, so a fresh "Restart & Run All" starts from the same random state.
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
seed = 0
torch.manual_seed(seed)

# Input geometry: every sample is reshaped to (sequence_length, input_size)
# before it is fed to the LSTM.
sequence_length = 28
input_size = 28

# Model capacity.
hidden_size = 128
num_layers = 2
num_classes = 10

# Optimisation schedule.
batch_size = 100
num_epochs = 5
learning_rate = 1e-3

In [4]:
# download=True fetches MNIST into ./data when the files are missing and does
# nothing when they are already present, so this cell also works on a machine
# that has never run the notebook before.
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=ToTensor(),
                                          download=True)

In [5]:
# Training batches are reshuffled every epoch; the test loader keeps the
# dataset order (shuffle=False is the DataLoader default, spelled out here).
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [6]:
class RNN(nn.Module):
    """Sequence classifier: a stacked LSTM followed by a linear output layer.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Size of the hidden state of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: the LSTM consumes and produces (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Input tensor laid out as (batch, seq_len, input_size).
        """
        # Build the zero initial states on the same device and with the same
        # dtype as the input, so the module does not depend on a notebook-level
        # `device` global and keeps working if the model is moved or cast.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        out, _ = self.lstm(x, (h0, c0))
        # Classify from the top layer's output at the final time step only.
        out = self.fc(out[:, -1, :])
        return out

In [7]:
# Build the classifier from the notebook's hyperparameters, then move its
# parameters to the selected device.
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)

In [8]:
# Cross-entropy over the class scores returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter, using the configured learning rate.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

In [9]:
# Number of mini-batches per epoch; only used in the progress message.
total_step = len(train_loader)

# Select training mode explicitly so this cell behaves the same when it is
# re-run after the evaluation cell has called model.eval(). On CUDA, the cuDNN
# RNN backward pass refuses to run while the module is in eval mode.
model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Present each batch to the LSTM as (batch, sequence_length, input_size).
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 100 steps.
        if (i+1) % 100 == 0:
            print('Epoch: [{}/{}], Step: [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch: [1/5], Step: [100/600], Loss: 0.7237
Epoch: [1/5], Step: [200/600], Loss: 0.2606
Epoch: [1/5], Step: [300/600], Loss: 0.1849
Epoch: [1/5], Step: [400/600], Loss: 0.2110
Epoch: [1/5], Step: [500/600], Loss: 0.1343
Epoch: [1/5], Step: [600/600], Loss: 0.1711
Epoch: [2/5], Step: [100/600], Loss: 0.1431
Epoch: [2/5], Step: [200/600], Loss: 0.2098
Epoch: [2/5], Step: [300/600], Loss: 0.0458
Epoch: [2/5], Step: [400/600], Loss: 0.1485
Epoch: [2/5], Step: [500/600], Loss: 0.2052
Epoch: [2/5], Step: [600/600], Loss: 0.0102
Epoch: [3/5], Step: [100/600], Loss: 0.0282
Epoch: [3/5], Step: [200/600], Loss: 0.0064
Epoch: [3/5], Step: [300/600], Loss: 0.0560
Epoch: [3/5], Step: [400/600], Loss: 0.0814
Epoch: [3/5], Step: [500/600], Loss: 0.0665
Epoch: [3/5], Step: [600/600], Loss: 0.0625
Epoch: [4/5], Step: [100/600], Loss: 0.1981
Epoch: [4/5], Step: [200/600], Loss: 0.0441
Epoch: [4/5], Step: [300/600], Loss: 0.0179
Epoch: [4/5], Step: [400/600], Loss: 0.0372
Epoch: [4/5], Step: [500/600], L

In [10]:
# Measure classification accuracy on the test loader. The model is switched to
# eval mode, and autograd is disabled because no backward pass is needed.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        # The predicted class is the index of the largest score in each row.
        predicted = torch.argmax(outputs, dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print('Test accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

Test accuracy of the model on the 10000 test images: 98.34 %
