In [0]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [0]:
## Device configuration
## Use the GPU when CUDA is available, otherwise fall back to the CPU.

if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [0]:
## Hyper-parameters

## Input layout
input_size = 28          ## features fed to the LSTM at each time step
sequence_length = 28     ## time steps per sample

## Model
num_classes = 10         ## size of the output score vector
num_layers = 2           ## stacked LSTM layers
hidden_size = 128        ## features in the LSTM hidden state

## Optimisation
learning_rate = 0.01
num_epochs = 2
batch_size = 100

In [4]:
## MNIST dataset

## Training split: fetched into ../../data/ when it is not there yet;
## every image is converted to a tensor on access.
train_dataset = torchvision.datasets.MNIST(
    root = '../../data/',
    train = True,
    download = True,
    transform = transforms.ToTensor())

## Test split: read from the same root, with the same tensor conversion.
test_dataset = torchvision.datasets.MNIST(
    root = '../../data/',
    train = False,
    transform = transforms.ToTensor())

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../../data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:02, 3588330.00it/s]                             


Extracting ../../data/MNIST/raw/train-images-idx3-ubyte.gz


0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 58401.61it/s]                           
0it [00:00, ?it/s]

Extracting ../../data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:01, 961883.57it/s]                             
0it [00:00, ?it/s]

Extracting ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 21545.99it/s]            

Extracting ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!





In [0]:
## Data loader

## Training batches are drawn in shuffled order.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           shuffle = True,
                                           batch_size = batch_size)

## Test batches keep the dataset order.
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          shuffle = False,
                                          batch_size = batch_size)

In [0]:
## Recurrent neural network (many to one)

class RNN(nn.Module):
  """Many-to-one LSTM classifier.

  A stacked LSTM reads the input sequence and a linear layer maps the LSTM
  output at the last time step to class scores.

  Args:
    input_size: number of features per time step.
    hidden_size: number of features in the LSTM hidden state.
    num_layers: number of stacked LSTM layers.
    num_classes: size of the output score vector.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)
    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return class scores for a batch of sequences.

    Args:
      x: tensor of shape (batch_size, seq_length, input_size).

    Returns:
      Tensor of shape (batch_size, num_classes).
    """
    ## Set initial hidden and cell states.
    ## They are allocated on the input's own device and dtype, so the module
    ## does not rely on a notebook-level `device` variable being defined.
    state_shape = (self.num_layers, x.size(0), self.hidden_size)
    h0 = torch.zeros(state_shape, device = x.device, dtype = x.dtype)
    c0 = torch.zeros(state_shape, device = x.device, dtype = x.dtype)

    ## Forward propagate LSTM
    out, _ = self.lstm(x, (h0, c0))    ## out: tensor of shape (batch_size, seq_length, hidden_size)

    ## Decode the hidden state of the last time step
    out = self.fc(out[:, -1, :])
    return out
  
## Build the network from the hyper-parameters, then move it to the selected device
model = RNN(input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            num_classes = num_classes)
model = model.to(device)

In [0]:
## Loss and optimizer

## Cross-entropy between the model's class scores and the integer labels
criterion = nn.CrossEntropyLoss()

## Adam over all model parameters, using the configured learning rate
optimizer = torch.optim.Adam(params = model.parameters(), lr = learning_rate)

In [8]:
## Train the model

total_step = len(train_loader)
log_format = 'Epoch [{} / {}], Step [{} / {}], Loss: {:.4f}'

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    ## Move the batch to the device; each image is viewed as a
    ## (sequence_length, input_size) sequence
    labels = labels.to(device)
    images = images.reshape(-1, sequence_length, input_size).to(device)

    ## Clear gradients left over from the previous step, then run the forward pass
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)

    ## Backward pass and parameter update
    loss.backward()
    optimizer.step()

    ## Report the loss of the current batch every 100 steps
    if (i + 1) % 100 != 0:
      continue
    print(log_format.format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

Epoch [1 / 2], Step [100 / 600], Loss: 0.4664
Epoch [1 / 2], Step [200 / 600], Loss: 0.4390
Epoch [1 / 2], Step [300 / 600], Loss: 0.2920
Epoch [1 / 2], Step [400 / 600], Loss: 0.1266
Epoch [1 / 2], Step [500 / 600], Loss: 0.0306
Epoch [1 / 2], Step [600 / 600], Loss: 0.1964
Epoch [2 / 2], Step [100 / 600], Loss: 0.0453
Epoch [2 / 2], Step [200 / 600], Loss: 0.1997
Epoch [2 / 2], Step [300 / 600], Loss: 0.1652
Epoch [2 / 2], Step [400 / 600], Loss: 0.1711
Epoch [2 / 2], Step [500 / 600], Loss: 0.0513
Epoch [2 / 2], Step [600 / 600], Loss: 0.2110


In [9]:
## Test the model

## Put the model in evaluation mode for inference, remembering the mode it was
## in so it can be restored once the evaluation is over.
was_training = model.training
model.eval()

try:
  ## Gradients are not needed for evaluation
  with torch.no_grad():

    correct = 0
    total = 0

    for images, labels in test_loader:
      images = images.reshape(-1, sequence_length, input_size).to(device)
      labels = labels.to(device)
      outputs = model(images)
      ## Predicted class = index of the highest score along the class dimension.
      ## argmax returns the indices directly, so no throwaway `_` is bound at
      ## notebook scope and the discouraged `.data` attribute is not needed.
      predicted = outputs.argmax(dim = 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'
          .format(100 * correct / total))
finally:
  ## Leave the model in the mode it had before this cell ran
  model.train(was_training)

Test Accuracy of the model on the 10000 test images: 97.15 %


In [0]:
## Save the model checkpoint
## Only the model's state_dict is written, to 'model.ckpt' in the working directory.

torch.save(obj = model.state_dict(), f = 'model.ckpt')