# RNN(Recurrent Neural Network)으로 MNIST 숫자 구분하기

## 0. Define Hyper-parameters and device configuration

In [1]:
import torch

In [2]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
# Each 28x28 image is fed to the recurrent model as a sequence of 28 rows,
# with the 28 pixels of a row forming the feature vector of one time step.
sequence_length = 28   # time steps per image (rows)
input_size = 28        # features per time step (pixels per row)
hidden_size = 128      # width of the LSTM hidden / cell state
# Two stacked LSTM layers: this matches the [2, batch, hidden] initial-state
# shapes noted in the model cell. A 10-layer stack is needlessly deep for this
# task and is much slower and harder to train in only a few epochs.
num_layers = 2
num_classes = 10       # digits 0-9
batch_size = 50        # 60000 training images / 50 = 1200 steps per epoch
num_epochs = 3
learning_rate = 0.001

## 1. Load Data

In [3]:
import torchvision
import torchvision.transforms as transforms

In [4]:
# Both splits share the same storage root, tensor conversion and download
# behaviour; only the `train` flag differs between them.
mnist_kwargs = dict(root='./datasets',
                    transform=transforms.ToTensor(),
                    download=True)

train_data = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_data = torchvision.datasets.MNIST(train=False, **mnist_kwargs)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./datasets/MNIST/raw/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


## 2. Define Dataloader

In [5]:
# Training batches are reshuffled on every pass over the data.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Test batches keep the dataset order.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [6]:
# Sanity check: pull a single batch from the test loader and show its shape.
test_batches = iter(test_loader)
image, label = next(test_batches)
print(image.shape)  # [Batch, Channel, Height, Width]

torch.Size([50, 1, 28, 28])


## 3. Define Model

In [7]:
import torch.nn as nn
import torch.nn.functional as F

In [8]:
class RNN(nn.Module):
  """Stacked LSTM classifier that predicts from the last time step.

  The input sequence is run through a multi-layer LSTM; the hidden output of
  the final time step is passed to a linear layer that produces one score per
  class.

  Args:
    input_size: number of features in each time step of the input.
    hidden_size: size of the LSTM hidden and cell states.
    num_layers: number of stacked LSTM layers.
    num_classes: number of output scores produced by the linear layer.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    # The LSTM is built from the constructor argument rather than from a
    # same-named notebook global.
    # batch_first=True: inputs and outputs are [batch, seq_length, features].
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Compute class scores for a batch of sequences.

    Args:
      x: tensor of shape [batch, seq_length, input_size].

    Returns:
      Tensor of shape [batch, num_classes].
    """
    # Zero initial hidden and cell states, each [num_layers, batch, hidden_size].
    # They are created on the input's own device/dtype so the module does not
    # depend on a global `device` variable.
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                     device=x.device, dtype=x.dtype)
    c0 = torch.zeros_like(h0)

    # Forward propagate LSTM; out is [batch, seq_length, hidden_size].
    out, _ = self.lstm(x, (h0, c0))

    # Decode the hidden state of the last time step.
    out = self.fc(out[:, -1, :])

    return out

# Build the network from the hyper-parameters, then move it to the selected device.
model = RNN(input_size, hidden_size, num_layers, num_classes)
model = model.to(device)




## 4. Set Loss & Optimizer

In [9]:
# Classification loss applied to the model's output scores and integer labels.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the model, using the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

## 5. Train and Test

In [10]:
####### Train #######
# Make the training mode explicit so this cell behaves the same no matter
# which mode a previously run cell left the model in.
model.train()

total_step = len(train_loader)
# Report the loss every `log_interval` steps; 100 matches the recorded output
# below (one line per 100 of the 1200 steps in an epoch).
log_interval = 100

for epoch in range(num_epochs):
  for i, (image, label) in enumerate(train_loader):
    # [batch, 1, 28, 28] -> [batch, sequence_length, input_size]:
    # every image row becomes one time step of the sequence.
    image = image.reshape(-1, sequence_length, input_size).to(device)
    label = label.to(device)

    # Forward
    output = model(image)
    loss = criterion(output, label)

    # Backward and optimize
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()

    if (i+1) % log_interval == 0:
      print("Epoch [{}/{}], Step[{}/{}], Loss:{:.4f}".format(epoch+1, num_epochs, i+1, total_step, loss.item()))


Epoch [1/3], Step[100/1200], Loss:1.9207
Epoch [1/3], Step[200/1200], Loss:1.3806
Epoch [1/3], Step[300/1200], Loss:1.2739
Epoch [1/3], Step[400/1200], Loss:0.6820
Epoch [1/3], Step[500/1200], Loss:0.6650
Epoch [1/3], Step[600/1200], Loss:0.7997
Epoch [1/3], Step[700/1200], Loss:0.6082
Epoch [1/3], Step[800/1200], Loss:0.5610
Epoch [1/3], Step[900/1200], Loss:0.3413
Epoch [1/3], Step[1000/1200], Loss:0.2130
Epoch [1/3], Step[1100/1200], Loss:0.2753
Epoch [1/3], Step[1200/1200], Loss:0.6794
Epoch [2/3], Step[100/1200], Loss:0.1846
Epoch [2/3], Step[200/1200], Loss:0.2254
Epoch [2/3], Step[300/1200], Loss:0.2234
Epoch [2/3], Step[400/1200], Loss:0.1317
Epoch [2/3], Step[500/1200], Loss:0.1846
Epoch [2/3], Step[600/1200], Loss:0.2655
Epoch [2/3], Step[700/1200], Loss:0.2631
Epoch [2/3], Step[800/1200], Loss:0.2600
Epoch [2/3], Step[900/1200], Loss:0.0795
Epoch [2/3], Step[1000/1200], Loss:0.0955
Epoch [2/3], Step[1100/1200], Loss:0.1099
Epoch [2/3], Step[1200/1200], Loss:0.2569
Epoch [3/3

In [14]:
######## TEST ########
# Switch to evaluation mode before measuring accuracy. The model is left in
# evaluation mode afterwards.
model.eval()

# Gradients are not needed for evaluation; disabling them saves memory and time.
with torch.no_grad():
  correct = 0

  for image, label in test_loader:
    # [batch, 1, 28, 28] -> [batch, sequence_length, input_size]
    image = image.reshape(-1, sequence_length, input_size).to(device)
    label = label.to(device)
    output = model(image)
    # Index of the highest score along the class dimension is the prediction.
    # `.data` is unnecessary here because autograd is already disabled.
    _, pred = torch.max(output, 1)
    correct += (pred == label).sum().item()

  print('Test Accuracy of RNN model on the {} test images: {}%'.format(len(test_data), 100 * correct / len(test_data)))

Test Accuracy of RNN model on the 10000 test images: 97.02%
