In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [3]:
# Hyper Parameters
# -- input geometry: each image is viewed as `sequence_length` steps
#    of `input_size` features (see the .view(...) calls further down)
input_size = 28
sequence_length = 28

# -- model capacity
num_classes = 10      # width of the final linear layer
num_layers = 2        # stacked LSTM layers
hidden_size = 128     # features in each LSTM hidden state

# -- optimisation
learning_rate = 0.01  # step size handed to Adam
num_epochs = 2        # full passes over the training loader
batch_size = 32       # samples per mini-batch in both loaders

In [4]:
# datasets: MNIST train / test splits stored under ../data/, with every
# sample passed through ToTensor()
train_dsets = dsets.MNIST(
    root='../data/',
    train=True,
    download=True,  # only the training split requests a download
    transform=transforms.ToTensor(),
)
test_dsets = dsets.MNIST(
    root='../data/',
    train=False,
    transform=transforms.ToTensor(),
)

# dataloaders: mini-batches of `batch_size`; only the training loader shuffles
train_loader = torch.utils.data.DataLoader(
    dataset=train_dsets,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dsets,
    batch_size=batch_size,
    shuffle=False,
)

## RNN

In [12]:
# RNN model : many to one
class RNN(nn.Module):
    """Stacked LSTM followed by a linear classifier (many-to-one).

    The whole sequence is fed through the LSTM and only the output of the
    last time step is passed to the fully connected layer.

    Args:
        input_size: number of features per time step.
        hidden_size: number of features in each LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output scores produced by the linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: batch-first input tensor, (batch, seq_len, input_size).
        """
        # initial states: zero tensors created from `x` so they follow its
        # device and dtype (a plain torch.zeros would always be a default-dtype
        # CPU tensor and break once the model/input is moved to GPU or cast)
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # forward
        out, _ = self.lstm(x, (h0, c0))

        # decode hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

In [13]:
# create model: instantiate the many-to-one LSTM classifier from the
# hyper-parameters defined above, then show its layer summary
rnn = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)

print(rnn)

RNN(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=10, bias=True)
)


In [14]:
# loss and optimizer: cross-entropy on the class scores, Adam over every
# parameter of `rnn`
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=rnn.parameters(),
    lr=learning_rate,
)

In [15]:
# Training
# Every batch is reshaped so the LSTM reads each image row by row; the
# current batch loss is printed every 200 optimizer steps.
rnn.train()  # explicit training mode, in case the model was switched to eval()
step = 0     # optimizer steps performed so far, counted across epochs

for epoch in range(num_epochs):
    for images, labels in train_loader:
        # 32x1x28x28 -> 32x28x28; plain tensors work directly, the Variable
        # wrapper has been a no-op since PyTorch 0.4
        images = images.view(-1, sequence_length, input_size)

        optimizer.zero_grad()
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        step += 1
        if step % 200 == 0:
            # `loss` is a 0-dim tensor: indexing it (loss.data[0]) raises
            # IndexError on current PyTorch, .item() returns the Python float
            print("Epoch: {}/{}...".format(epoch+1, num_epochs),
                  "Iterations: {}...".format(step),
                  "Loss: {}".format(loss.item()))

Epoch: 1/2... Iterations: 200... Loss: 0.8602895736694336
Epoch: 1/2... Iterations: 400... Loss: 0.4100678563117981
Epoch: 1/2... Iterations: 600... Loss: 0.15973937511444092
Epoch: 1/2... Iterations: 800... Loss: 0.2933061718940735
Epoch: 1/2... Iterations: 1000... Loss: 0.21323014795780182
Epoch: 1/2... Iterations: 1200... Loss: 0.16835957765579224
Epoch: 1/2... Iterations: 1400... Loss: 0.11336501687765121
Epoch: 1/2... Iterations: 1600... Loss: 0.13124777376651764
Epoch: 1/2... Iterations: 1800... Loss: 0.03121345117688179
Epoch: 2/2... Iterations: 2000... Loss: 0.04371058940887451
Epoch: 2/2... Iterations: 2200... Loss: 0.14326423406600952
Epoch: 2/2... Iterations: 2400... Loss: 0.17982105910778046
Epoch: 2/2... Iterations: 2600... Loss: 0.14355671405792236
Epoch: 2/2... Iterations: 2800... Loss: 0.02993115223944187
Epoch: 2/2... Iterations: 3000... Loss: 0.5630106925964355
Epoch: 2/2... Iterations: 3200... Loss: 0.22818925976753235
Epoch: 2/2... Iterations: 3400... Loss: 0.127395

In [35]:
# Testing
# Inference only: put the model in eval mode and disable autograd so no
# graph is recorded for the forward passes.
rnn.eval()
corr = total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, sequence_length, input_size)
        outputs = rnn(images)
        # index of the highest score along the class dimension
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `corr` a plain Python int instead of a tensor
        corr += (predicted == labels).sum().item()

print('Accuracy: %f %%' % (100 * corr / total))

Accuracy: 96.890000 %


## BiRNN

In [37]:
# Hyper Parameters (BiRNN run)
# Same values as the first hyper-parameter cell except for `learning_rate`.
# -- input geometry
input_size = 28
sequence_length = 28

# -- model capacity
num_classes = 10
num_layers = 2
hidden_size = 128

# -- optimisation
learning_rate = 0.003  # lower than the 0.01 used for the first model
num_epochs = 2
# NOTE: train_loader / test_loader were already built in an earlier cell, so
# re-assigning batch_size here does not change the batches they yield.
batch_size = 32

In [43]:
# BiRNN model
class BiRNN(nn.Module):
    """Stacked bidirectional LSTM followed by a linear classifier.

    The LSTM output concatenates both directions, hence the linear layer
    takes ``hidden_size * 2`` input features.

    Args:
        input_size: number of features per time step.
        hidden_size: number of features in each per-direction hidden state.
        num_layers: number of stacked bidirectional LSTM layers.
        num_classes: number of output scores produced by the linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size*2, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: batch-first input tensor, (batch, seq_len, input_size).
        """
        # initial state: one (h, c) pair per layer and direction, created from
        # `x` so it follows the input's device and dtype (a plain torch.zeros
        # would always be a default-dtype CPU tensor)
        state_shape = (self.num_layers*2, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # forward
        out, _ = self.lstm(x, (h0, c0))

        # decode hidden state of last time step
        # NOTE(review): at the last time step the reverse-direction half of
        # `out` has only processed the final input row; combining
        # out[:, -1, :hidden_size] with out[:, 0, hidden_size:] may be what is
        # wanted. Left unchanged so saved weights / reported accuracy stay
        # comparable -- confirm before changing.
        out = self.fc(out[:, -1, :])
        return out

In [44]:
# create model: instantiate the bidirectional LSTM classifier from the
# current hyper-parameters, then show its layer summary
birnn = BiRNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)

print(birnn)

BiRNN(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=256, out_features=10, bias=True)
)


In [45]:
# loss and optimizer: cross-entropy on the class scores, Adam over every
# parameter of `birnn`
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=birnn.parameters(),
    lr=learning_rate,
)

# Training
# Every batch is reshaped so the LSTM reads each image row by row; the
# current batch loss is printed every 200 optimizer steps.
birnn.train()  # explicit training mode, in case the model was switched to eval()
step = 0       # optimizer steps performed so far, counted across epochs

for epoch in range(num_epochs):
    for images, labels in train_loader:
        # 32x1x28x28 -> 32x28x28; plain tensors work directly, the Variable
        # wrapper has been a no-op since PyTorch 0.4
        images = images.view(-1, sequence_length, input_size)

        optimizer.zero_grad()
        outputs = birnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        step += 1
        if step % 200 == 0:
            # `loss` is a 0-dim tensor: indexing it (loss.data[0]) raises
            # IndexError on current PyTorch, .item() returns the Python float
            print("Epoch: {}/{}...".format(epoch+1, num_epochs),
                  "Iterations: {}...".format(step),
                  "Loss: {}".format(loss.item()))

Epoch: 1/2... Iterations: 200... Loss: 0.5620202422142029
Epoch: 1/2... Iterations: 400... Loss: 0.33108022809028625
Epoch: 1/2... Iterations: 600... Loss: 0.09554889053106308
Epoch: 1/2... Iterations: 800... Loss: 0.16981902718544006
Epoch: 1/2... Iterations: 1000... Loss: 0.2414497286081314
Epoch: 1/2... Iterations: 1200... Loss: 0.1587020605802536
Epoch: 1/2... Iterations: 1400... Loss: 0.1573137640953064
Epoch: 1/2... Iterations: 1600... Loss: 0.04441150277853012
Epoch: 1/2... Iterations: 1800... Loss: 0.031389571726322174
Epoch: 2/2... Iterations: 2000... Loss: 0.021580053493380547
Epoch: 2/2... Iterations: 2200... Loss: 0.07308100908994675
Epoch: 2/2... Iterations: 2400... Loss: 0.01798292063176632
Epoch: 2/2... Iterations: 2600... Loss: 0.0822453424334526
Epoch: 2/2... Iterations: 2800... Loss: 0.22147198021411896
Epoch: 2/2... Iterations: 3000... Loss: 0.01293458603322506
Epoch: 2/2... Iterations: 3200... Loss: 0.20134693384170532
Epoch: 2/2... Iterations: 3400... Loss: 0.00372

In [46]:
# Testing
# Inference only: put the model in eval mode and disable autograd so no
# graph is recorded for the forward passes.
birnn.eval()
corr = total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, sequence_length, input_size)
        outputs = birnn(images)
        # index of the highest score along the class dimension
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `corr` a plain Python int instead of a tensor
        corr += (predicted == labels).sum().item()

print('Accuracy: %f %%' % (100 * corr / total))

Accuracy: 98.290000 %


In [None]:
# Save the Model: write the state_dict of `birnn` (not the module object
# itself) to birnn.pkl in the current working directory
torch.save(obj=birnn.state_dict(), f='birnn.pkl')