In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import numpy as np

In [3]:
from tensorflow.keras.datasets import mnist

# Pull the MNIST train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Images: cast to float32, view as (N, 28, 28), then divide by 255
# (presumably mapping 8-bit pixel intensities into [0, 1] -- confirm).
x_train = np.array(x_train, np.float32).reshape([-1, 28, 28]) / 255.
x_test = np.array(x_test, np.float32).reshape([-1, 28, 28]) / 255.

# Hand the image arrays over to PyTorch.
x_train = torch.from_numpy(x_train)
x_test = torch.from_numpy(x_test)

# Labels are converted to LongTensor (int64) class indices.
y_train = torch.from_numpy(y_train).type(torch.LongTensor)
y_test = torch.from_numpy(y_test).type(torch.LongTensor)

In [6]:
# Inspect the dimensions of the training image tensor; as the cell's final
# expression, the value is displayed as the cell output.
x_train.shape

torch.Size([60000, 28, 28])

In [8]:
# Number of samples per mini-batch for both loaders.
batch_size = 16

# Wrap the tensors directly instead of building Python lists of
# [image, label] pairs one sample at a time: TensorDataset indexes the
# tensors along their first dimension and yields the same (image, label)
# pairs without materialising one list entry per sample.
train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
test_dataset = torch.utils.data.TensorDataset(x_test, y_test)

# Training batches are reshuffled on every pass over the data.
trainloader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle = True,
    batch_size = batch_size
)

# Evaluation only counts correct predictions, so sample order is irrelevant;
# keep it fixed rather than shuffling.
testloader = torch.utils.data.DataLoader(
    test_dataset,
    shuffle = False,
    batch_size = batch_size
)

In [14]:
class BiRNNModel(nn.Module):
    """ReLU RNN sequence classifier with a linear read-out layer.

    The input is a batch-first sequence tensor of shape
    (batch, seq_len, input_dim). The recurrent features at the end of the
    sequence are mapped by a fully connected layer to ``output_dim`` logits.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the recurrent hidden state (per direction).
        layer_dim: Number of stacked recurrent layers.
        output_dim: Number of output logits.
        bidirectional: When True, run the RNN in both directions and feed the
            concatenated final features of both directions to the read-out
            layer. Defaults to False, which reproduces the original
            single-direction behaviour.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, bidirectional = False):
        super(BiRNNModel, self).__init__()

        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.num_directions = 2 if bidirectional else 1

        self.rnn = nn.RNN(
            input_dim,
            hidden_dim,
            layer_dim,
            batch_first = True,
            nonlinearity = 'relu',
            bidirectional = bidirectional,
        )

        # The read-out sees one hidden vector per direction.
        self.fc = nn.Linear(hidden_dim * self.num_directions, output_dim)

    def forward(self, x):
        """Return logits of shape (batch, output_dim) for a batch of sequences."""
        # Allocate the initial hidden state from `x` so that it always shares
        # the input's dtype and device (a plain torch.zeros would be pinned to
        # float32 on the CPU and break for .double() / .cuda() models).
        h0 = x.new_zeros(self.layer_dim * self.num_directions, x.size(0), self.hidden_dim)

        out, _ = self.rnn(x, h0)

        if self.num_directions == 2:
            # Forward direction finishes at the last time step; the backward
            # direction finishes at the first one.
            forward_last = out[:, -1, :self.hidden_dim]
            backward_last = out[:, 0, self.hidden_dim:]
            features = torch.cat((forward_last, backward_last), dim = 1)
        else:
            features = out[:, -1, :]

        return self.fc(features)

In [15]:
# Model dimensions: the RNN reads 28 features per time step, keeps a
# 100-unit hidden state in a single recurrent layer, and emits 10 logits.
input_dim = 28
hidden_dim = 100
layer_dim = 1
output_dim = 10

# Seed PyTorch's RNG before the (random) weight initialisation so that
# re-running the notebook from a fresh kernel starts from the same model.
torch.manual_seed(0)

model = BiRNNModel(input_dim, hidden_dim, layer_dim, output_dim)

# Cross-entropy over the logits, optimised with SGD plus momentum.
# (`optim` is already imported in the notebook's import cell.)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9)

In [16]:
# Run 20 passes over the training loader, reporting the mean loss of each
# completed window of 2000 mini-batches.
for epoch in range(20):
    loss_window = 0.0

    for step, (inputs, labels) in enumerate(trainloader):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        loss_window += loss.item()

        # After every 2000th mini-batch: print the window average and reset.
        if (step + 1) % 2000 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, step + 1, loss_window / 2000))
            loss_window = 0.0

print('Finished Training')

[1,  2000] loss: 2.295
[2,  2000] loss: 0.645
[3,  2000] loss: 0.341
[4,  2000] loss: 0.248
[5,  2000] loss: 0.189
[6,  2000] loss: 0.165
[7,  2000] loss: 0.149
[8,  2000] loss: 0.134
[9,  2000] loss: 0.119
[10,  2000] loss: 0.111
[11,  2000] loss: 0.108
[12,  2000] loss: 0.098
[13,  2000] loss: 0.098
[14,  2000] loss: 0.098
[15,  2000] loss: 0.091
[16,  2000] loss: 0.080
[17,  2000] loss: 0.080
[18,  2000] loss: 0.076
[19,  2000] loss: 0.076
[20,  2000] loss: 0.078
Finished Training


In [17]:
# Count correct top-1 predictions over the whole test loader.
correct = 0
total = 0

# Evaluate in inference mode, remembering the previous mode so the model is
# handed back in the state it was found in.
was_training = model.training
model.eval()

# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)

        # Predicted class = index of the largest logit along the class axis.
        predicted = outputs.argmax(dim = 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

model.train(was_training)

# NOTE: `%d` truncates the percentage to a whole number.
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 96 %
