In [58]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [59]:
class RNN(nn.Module):
    """Sequence classifier: a stacked ``nn.RNN`` followed by one linear layer.

    The final hidden state of every recurrent layer is concatenated per sample
    and passed through a fully connected layer that emits ``num_classes``
    scores.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of each recurrent layer's hidden state.
        num_classes: Number of output scores produced by the linear layer.
        num_layers: Number of stacked recurrent layers.
        batch_first: If True the input is ``(batch, seq, feature)``, otherwise
            ``(seq, batch, feature)``; forwarded to ``nn.RNN``.
        dropout: Dropout probability forwarded to ``nn.RNN``.
        device: Stored as ``self.device`` for backward compatibility only.
            ``forward`` places the initial hidden state on the input's device,
            so this value does not need to be a valid torch device string.
    """

    def __init__(self, input_size, hidden_size, num_classes, num_layers=1, batch_first=True, dropout=0, device='gpu'):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.batch_first = batch_first
        self.device = device
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=batch_first, dropout=dropout)
        # The classifier sees the last hidden state of *all* layers concatenated.
        self.fc = nn.Linear(num_layers*hidden_size, num_classes)

    def forward(self, x):
        """Return a ``(batch, num_classes)`` tensor of scores for the batch ``x``."""
        # The batch axis depends on the layout the recurrent layer was built with.
        batch_size = x.shape[0] if self.batch_first else x.shape[1]
        # Create the zero initial state next to the input instead of relying on
        # a module-level `device` global, so the class works on its own.
        h_0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device, dtype=x.dtype)
        _, h_n = self.rnn(x, h_0)
        # h_n is (num_layers, batch, hidden) regardless of batch_first; put the
        # batch axis first and flatten the per-layer states into one vector.
        h_n = h_n.permute(1,0,2)
        h_n = h_n.reshape(h_n.shape[0], -1)
        res = self.fc(h_n)

        return res

In [60]:
# Prefer the GPU when PyTorch can see one; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Model shape: features per time step, hidden width, stacked layers, outputs.
input_size, hidden_size = 28, 256
num_layers, num_classes = 2, 10
dropout = 0

# Optimisation settings used by the data loaders, optimizer and training loop.
learning_rate = 1e-3
batch_size = 64
num_epoches = 10

In [61]:
# Build the classifier from the notebook-level hyperparameters, then move its
# parameters to the selected device (Module.to returns the module itself).
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    num_layers=num_layers,
    batch_first=True,
    dropout=dropout,
    device=device,
)
model = model.to(device)
model  # last expression of the cell: show the module summary

RNN(
  (rnn): RNN(28, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=512, out_features=10, bias=True)
)

In [62]:
# Training split: downloaded on first use, images converted to tensors, and
# reshuffled every epoch.
train_dataset = datasets.MNIST(
    root='dataset/minist/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Held-out split: evaluation does not depend on sample order, so keep the
# loader unshuffled to make batch contents reproducible between runs.
test_dataset = datasets.MNIST(
    root='dataset/minist/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [63]:
# Adam updates every parameter of `model`; cross-entropy is the training loss
# applied to the model's output scores.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [64]:
# Train for `num_epoches` passes over `train_loader`, printing the mean batch
# loss after each epoch.
for epoch in range(num_epoches):
    # Re-enter training mode every epoch so this cell still trains correctly
    # when it is re-run after the evaluation cell has called model.eval().
    model.train()
    losses = []
    for data, label in train_loader:
        # squeeze(1) drops the size-1 axis at index 1 (presumably the single
        # image channel) so each batch is a 3-D tensor for the RNN.
        data = data.to(device).squeeze(1)
        label = label.to(device)

        pred = model(data)
        loss = criterion(pred, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Keep a plain Python float: appending the tensor itself would hold a
        # graph-attached (possibly GPU) tensor per batch for the whole epoch
        # and make sum() below extend autograd history.
        losses.append(loss.item())

    print(f"epoch: {epoch+1}, loss: {sum(losses)/len(losses):.2f}")

epoch: 1, loss: 0.60
epoch: 2, loss: 0.28
epoch: 3, loss: 0.21
epoch: 4, loss: 0.17
epoch: 5, loss: 0.15
epoch: 6, loss: 0.16
epoch: 7, loss: 0.15
epoch: 8, loss: 0.14
epoch: 9, loss: 0.14
epoch: 10, loss: 0.13


In [65]:
# Measure classification accuracy on `test_loader` with gradients disabled.
model.eval()
total = 0
num_wrong = 0

with torch.no_grad():
    for data, label in test_loader:
        # squeeze(1) drops the size-1 axis at index 1 (presumably the single
        # image channel), matching what the training loop feeds the model.
        data = data.to(device).squeeze(1)
        label = label.to(device)
        total += data.shape[0]

        # Predicted class = index of the largest score along dim 1.
        pred = model(data).argmax(1)
        # Count mismatches directly and accumulate as a Python int rather than
        # a device tensor built from abs/clamp arithmetic.
        num_wrong += (pred != label).sum().item()

print(f"Accuracy on  test set : {(1-(num_wrong/total))*100:.2f}%")

Accuracy on  test set : 94.39%
