In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [2]:
#### parameters
input_size = 28
sequence_len = 28
hidden_size = 128 # you can chose any number
num_classes = 10
num_layers = 2
num_epochs = 2
batch_size = 100
learning_rate = 0.001

In [3]:
#### Model
class RNN(nn.Module):
    
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        ##### i think here below we are initiating the (num_layers and hidden_size) klte layer with 128 nodes
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size) # not clear
        #print('print from forward method >>>', len(h0[0][0]), len(h0[1][0]))
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :]
        print(out.shape)
        out = self.fc1(out)
        return out
    
model = RNN(input_size, hidden_size, num_layers, num_classes)
model

RNN(
  (rnn): RNN(28, 128, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=128, out_features=10, bias=True)
)

In [4]:
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [5]:
training_data = torchvision.datasets.MNIST('dataMNIST/', train=True, transform=transforms.ToTensor(), download=True)
testing_data  = torchvision.datasets.MNIST('dataMNIST/', train=False, transform=transforms.ToTensor(), download=True)

train_loader = torch.utils.data.DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=testing_data, batch_size=batch_size, shuffle=True) 


In [6]:
#### Training loop

for images, labels in train_loader:
    print(images.shape, labels) #### we got 100 images with 100 labels
    break

torch.Size([100, 1, 28, 28]) tensor([1, 5, 6, 3, 1, 7, 7, 6, 7, 1, 1, 1, 8, 0, 6, 0, 1, 6, 6, 5, 5, 8, 5, 4,
        3, 7, 3, 5, 5, 3, 0, 8, 7, 3, 3, 4, 3, 7, 5, 3, 3, 3, 7, 8, 8, 8, 2, 8,
        6, 3, 6, 3, 0, 2, 8, 9, 1, 0, 9, 1, 8, 5, 6, 7, 7, 9, 1, 0, 9, 4, 1, 2,
        2, 0, 3, 9, 2, 1, 5, 9, 1, 3, 7, 9, 6, 1, 3, 7, 6, 7, 7, 3, 0, 3, 5, 8,
        2, 2, 3, 6])


# Calculating loss and Training accu

In [7]:
for epoch in range(num_epochs):
    corr = 0
    n_samples = 0
    ix = 0
    
    for images, labels in train_loader:
        #print(images.shape, labels) #### we got 100 images with 100 labels
        images = images.reshape(-1, 28, 28)
        
        #forward
        scores_pred = model(images)
        loss = loss_func(scores_pred, labels)

        #backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent update step/adam step
        optimizer.step()
        
        _, predictions = torch.max(scores_pred, 1)
        good_pred = (predictions == labels).sum().item()
        
        corr += good_pred
        n_samples += labels.size(0)
        ix += 1
    
        if ix % 100 == 0:
            print(loss)
            
    print(100 * corr / n_samples)

torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size([100, 128])
torch.Size(

KeyboardInterrupt: 

# Testing accu

In [None]:
# Trainig Test

with torch.no_grad():
    corr = 0
    for x, y in test_loader:
        x = x.reshape(-1, 28, 28)
        y = y
        
        scores_pred = model(x)
        _, predictions = torch.max(scores_pred, 1)
        corr += (predictions == y).sum().item()
        
    print(corr / len(test_loader))
