# RNN

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [2]:
#### parameters
# Each 28x28 MNIST image is fed to the RNN as a sequence of rows: one 28-pixel row per
# time step (the training cells reshape every batch to (-1, 28, 28)), rather than as a
# single flattened 784-pixel vector covering the whole picture at once.
input_size = 28  # features per time step: the pixels of one image row
sequence_len = 28  # time steps per image: the number of rows (not referenced by the other visible cells)
hidden_size = 128 # width of the RNN hidden state; a free hyperparameter, any positive value works
num_classes = 10  # size of the model's output layer: one score per digit class
num_layers = 2  # number of stacked recurrent layers passed to nn.RNN
num_epochs = 2  # full passes over the training loader
batch_size = 100  # samples per DataLoader batch
learning_rate = 0.001  # step size handed to the Adam optimizer

In [3]:
#### Model
class RNN(nn.Module):
    """Stacked vanilla RNN followed by a linear classifier on the last time step.

    Args:
        input_size: number of features in each time step of the input sequence.
        hidden_size: width of the hidden state of every recurrent layer.
        num_layers: number of recurrent layers stacked on top of each other.
        num_classes: number of scores produced for each input sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()

        # Kept on the instance because forward() needs them to size the initial hidden state.
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        # batch_first=True -> inputs and outputs are laid out as (batch, seq_len, features).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Score a batch of sequences.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, num_classes) holding the unnormalised class scores.
        """
        # Initial hidden state: one zero vector of length hidden_size per layer and per
        # sample. It is created on the same device and with the same dtype as the input,
        # so the model keeps working after .to("cuda") / .double() instead of failing
        # with a device or dtype mismatch.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Classify from the top layer's hidden state at the final time step only.
        out = out[:, -1, :]
        out = self.fc1(out)
        return out
    
# Build the network from the hyper-parameters defined in the parameters cell.
RNN_model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
# Bare expression on the last line so the notebook displays the module summary.
RNN_model

RNN(
  (rnn): RNN(28, 128, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=128, out_features=10, bias=True)
)

In [4]:
# Adam updates every parameter of the network with the configured learning rate.
optimizer = torch.optim.Adam(params=RNN_model.parameters(), lr=learning_rate)
# Cross-entropy criterion, applied to the scores returned by the model.
loss_func = nn.CrossEntropyLoss()

In [5]:
# MNIST train / test splits, stored under dataMNIST/ (downloaded when not already there).
# Both splits share the same ToTensor transform.
to_tensor = transforms.ToTensor()
training_data = torchvision.datasets.MNIST('dataMNIST/', train=True, transform=to_tensor, download=True)
testing_data  = torchvision.datasets.MNIST('dataMNIST/', train=False, transform=to_tensor, download=True)

# Training batches are reshuffled on every pass over the loader.
train_loader = torch.utils.data.DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
# The accuracy computed on the test set does not depend on sample order, so the test
# loader is left unshuffled: evaluation batches are then the same on every run.
test_loader = torch.utils.data.DataLoader(dataset=testing_data, batch_size=batch_size, shuffle=False)



In [6]:
#### Peek at one batch before training

# Pull a single batch from the loader and show the image tensor shape together with
# the labels (one label per image in the batch).
batch_iter = iter(train_loader)
images, labels = next(batch_iter)
print(images.shape, labels)

torch.Size([100, 1, 28, 28]) tensor([2, 0, 2, 1, 1, 6, 4, 4, 4, 0, 1, 1, 2, 4, 1, 3, 1, 2, 6, 0, 8, 2, 2, 0,
        9, 9, 2, 4, 1, 5, 0, 5, 0, 3, 3, 8, 8, 5, 4, 4, 4, 3, 7, 0, 3, 8, 4, 1,
        1, 8, 4, 4, 7, 5, 1, 5, 1, 9, 9, 9, 7, 5, 1, 9, 8, 7, 9, 1, 9, 1, 7, 3,
        7, 4, 8, 4, 2, 5, 8, 2, 4, 9, 0, 1, 8, 4, 6, 5, 2, 1, 3, 3, 3, 3, 3, 5,
        0, 7, 9, 7])


# First, check that the loss is decreasing

In [7]:
# Sanity check: take a few optimizer steps on ONE fixed batch. If the model, loss and
# optimizer are wired together correctly, the printed loss should fall from step to step.
images, labels = next(iter(train_loader))
# (batch, 1, 28, 28) -> (batch, 28, 28): drop the channel axis so that every image row
# becomes one time step. The batch never changes, so this is done once, outside the loop.
images = images.reshape(-1, 28, 28) # important

for i in range(12):
    model_score = RNN_model(images)
    loss = loss_func(model_score, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss after every step. The previous version guarded this print with
    # `ix % 100 == 0`, but `ix` was advanced with `ix += 0`, so the guard was always
    # true; the dead counter is removed and the per-step print is now explicit.
    print(loss)


tensor(2.3210, grad_fn=<NllLossBackward0>)
tensor(2.2959, grad_fn=<NllLossBackward0>)
tensor(2.2746, grad_fn=<NllLossBackward0>)
tensor(2.2538, grad_fn=<NllLossBackward0>)
tensor(2.2314, grad_fn=<NllLossBackward0>)
tensor(2.2043, grad_fn=<NllLossBackward0>)
tensor(2.1672, grad_fn=<NllLossBackward0>)
tensor(2.1144, grad_fn=<NllLossBackward0>)
tensor(2.0453, grad_fn=<NllLossBackward0>)
tensor(1.9655, grad_fn=<NllLossBackward0>)
tensor(1.8849, grad_fn=<NllLossBackward0>)
tensor(1.8185, grad_fn=<NllLossBackward0>)


## 👉 Once the loss is clearly decreasing, we can train the model on the full training set, batch by batch

# Computing the loss and the training accuracy

In [9]:
for epoch in range(num_epochs):
    # Running totals used for the accuracy printed at the end of each epoch.
    n_correct, n_seen = 0, 0

    # `step` counts the batches of this epoch, starting at 1.
    for step, (images, labels) in enumerate(train_loader, start=1):
        # (batch, 1, 28, 28) -> (batch, 28, 28): each image row becomes one time step.
        images = images.reshape(-1, 28, 28)

        # Forward pass: class scores and their loss against the labels.
        scores_pred = RNN_model(images)
        loss = loss_func(scores_pred, labels)

        # Backward pass: clear stale gradients, back-propagate, take one Adam step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Predicted class = position of the largest score in each row.
        predictions = scores_pred.max(dim=1).indices
        n_correct += (predictions == labels).sum().item()
        n_seen += labels.size(0)

        # Show the current batch loss every 100 batches.
        if step % 100 == 0:
            print(loss)

    # Training accuracy over the whole epoch, in percent.
    print(100 * n_correct / n_seen)

tensor(1.2823, grad_fn=<NllLossBackward0>)
tensor(0.9543, grad_fn=<NllLossBackward0>)
tensor(0.5456, grad_fn=<NllLossBackward0>)
tensor(0.4733, grad_fn=<NllLossBackward0>)
tensor(0.4870, grad_fn=<NllLossBackward0>)
tensor(0.3414, grad_fn=<NllLossBackward0>)
76.02666666666667
tensor(0.2906, grad_fn=<NllLossBackward0>)
tensor(0.2118, grad_fn=<NllLossBackward0>)
tensor(0.5696, grad_fn=<NllLossBackward0>)
tensor(0.4748, grad_fn=<NllLossBackward0>)
tensor(0.2743, grad_fn=<NllLossBackward0>)
tensor(0.2154, grad_fn=<NllLossBackward0>)
91.54333333333334


# Test accuracy

In [11]:
# Evaluation on the test set

# No gradients are needed for evaluation, so autograd tracking is switched off.
with torch.no_grad():
    corr = 0       # correctly classified test samples
    n_samples = 0  # test samples seen so far
    for x, y in test_loader:
        # (batch, 1, 28, 28) -> (batch, 28, 28), the same layout used during training.
        x = x.reshape(-1, 28, 28) # Important

        scores_pred = RNN_model(x)
        _, predictions = torch.max(scores_pred, 1)
        corr += (predictions == y).sum().item()
        n_samples += y.size(0)

    # Accuracy in percent over SAMPLES. The previous version divided by
    # len(test_loader), which is the number of batches; that only looked like a
    # percentage because batch_size happens to be 100.
    print(100 * corr / n_samples)


94.44
