### Importing all the libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

### Using CUDA if it is available

In [2]:
# Select the compute device once: the GPU when PyTorch can see one, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

### Model Parameters

In [3]:
# Input geometry: every sample is a sequence of `sequence_length` steps,
# each step carrying `input_size` features.
sequence_length = 28
input_size = 28

# Network architecture
hidden_size = 256
num_layers = 2
num_classes = 10

# Optimisation settings
batch_size = 64
num_epochs = 2
learning_rate = 1e-4

### Creating a Recurrent Neural Network Model

In [4]:
class RNN(nn.Module):
    """Stacked vanilla RNN followed by a linear classifier over all time steps.

    The hidden states produced at every time step are flattened into a single
    vector per sample and mapped to ``num_classes`` scores by one linear layer.

    Args:
        input_size: Number of features in each time step.
        hidden_size: Size of the hidden state of each recurrent layer.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of scores returned per sample.
        sequence_length: Number of time steps in every input sequence. It
            determines the input width of the linear layer, so sequences of
            any other length cannot be fed to ``forward``. Defaults to 28.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=28):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Kept as an explicit argument instead of being read from a notebook
        # global, so the class can be built without any surrounding state.
        self.sequence_length = sequence_length
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``.

        Args:
            x: Tensor of shape ``(batch, sequence_length, input_size)``
                (batch-first, matching ``batch_first=True`` on the RNN).
        """
        # Build the initial hidden state from the input itself so it always has
        # the same device and dtype as `x`, wherever the model currently lives.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)

        # Forward propagation through the recurrent stack
        out, _ = self.rnn(x, h0)

        # Concatenate the hidden states of all time steps for each sample
        out = out.reshape(x.shape[0], -1)
        return self.fc(out)

### Checking the model on random data

In [5]:
# Smoke test: push one random batch through a freshly built network and print
# the shape of the scores it returns.
model = RNN(input_size=28, hidden_size=256, num_layers=2, num_classes=10)
x = torch.randn(64, 28, 28)
output_shape = model(x).shape
print(output_shape)

torch.Size([64, 10])


In [6]:
# Run the same random batch through the model again and keep the scores for inspection
arr = model(x)

In [7]:
# Display the raw scores the model produced for the random batch
arr

tensor([[-1.1171e-01,  9.5957e-02, -5.5159e-02, -1.4504e-02, -1.2041e-01,
          5.6838e-02,  3.1331e-02, -8.4525e-02,  2.0487e-02, -1.6939e-01],
        [ 1.4707e-01, -6.2357e-02,  7.9535e-02,  3.3588e-02, -1.2882e-01,
          1.2851e-01, -1.6320e-01,  3.9704e-02,  4.7919e-02, -7.5878e-02],
        [-4.7012e-02, -4.9080e-02, -1.8756e-01, -1.0445e-01, -2.0444e-02,
         -1.1676e-01, -8.9245e-03,  1.6870e-02, -3.0442e-02, -1.0440e-01],
        [-4.6200e-02, -1.0749e-01, -2.0220e-01, -4.4510e-03, -1.8582e-01,
          3.3873e-02,  2.1148e-02,  2.8845e-02, -5.8076e-03, -5.7431e-02],
        [-2.7275e-01, -1.1894e-01,  7.0030e-02, -1.1961e-02, -1.0902e-02,
         -1.0820e-01, -2.0195e-02, -5.1798e-03,  7.3270e-03, -4.2017e-03],
        [ 7.2873e-02, -1.8564e-01,  6.6162e-04,  1.0725e-01, -1.0456e-01,
          7.5477e-02, -2.3331e-02, -2.8112e-03,  8.2449e-02, -4.4629e-02],
        [-2.9909e-02,  1.7234e-02,  8.8662e-02,  3.8701e-02,  2.5587e-02,
          8.2994e-02,  7.4919e-0

In [8]:
# "ij->i" sums over the second index j, i.e. it adds up the scores in each row of arr
torch.einsum("ij->i",arr)

tensor([-0.3511,  0.0461, -0.6522, -0.5255, -0.4750, -0.0223,  0.4922, -0.0623,
         0.0305,  0.2871, -0.4701, -0.0879, -0.4916, -0.5857, -0.0733, -0.0667,
        -0.4254, -0.1682,  0.1341,  0.1169, -0.5317, -0.1443, -0.5247, -0.0879,
         0.1032, -0.3224,  0.2876,  0.2203,  0.1938,  0.0576, -0.1920, -0.0015,
        -0.1472,  0.1048, -0.1371,  0.1618, -0.1125,  0.1031,  0.2876, -0.4844,
        -0.1202,  0.3060,  0.0044, -0.3533, -0.1877, -0.1628,  0.1282,  0.1531,
         0.2881, -0.4034,  0.0239, -0.2868,  0.0859,  0.5330, -0.2795, -0.1924,
         0.0502, -0.1909, -0.4454, -0.1624, -0.3763,  0.0333,  0.2016, -0.0116],
       grad_fn=<SumBackward1>)

### Data Loading

In [9]:
# A single transform instance is shared by both splits: it converts each image to a tensor.
to_tensor = transforms.ToTensor()

# Training split: reshuffled every epoch so mini-batches differ between passes.
train_dataset = datasets.MNIST(root='data/', train=True, transform=to_tensor, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Test split: accuracy does not depend on sample order, so the loader keeps a
# fixed order, which makes evaluation runs reproducible.
test_dataset = datasets.MNIST(root='data/', train=False, transform=to_tensor, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

### Initialize the Model

In [10]:
# Build the network from the configured hyperparameters, then move it to the selected device.
model = RNN(input_size, hidden_size, num_layers, num_classes)
model = model.to(device)

### Loss and Optimizer

In [11]:
# Adam updates every model parameter using the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Cross-entropy between the raw class scores and the integer class labels.
criterion = nn.CrossEntropyLoss()

### Train Network

In [12]:
# Make `num_epochs` full passes over the training loader, taking one optimizer
# step per mini-batch.
for epoch in range(num_epochs):

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Drop dimension 1 when it is a singleton and place the batch on `device`
        data = data.squeeze(1).to(device)
        targets = targets.to(device)

        # Clear the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass, then parameter update
        loss.backward()
        optimizer.step()

### Checking the accuracy of the Model

In [13]:
def check_accuracy(loader, model):
    """Print how many samples from ``loader`` the model classifies correctly.

    The model is switched to evaluation mode for the duration of the check and
    restored to its previous mode afterwards. Batches are moved to the device
    that holds the model's parameters before the forward pass.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. Dimension 1 of
            the inputs is squeezed away when it is a singleton. If
            ``loader.dataset.train`` exists and is truthy the run is reported
            as training data; otherwise it is reported as test data.
        model: ``nn.Module`` returning one score per class for each sample;
            the predicted class is the index of the largest score.

    Returns:
        None. The counts and the accuracy percentage are printed.
    """
    # Not every dataset exposes a `train` flag (e.g. TensorDataset), so look it
    # up defensively instead of assuming the attribute exists.
    if getattr(getattr(loader, 'dataset', None), 'train', False):
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    # Use the device the model actually lives on rather than a notebook global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    num_correct = 0
    num_samples = 0

    # Remember the current mode so it can be restored even if evaluation fails.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=model_device).squeeze(1)
                y = y.to(device=model_device)

                scores = model(x)

                _, predictions = scores.max(1)
                # .item() keeps the running total a plain Python int
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    if num_samples == 0:
        # Avoid dividing by zero when the loader yields nothing.
        print("Got 0/0: no samples to evaluate")
        return

    print(f'Got {num_correct}/{num_samples} with accuracy {num_correct / num_samples * 100:.2f}')

# Report accuracy on the training split first, then on the held-out test split.
for loader in (train_loader, test_loader):
    check_accuracy(loader, model)

Checking accuracy on training data
Got 57051/60000 with accuracy 95.08
Checking accuracy on test data
Got 9537/10000 with accuracy 95.37
