### Importing all the libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

### Using CUDA if it is available

In [2]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Model Parameters

In [3]:
# Fix the RNG seed up front so weight initialisation, the random smoke-test
# batch and the DataLoader shuffling order repeat on every
# Restart Kernel -> Run All (GPU kernels may still add small nondeterminism).
seed = 42
torch.manual_seed(seed)

# Each 28x28 image is consumed as a sequence: 28 time steps of 28 features.
input_size = 28          # features per time step
sequence_length = 28     # time steps per sample
num_layers = 2           # stacked GRU layers
hidden_size = 256        # hidden units per GRU layer
num_classes = 10         # output scores per sample
learning_rate = 0.0001   # Adam step size
batch_size = 64
num_epochs = 2

### Creating a Recurrent Neural Network Model

In [4]:
class GRU(nn.Module):
    """Multi-layer GRU classifier that uses the hidden state of every time step.

    The GRU outputs for all time steps are flattened into one vector per
    sample and passed through a single linear layer, so the model only accepts
    sequences of exactly ``sequence_length`` steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each GRU layer's hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output scores per sample.
        sequence_length: Number of time steps in every input sequence.
            Defaults to 28, the value used throughout this notebook, so
            existing four-argument calls keep working.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=28):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Stored on the instance so the classifier width no longer depends on
        # a notebook-level global being defined before the class is used.
        self.sequence_length = sequence_length
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Return class scores for a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, sequence_length, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # Allocate the initial hidden state next to the input: a CPU input
        # must not be paired with a CUDA hidden state (or vice versa).
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        # Forward propagation; the final hidden state is not needed because
        # the classifier reads the outputs of all time steps.
        out, _ = self.gru(x, h0)
        # (batch, seq, hidden) -> (batch, seq * hidden)
        out = out.reshape(x.shape[0], -1)
        return self.fc(out)

### Checking the model on random data

In [5]:
# Smoke test: one random batch with the configured shapes should produce one
# score per class for every sample.
# The model and the input are placed on the same device so the forward pass
# cannot hit a CPU/CUDA mismatch when a GPU is available.
model = GRU(input_size, hidden_size, num_layers, num_classes).to(device)
x = torch.randn(batch_size, sequence_length, input_size, device=device)
print(model(x).shape)

torch.Size([64, 10])


In [6]:
# Keep the smoke-test scores so the following cells can inspect them.
arr = model(x)

In [7]:
# Show the raw scores: one row per sample, one column per class.
arr

tensor([[-7.0806e-02,  2.2499e-02, -2.1765e-03, -3.3433e-02, -4.0606e-02,
         -1.3798e-02,  2.5928e-02,  2.1620e-02, -5.2083e-03, -4.1989e-03],
        [-7.1626e-02, -2.4000e-02, -1.5007e-02, -4.0232e-02, -6.1066e-02,
          3.6336e-03,  3.0044e-02,  6.2538e-02, -3.3396e-02,  9.7090e-03],
        [-6.7653e-02, -7.9656e-02,  3.2971e-02, -1.9640e-02, -5.8909e-02,
         -1.1736e-02,  3.7345e-02,  1.3181e-02, -1.5013e-02, -2.2844e-02],
        [-5.5630e-02,  2.7584e-02, -2.3073e-02, -5.4636e-02, -9.2567e-03,
         -7.0438e-02,  4.2742e-02, -2.9316e-02,  3.1388e-02,  2.2908e-02],
        [-3.9422e-02,  2.3543e-02,  4.2559e-02,  6.1401e-03, -5.6717e-03,
         -6.0426e-02, -2.8755e-02,  7.4967e-02, -1.7872e-02, -1.4772e-04],
        [-3.9505e-02, -2.1356e-02, -1.1055e-02, -8.6820e-03, -2.6310e-02,
         -3.7904e-02,  3.4431e-02,  2.7484e-02,  4.8360e-02, -2.5630e-02],
        [-1.4477e-02, -6.3565e-02, -1.4688e-02, -3.1345e-02, -7.7132e-03,
         -4.7893e-02,  2.3722e-0

In [8]:
# "ij->i" keeps the sample axis i and sums over the class axis j,
# i.e. it returns the sum of the scores of each sample.
torch.einsum("ij->i",arr)

tensor([-0.1002, -0.1394, -0.1920, -0.1177, -0.0051, -0.0602, -0.1006, -0.3143,
        -0.2424, -0.0335, -0.1685, -0.1700, -0.1341, -0.1599, -0.2401, -0.1613,
        -0.1223, -0.0513, -0.1354, -0.2037,  0.0643, -0.2687, -0.0847, -0.1958,
        -0.1236, -0.1681, -0.0466, -0.0302, -0.0610, -0.1768, -0.2208, -0.1361,
        -0.2052, -0.0433, -0.2679, -0.1345, -0.1810, -0.2004, -0.0227, -0.1655,
         0.0240, -0.1800, -0.1672, -0.1827, -0.0756, -0.2043, -0.1173, -0.1036,
        -0.3923, -0.1511, -0.1821, -0.1307, -0.0685, -0.2291, -0.1907, -0.1728,
        -0.0120, -0.2729, -0.0771, -0.1851, -0.1629, -0.3134, -0.3013, -0.0706],
       grad_fn=<SumBackward1>)

### Data Loading

In [9]:
# MNIST is downloaded into data/ on first use; ToTensor turns each image into
# a float tensor.
train_dataset = datasets.MNIST(root='data/', train=True, transform=transforms.ToTensor(), download=True)
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='data/', train=False, transform=transforms.ToTensor(), download=True)
# Accuracy does not depend on sample order, so the test split is iterated in
# its fixed order; this keeps evaluation batches reproducible.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

### Initialize the Model

In [10]:
# Build the network from the configured hyperparameters and move its
# parameters to the selected device. This replaces the smoke-test model.
model = GRU(input_size,hidden_size,num_layers,num_classes).to(device)

### Loss and Optimizer

In [11]:
# Cross-entropy between the model's class scores and the integer labels.
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the model with the configured step size.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### Train Network

In [12]:
# Run num_epochs full passes over the training set, taking one optimizer
# step per mini-batch.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Drop the channel axis (dim 1) so every image becomes a sequence,
        # and put inputs and labels on the training device.
        data = data.squeeze(1).to(device=device)
        targets = targets.to(device=device)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss for this batch.
        scores = model(data)
        loss = criterion(scores, targets)

        # Back-propagate, then apply the parameter update.
        loss.backward()
        optimizer.step()

### Checking the accuracy of the Model

In [13]:
def check_accuracy(loader, model):
    """Print how many samples from ``loader`` the model classifies correctly.

    The model is put into evaluation mode for the duration of the pass and
    restored to its previous mode afterwards; no gradients are tracked.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches. When its ``dataset``
            exposes a ``train`` attribute, that attribute selects which header
            line is printed; without it a generic header is printed.
        model: ``nn.Module`` mapping a batch of inputs (with dimension 1
            squeezed away) to scores of shape ``(batch, classes)``.
    """
    # Not every dataset (e.g. a wrapped or custom one) has a `train` flag.
    is_train = getattr(getattr(loader, "dataset", None), "train", None)
    if is_train is None:
        print("Checking accuracy")
    elif is_train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    # Evaluate wherever the model lives instead of relying on a global
    # device; a parameter-free model leaves the batches where they are.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if eval_device is not None:
                    x = x.to(device=eval_device)
                    y = y.to(device=eval_device)
                x = x.squeeze(1)

                scores = model(x)

                # Index of the highest score per sample is the predicted class.
                _, predictions = scores.max(1)
                # .item() keeps the running total a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if the evaluation pass fails.
        model.train(was_training)

    # An empty loader reports 0.00 instead of dividing by zero.
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f'Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}')

# Report accuracy on the training split first, then on the test split.
for split_loader in (train_loader, test_loader):
    check_accuracy(split_loader, model)

Checking accuracy on training data
Got 57700/60000 with accuracy 96.17
Checking accuracy on test data
Got 9637/10000 with accuracy 96.37
