### Importing all the libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

### Using CUDA if it is available

In [2]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
# Later cells move the model and every batch to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Model Parameters

In [3]:
# Each image is fed to the LSTM as `sequence_length` time steps of `input_size` values.
input_size = 28          # features per time step (passed to nn.LSTM as input_size)
sequence_length = 28     # time steps per sample (sizes the final linear layer)
num_layers = 2           # stacked LSTM layers
hidden_size = 256        # hidden units per LSTM layer
num_classes = 10         # size of the model's output (one score per class)
learning_rate = 0.0001   # step size handed to the Adam optimizer
batch_size = 64          # samples per batch in both data loaders
num_epochs = 2           # full passes over the training loader

### Creating a Recurrent Neural Network Model

In [4]:
class LSTM(nn.Module):
    """Sequence classifier: a stacked LSTM followed by one linear layer.

    The linear layer consumes the LSTM outputs of *all* time steps
    (flattened), so the model only accepts sequences of exactly
    ``sequence_length`` steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units in every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores per sample.
        sequence_length: Number of time steps in every input sequence.
            Defaults to 28, the value used throughout this notebook; pass it
            explicitly when working with a different sequence length.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=28):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.sequence_length = sequence_length
        # batch_first=True -> inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # The classifier sees the hidden state of every time step, flattened.
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, sequence_length, input_size).
        """
        # Build the initial states on the same device and with the same dtype
        # as the input, so the model works wherever it (and its input) lives
        # instead of depending on a notebook-level `device` variable.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        # Forward Propagation
        out, _ = self.lstm(x, (h0, c0))
        # Flatten (batch, seq, hidden) -> (batch, seq * hidden) for the classifier.
        out = out.reshape(x.shape[0], -1)
        out = self.fc(out)
        return out

### Checking the model on random data

In [5]:
# Smoke test: push one random batch through the network and check the output
# shape. The sizes come from the hyperparameter cell instead of repeated
# literals, and both the model and the batch are placed on `device` so the
# cell also runs when CUDA is available.
model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
x = torch.randn(batch_size, sequence_length, input_size, device=device)
print(model(x).shape)

torch.Size([64, 10])


In [6]:
# Keep the raw class scores for the random batch so the following cells can inspect them.
arr = model(x)

In [7]:
# Display the raw scores: one row per sample, one column per class.
arr

tensor([[ 1.7642e-02,  3.5686e-03, -2.2869e-02, -3.3710e-03, -1.5651e-02,
          3.7409e-02,  1.6406e-02, -1.0635e-02, -1.8747e-03, -1.6424e-02],
        [ 8.2370e-03, -1.3177e-02, -1.7565e-02,  5.4534e-03, -6.3011e-03,
          4.2388e-02,  1.3626e-02, -1.4901e-02, -4.6550e-03, -1.2420e-02],
        [ 9.1973e-03, -6.0679e-03, -2.7110e-02,  3.8513e-03,  4.2878e-06,
          4.6040e-02,  6.6781e-03, -2.5721e-02,  1.1702e-02, -1.7580e-03],
        [ 2.9456e-03, -9.0138e-03, -2.1689e-02, -3.2404e-03, -9.6613e-03,
          3.9641e-02,  6.9506e-03, -2.2616e-02, -4.3066e-03, -1.4642e-02],
        [ 4.3008e-03, -6.0181e-03, -2.6823e-02,  7.3385e-03,  1.5283e-02,
          2.6342e-02,  1.0997e-02, -1.0733e-02,  6.0544e-03, -5.9961e-03],
        [ 4.1869e-03, -1.9029e-02, -1.6875e-02,  4.9076e-03, -1.7285e-02,
          4.6964e-02,  1.1368e-02,  1.3215e-03,  1.0172e-03, -7.7779e-03],
        [-2.5415e-03, -2.1129e-02, -2.6147e-02,  3.7797e-03,  1.0132e-03,
          4.6278e-02,  2.1413e-0

In [8]:
# "ij->i" keeps the first axis and sums over the second, i.e. it adds up the
# class scores of each sample and yields one value per row of `arr`.
torch.einsum("ij->i",arr)

tensor([ 0.0042,  0.0007,  0.0168, -0.0356,  0.0207,  0.0088,  0.0034,  0.0095,
         0.0232,  0.0231,  0.0093, -0.0002,  0.0106,  0.0037, -0.0061,  0.0169,
         0.0152, -0.0189, -0.0061,  0.0280,  0.0013,  0.0112,  0.0146, -0.0120,
         0.0166,  0.0022,  0.0094, -0.0117, -0.0275, -0.0085, -0.0129,  0.0009,
         0.0159,  0.0355,  0.0021,  0.0127, -0.0179, -0.0013,  0.0229,  0.0125,
         0.0460,  0.0369,  0.0277,  0.0063, -0.0043, -0.0244,  0.0157, -0.0150,
         0.0117,  0.0086, -0.0598, -0.0267,  0.0183, -0.0047,  0.0070, -0.0008,
         0.0380,  0.0174, -0.0120,  0.0311,  0.0103,  0.0113,  0.0104,  0.0047],
       grad_fn=<SumBackward1>)

### Data Loading

In [9]:
# MNIST is downloaded into data/ on first use; ToTensor converts each image to a tensor.
train_dataset = datasets.MNIST(root='data/', train=True, transform=transforms.ToTensor(), download=True)
# Shuffle the training batches so every epoch sees the samples in a new order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(root='data/', train=False, transform=transforms.ToTensor(), download=True)
# Evaluation does not benefit from shuffling; a fixed order keeps runs reproducible.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

### Initialize the Model

In [10]:
# Build the network from the notebook's hyperparameters and move it to the selected device.
# This rebinds `model`, so the instance used for the earlier shape check is discarded.
model = LSTM(input_size,hidden_size,num_layers,num_classes).to(device)

### Loss and Optimizer

In [11]:
# Cross-entropy is applied to the raw class scores returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the model with the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### Train Network

In [12]:
# Make sure the model is in training mode before optimizing.
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0

    for data, targets in train_loader:
        # Move the batch to the selected device and drop the channel axis:
        # (batch, 1, 28, 28) -> (batch, 28, 28), i.e. (batch, seq, feature).
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward (clear the gradients left over from the previous step first)
        optimizer.zero_grad()
        loss.backward()

        # Gradient Descent
        optimizer.step()

        running_loss += loss.item()

    # Report progress once per epoch so the run is not silent.
    mean_loss = running_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch + 1}/{num_epochs} - mean training loss: {mean_loss:.4f}")

### Checking the accuracy of the Model

In [13]:
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over ``loader``.

    The model is switched to evaluation mode for the duration of the check and
    its previous mode (training or evaluation) is restored afterwards, even if
    an error occurs. Batches are moved to the device of the model's parameters
    (CPU for a model without parameters).

    Args:
        loader: Iterable of ``(inputs, labels)`` batches whose ``dataset`` has
            a boolean ``train`` attribute (used only for the log message).
            Inputs have a singleton axis at position 1 that is squeezed away
            before they are passed to the model.
        model: Module mapping a batch of inputs to per-class scores.

    Returns:
        None. The result is printed.
    """
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    # Evaluate on whatever device the model lives on.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    num_correct = 0
    num_samples = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=model_device).squeeze(1)
                y = y.to(device=model_device)

                scores = model(x)

                # The index of the highest score is the predicted class.
                _, predictions = scores.max(1)
                # .item() keeps the counter a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Put the model back into the mode it was in before the check.
        model.train(was_training)

    # An empty loader reports 0.00 instead of dividing by zero.
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f'Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}')

# Report accuracy on the training split first, then on the held-out test split.
check_accuracy(train_loader,model)
check_accuracy(test_loader,model)

Checking accuracy on training data
Got 57563/60000 with accuracy 95.94
Checking accuracy on test data
Got 9612/10000 with accuracy 96.12
