In [12]:
import torch #main torch module
import torch.nn as nn #neural net module
import torch.optim as optim #optimizers
import torch.nn.functional as F #functions like ReLu Sig Tanh etc
from torch.utils.data import DataLoader #help us with datasets

import torchvision.datasets as datasets #using to access std data
import torchvision.transforms as transforms #transformations

### Network Definition
We define our RNN here, and this time we apply it to images.  
Each image has dimensions 28x28x1.

RNNs are not normally used for images, but in this case we can treat each image  
as a sequence of 28 time steps and feed it into the RNN row by row, where each row is a vector of 28 values.  
We therefore use the hyperparameters given below.

In [13]:
# Sequence geometry: each image is fed to the network row by row.
input_size = 28        # features per time step (one image row)
sequence_length = 28   # number of time steps (rows per image)

# Network architecture.
num_layers = 2         # stacked recurrent layers
hidden_size = 256      # hidden units per recurrent layer
num_classes = 10       # number of output classes

# Optimisation settings.
learning_rate = 1e-3
batch_size = 64
num_epochs = 1

Now we define the LSTM-based RNN.

In [34]:
class RNN_LSTM(nn.Module):
    """Stacked LSTM followed by a linear classifier on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units in each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Size of the output (one score per class).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True -> inputs are (batch, seq_len, input_size).
        # The sequence length is not fixed here; any length works.
        self.lstm = nn.LSTM(input_size, hidden_size,
                            num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, seq_len, input_size).
        """
        batch = x.size(0)

        # Allocate the initial hidden and cell states on the same device and
        # with the same dtype as the input, so the module does not depend on
        # a global `device` variable being defined before it is called.
        h0 = x.new_zeros(self.num_layers, batch, self.hidden_size)
        # An LSTM needs a cell state in addition to the hidden state.
        c0 = x.new_zeros(self.num_layers, batch, self.hidden_size)

        # Forward propagation through the recurrent layers.
        out, _ = self.lstm(x, (h0, c0))

        # Only the output at the last time step is classified, since it has
        # seen every previous step of the sequence.
        return self.fc(out[:, -1, :])

### Modelling and Evaluation

In [35]:
# Pick the compute device: use the GPU when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [36]:
# Training split of MNIST: downloaded into 'dataset/' if it is not already
# there, with every image converted to a tensor.
train_dataset = datasets.MNIST(
    'dataset/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Mini-batches over the training split, reshuffled on every pass.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [37]:
# Loading Test Data
# Held-out split of MNIST (train=False), downloaded into 'dataset/' if it is
# not already there, with every image converted to a tensor.
test_dataset = datasets.MNIST(root='dataset/',
                              train=False,
                              transform=transforms.ToTensor(),
                              download=True)

# Evaluation does not benefit from shuffling: accuracy is independent of the
# sample order, and a fixed order keeps evaluation runs deterministic.
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

In [38]:
# Build the network and move its parameters to the selected device.
model = RNN_LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)

# Cross-entropy loss over the class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [39]:
# Training: one optimiser step per mini-batch, for `num_epochs` passes.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Drop the singleton channel axis (batch x 1 x 28 x 28 -> batch x 28 x 28)
        # and move the batch to the compute device.
        data = data.squeeze(1).to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass, then parameter update.
        loss.backward()
        optimizer.step()

In [40]:
def check_accuracy(loader, model):
    """Print and return the classification accuracy of `model` over `loader`.

    Args:
        loader: Iterable yielding `(x, y)` batches. If `loader.dataset.train`
            exists it is used only to choose the printed header.
        model: Module mapping a batch `x` (after `squeeze(1)`) to per-class
            scores of shape (batch, num_classes).

    Returns:
        Accuracy as a percentage (float). Returns 0.0 when the loader yields
        no samples.
    """
    # Not every dataset exposes a `train` flag, so fall back to a generic
    # header instead of raising AttributeError.
    is_train_split = getattr(getattr(loader, "dataset", None), "train", None)
    if is_train_split is None:
        print("Checking Data Accuracy")
    elif is_train_split:
        print("Checking Training Data Accuracy")
    else:
        print("Checking Test Data Accuracy")

    # Evaluate on whatever device the model lives on rather than relying on a
    # global `device` variable. A parameterless model leaves batches in place.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    num_correct = 0
    num_samples = 0

    # Remember the current mode so it can be restored afterwards.
    was_training = model.training
    model.eval()  # set to evaluation mode
    try:
        # Only accuracy is needed, so gradients are not computed.
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(target_device)
                    y = y.to(target_device)
                x = x.squeeze(1)

                scores = model(x)
                _, predictions = scores.max(1)

                # .item() keeps the running count a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    # Guard against an empty loader instead of dividing by zero.
    if num_samples:
        accuracy = float(num_correct) / float(num_samples) * 100
    else:
        accuracy = 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy: .2f}")

    return accuracy

In [41]:
# Report accuracy on the test split first, then on the training split.
# The second call is the cell's last expression, so its return value is
# also displayed as the cell output.
check_accuracy(test_loader,model)
check_accuracy(train_loader,model)

Checking Test Data Accuract
Got 9699 / 10000 with accuracy  96.99
Checking Training Data Accuracy
Got 58130 / 60000 with accuracy  96.88


96.88333333333333