In [155]:
## Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim  as optim
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
from torchvision.transforms import transforms

In [156]:
# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [157]:
# Hyperparameters
seq_length = 28
input_size= 28
num_layers = 2
hidden_size = 256
num_classes = 10
batch = 100
num_epoch = 2
learning_rate = 0.001

In [158]:
## Create Fully connected network

class BRNN(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers,seq_length,num_classes = 10):
        super(BRNN,self).__init__()
        
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
        self.lstm = nn.LSTM(input_size,hidden_size,num_layers,batch_first= True,bidirectional=True)
        
        self.fc1 = nn.Linear(2*hidden_size,num_classes)
        
    def forward(self,x):
        h0 = torch.zeros(2*self.num_layers,x.size(0),self.hidden_size).to(device) 
        c0 = torch.zeros(2*self.num_layers,x.size(0),self.hidden_size).to(device) 
        out , _ = self.lstm(x,(h0,c0))
        print(out.shape)
        # out = out.reshape(out.shape[0],-1)
        out = self.fc1(out[:,-1,:])
        return out
        
    

In [159]:
## load Dataset
train_dataset = datasets.MNIST(root = 'datasets/',train=True,download=True,transform = transforms.ToTensor())
train_loader = DataLoader(dataset=train_dataset,batch_size=batch,shuffle=True)

test_dataset = datasets.MNIST(root = 'datasets/',train=False,download=True,transform = transforms.ToTensor())
test_loader = DataLoader(dataset=test_dataset,batch_size=batch,shuffle=True)

In [160]:
## Initialize model 
model = RNN(input_size,hidden_size,num_layers,seq_length,num_classes).to(device)

In [161]:
## Losss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=learning_rate)

In [162]:
%%time
## Train Network 
for epoch in range(num_epoch):
    for batch_idx, (data,target) in enumerate(train_loader):
        # Get data to cuda 
        data = data.to(device=device).squeeze(1)
        target =target.to(device=device)
        
        # # reshape
        # data = data.view(data.shape[0],-1)
        
        # forward
        scores = model(data)
        loss = criterion(scores,target)
        
        # backward
        optimizer.zero_grad()
        loss.backward()
        
        # gradient descent
        optimizer.step()

CPU times: user 28min, sys: 15min 10s, total: 43min 10s
Wall time: 7min 43s


In [163]:
# check the accuracy of out trained model 
def check_accuracy(loader,model):
    for data,target in loader:
        num_correct = 0
        num_sample = 0
        model.eval()
        with torch.no_grad():
            data = data.to(device=device).squeeze(1)
            target = target.to(device=device)

            # # reshape 
            # data = data.view(data.shape[0],-1)


            scores = model(data)
            _, pred = scores.max(1)
            # print(list(zip(pred,target)))
            num_correct += sum(pred == target)
            num_sample  += pred.shape[0]
    print(f'Total {num_correct} correct  / out of {num_sample} - accuracy {num_correct/num_sample :.3f} ')
    model.train()
            
            
            

In [164]:
# on test dataset
check_accuracy(test_loader,model)

Total 97 correct  / out of 100 - accuracy 0.970 


In [165]:
# on train datasets
check_accuracy(train_loader,model)

Total 96 correct  / out of 100 - accuracy 0.960 


lstm.weight_ih_l0 torch.Size([1024, 28])

lstm.weight_hh_l0 torch.Size([1024, 256])

lstm.bias_ih_l0 torch.Size([1024])

lstm.bias_hh_l0 torch.Size([1024])

lstm.weight_ih_l1 torch.Size([1024, 256])

lstm.weight_hh_l1 torch.Size([1024, 256])

lstm.bias_ih_l1 torch.Size([1024])

lstm.bias_hh_l1 torch.Size([1024])

fc1.weight torch.Size([10, 256])

fc1.bias torch.Size([10])

