In [1]:
## Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim  as optim
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
from torchvision.transforms import transforms

In [2]:
# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'mps')

In [3]:
# Hyperparameters
seq_length = 28
input_size= 28
num_layers = 2
hidden_size = 256
num_classes = 10
batch = 100
num_epoch = 2
learning_rate = 0.001

In [4]:
## Create Fully connected network

class RNN(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers,seq_length,num_classes = 10):
        super(RNN,self).__init__()
        
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
        self.gru = nn.GRU(input_size,hidden_size,num_layers,batch_first= True)
        
        self.fc1 = nn.Linear(hidden_size* seq_length,num_classes)
        
    def forward(self,x):
        h0 = torch.zeros(self.num_layers,x.size(0),self.hidden_size).to(device) 
        out , _ = self.gru(x,h0)
        out = out.reshape(out.shape[0],-1)
        out = self.fc1(out)
        return out
        
    

In [91]:
## load Dataset
train_dataset = datasets.MNIST(root = 'datasets/',train=True,download=True,transform = transforms.ToTensor())
train_loader = DataLoader(dataset=train_dataset,batch_size=batch,shuffle=True)

test_dataset = datasets.MNIST(root = 'datasets/',train=False,download=True,transform = transforms.ToTensor())
test_loader = DataLoader(dataset=test_dataset,batch_size=batch,shuffle=True)

In [92]:
## Initialize model 
model = RNN(input_size,hidden_size,num_layers,seq_length,num_classes).to(device)

In [93]:
## Losss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=learning_rate)

In [94]:
%%time
## Train Network 
for epoch in range(num_epoch):
    for batch_idx, (data,target) in enumerate(train_loader):
        # Get data to cuda 
        data = data.to(device=device).squeeze(1)
        target =target.to(device=device)
        
        # # reshape
        # data = data.view(data.shape[0],-1)
        
        # forward
        scores = model(data)
        loss = criterion(scores,target)
        
        # backward
        optimizer.zero_grad()
        loss.backward()
        
        # gradient descent
        optimizer.step()

CPU times: user 6min 8s, sys: 6min 50s, total: 12min 59s
Wall time: 5min 36s


In [95]:
# check the accuracy of out trained model 
def check_accuracy(loader,model):
    for data,target in loader:
        num_correct = 0
        num_sample = 0
        model.eval()
        with torch.no_grad():
            data = data.to(device=device).squeeze(1)
            target = target.to(device=device)

            # # reshape 
            # data = data.view(data.shape[0],-1)


            scores = model(data)
            _, pred = scores.max(1)
            # print(list(zip(pred,target)))
            num_correct += sum(pred == target)
            num_sample  += pred.shape[0]
    print(f'Total {num_correct} correct  / out of {num_sample} - accuracy {num_correct/num_sample :.3f} ')
    model.train()
            
            
            

In [96]:
# on test dataset
check_accuracy(test_loader,model)

Total 97 correct  / out of 100 - accuracy 0.970 


In [97]:
# on train datasets
check_accuracy(train_loader,model)

Total 97 correct  / out of 100 - accuracy 0.970 
