### Imports

In [3]:
import torch 
import torchvision
import torch.nn as nn # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim # For all optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F # All functions that don't have any parameters
from torch.utils.data import DataLoader # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets # Has standard datasetswe can import in a nice way
import torchvision.transforms as transforms # Transformations we can perform our dataset

### Set Device

In [2]:
# Choose the compute backend: Apple MPS when it is both built and available,
# otherwise CUDA when available, otherwise fall back to the CPU.
device = (
    "mps" if torch.backends.mps.is_available() and torch.backends.mps.is_built()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

### Hyperparameters

In [12]:
# Input geometry: each 28x28 MNIST image is fed to the LSTM as 28 time steps
# (rows) of 28 features (pixels per row).
input_size, sequence_length = 28, 28

# Model capacity.
num_layers, hidden_size = 2, 256
num_classes = 10

# Optimisation settings.
learning_rate = 0.001
batch_size = 64
num_epochs = 2

### Create Bi-directional LSTM Network

In [10]:
class BRNN(nn.Module):
    """Bidirectional multi-layer LSTM followed by a linear classification head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per direction in each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Size of the output (logit) vector.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layer = num_layers  # attribute name kept as-is for existing users

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True,
                            bidirectional=True)
        # The two directions are concatenated on the feature axis, hence 2 * hidden_size.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor laid out as (batch, sequence, feature), since the LSTM is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # One initial hidden/cell state per layer *and* per direction.
        # Allocate them next to the input (same device and dtype) so the module
        # does not depend on a notebook-level `device` variable.
        state_shape = (self.num_layer * 2, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        out, _ = self.lstm(x, (h0, c0))

        # Classify from the features at the last time step.
        # NOTE(review): at this position the backward direction has only
        # processed the final input step; confirm this is the intended summary.
        out = self.fc(out[:, -1, :])

        return out

- To make the `LSTM` bidirectional, simply pass `bidirectional=True` when constructing it.

- The initial hidden and cell states use `self.num_layer * 2` as their first dimension because a bidirectional LSTM runs two passes per layer, one forward and one backward, and each pass needs its own initial state. The outputs of the two directions are concatenated along the feature dimension, which is why the final linear layer takes `hidden_size * 2` inputs.

### Initialize Network

In [14]:
# Build the bidirectional LSTM classifier and move its weights to the selected device.
model = BRNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)

### Load Dataset

In [15]:
# MNIST train/test splits, converted to tensors by ToTensor().
# download=True fetches the files only when they are missing from `root`, so
# the notebook also runs on a machine that has no local copy yet.
# The loaders use the `batch_size` hyperparameter instead of a repeated literal;
# if `batch_size` has been rebound since the hyperparameter cell ran, re-run
# that cell before this one.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
# Evaluation does not depend on sample order, so the test set is not shuffled.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

### Loss & Optimizer

In [21]:
# Cross-entropy over the class logits, optimised with Adam at `learning_rate`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

### Train Network

In [22]:
# Train for `num_epochs` passes over the training set and report the mean
# batch loss after each epoch.
model.train()  # make sure the model is in training mode before optimising

for epoch in range(num_epochs):

    total_loss = 0

    # Iterate over the batches directly: the batch index is not needed, and
    # binding it to the name `batch_size` would overwrite the hyperparameter.
    for data, target in train_loader:

        # Drop the channel axis so each image is a (rows, columns) sequence.
        data = data.to(device).squeeze(1)
        target = target.to(device)

        # forward pass
        scores = model(data)
        loss = criterion(scores, target)

        # backward pass
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # back-propagation
        optimizer.step()       # update model weights

        total_loss += loss.item()  # accumulate the scalar batch loss

    average_loss = total_loss / len(train_loader)  # mean loss per batch

    print(f"Epoch {epoch}: Average Loss: {average_loss}")

Epoch 0: Average Loss: 0.4211287443786224
Epoch 1: Average Loss: 0.09959488496162505


### Check accuracy on the training & test sets to see how good our model is

In [23]:
def check_accuracy(loader, mode):
    """Print the classification accuracy of a model over every batch in `loader`.

    Args:
        loader: DataLoader yielding ``(inputs, labels)`` batches. Its dataset
            must expose a boolean ``train`` attribute, which only selects the
            header message that is printed.
        mode: The ``nn.Module`` to evaluate. The parameter name is kept
            unchanged so existing calls keep working.

    The model passed in is the one that is evaluated (not a notebook global).
    It is put in eval mode while scoring and returned to whatever mode it was
    in beforehand. Inputs are moved to the device holding the model's
    parameters. Nothing is returned; results are printed.
    """
    net = mode

    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on testing data")

    # Run on the device that holds the model's weights; a parameter-free
    # module leaves the batches where they are.
    first_param = next(net.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_sample = 0

    was_training = net.training
    net.eval()  # put model in evaluation mode

    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(target_device)
                    y = y.to(target_device)
                x = x.squeeze(1)  # drop the channel axis

                scores = net(x)
                _, predictions = scores.max(1)  # index of the largest logit

                num_correct += (predictions == y).sum().item()
                num_sample += predictions.size(0)
    finally:
        net.train(was_training)  # restore the mode the caller had set

    # An empty loader yields 0 samples; report 0.00 instead of dividing by zero.
    acc = float(num_correct) / float(num_sample) * 100 if num_sample else 0.0

    print(f"Got {num_correct} / {num_sample} with accuracy {acc:.2f}")

In [24]:
# Report accuracy over the batches of the training loader.
check_accuracy(train_loader, model)

Checking accuracy on training data
Got 58912 / 60000 with accuracy 98.19


In [25]:
# Report accuracy over the batches of the held-out test loader.
check_accuracy(test_loader, model)

Checking accuracy on testing data
Got 9779 / 10000 with accuracy 97.79
