# Try it with only one epoch, because the LSTM is very slow on CPU

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

import random
import numpy as np

# Seed every random number generator used in this notebook so that runs are
# repeatable.
SEED = 55643
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)


# torch names NVIDIA devices 'cuda'; 'GPU' is not a valid device string and
# makes every later `.to(device)` call fail on a machine that has a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


In [2]:
# MNIST train and test splits stored under dataMNIST/ (download=True asks
# torchvision to fetch the files), with each image converted by ToTensor().
training_data = torchvision.datasets.MNIST(
    'dataMNIST/', train=True, transform=transforms.ToTensor(), download=True
)
testing_data = torchvision.datasets.MNIST(
    'dataMNIST/', train=False, transform=transforms.ToTensor(), download=True
)

# Both splits are served in shuffled mini-batches of 100 images.
train_loader = torch.utils.data.DataLoader(training_data, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(testing_data, batch_size=100, shuffle=True)

# 60000 training samples / 100 per batch -> 600 batches (the test split has
# 10000 samples).
print(len(train_loader))

600


In [3]:
# --- Input geometry ---
# The loops in this notebook reshape each batch to (-1, 28, 28): 28 time
# steps with 28 features per step.
sequence_len = input_size = 28

# --- Model size ---
num_layers = 2
hidden_size = 128  # free choice; any width can be used here
num_classes = 10

# --- Optimisation ---
learning_rate = 1e-3
num_epochs = 2

class BRNN(nn.Module):
    """Bidirectional multi-layer LSTM followed by a linear classifier.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of scores produced for each sample.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        # The forward and backward hidden states are concatenated, so the
        # classifier sees 2 * hidden_size features.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return raw class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, seq_len, input_size).
        """
        # One initial state per layer AND per direction, hence the factor 2.
        # The states are created on the input's device and in its dtype so
        # the model also works after `.to('cuda')` or `.double()`.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        out, _ = self.lstm(x, (h0, c0))

        # Only the output at the last time step is passed to the classifier,
        # e.g. a batch of 100 samples yields scores of shape (100, 10).
        return self.fc(out[:, -1, :])
        

# Build the network and move its parameters to `device`, the same device the
# training and evaluation loops send their batches to.
BRNN_model = BRNN(input_size, hidden_size, num_layers, num_classes).to(device)
BRNN_model

BRNN(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=256, out_features=10, bias=True)
)

# Loss, optimizer

In [4]:
# Objective: cross-entropy computed on the model's raw class scores.
loss_func = nn.CrossEntropyLoss()

# Optimiser: Adam over all BRNN_model parameters with the configured step size.
optimizer = torch.optim.Adam(params=BRNN_model.parameters(), lr=learning_rate)

# Train with a single batch

In [5]:
# Sanity check: fit ONE fixed batch over and over. If the loss keeps getting
# lower, the model, loss and optimizer are wired correctly and the model is
# ready to be trained on the full data.
images, labels = next(iter(train_loader))

# The batch never changes, so reshape it and move it to the device once
# instead of on every iteration.
# The loader yields [100, 1, 28, 28]; the (B)RNN needs 3-D input [100, 28, 28].
images = images.reshape(-1, 28, 28).to(device)
labels = labels.to(device)

for step in range(12):
    # forward
    scores_pred = BRNN_model(images)
    loss = loss_func(scores_pred, labels)

    # backward
    optimizer.zero_grad()
    loss.backward()

    # gradient descent update step/adam step
    optimizer.step()

    print(f"Loss-erorr: {loss}")

Loss-erorr: 2.3028600215911865
Loss-erorr: 2.2973134517669678
Loss-erorr: 2.2919328212738037
Loss-erorr: 2.2864344120025635
Loss-erorr: 2.2805514335632324
Loss-erorr: 2.2740120887756348
Loss-erorr: 2.2665584087371826
Loss-erorr: 2.2580788135528564
Loss-erorr: 2.2491257190704346
Loss-erorr: 2.2421329021453857
Loss-erorr: 2.2379772663116455
Loss-erorr: 2.227597236633301


### Be careful to change the images.reshape call to match each model!

# Train the model ... with a bunch of batches

In [6]:
# One pass over the training set. Tracks a running accuracy over the epoch
# and prints the current loss every 100 batches.
for epoch in range(1):
    corr = 0
    n_samples = 0

    # enumerate(start=1) replaces the hand-maintained batch counter.
    for ix, (images, labels) in enumerate(train_loader, start=1):
        # Reshape to the 3-D input the (B)RNN needs, then move the batch to
        # the device once.
        images = images.reshape(-1, 28, 28).to(device)
        labels = labels.to(device)

        #forward
        scores_pred = BRNN_model(images)
        loss = loss_func(scores_pred, labels)

        #backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent update step/adam step
        optimizer.step()

        # Accuracy bookkeeping uses the scores computed before this update.
        _, predictions = torch.max(scores_pred, 1)
        good_pred = (predictions == labels).sum().item()

        corr += good_pred
        n_samples += labels.size(0)

        if ix % 100 == 0:
            print(loss)

    print(f" >>> Accuracy on Training-data >> : {100 * corr / n_samples}")

tensor(0.6419, grad_fn=<NllLossBackward0>)
tensor(0.3762, grad_fn=<NllLossBackward0>)
tensor(0.2493, grad_fn=<NllLossBackward0>)
tensor(0.1934, grad_fn=<NllLossBackward0>)
tensor(0.1978, grad_fn=<NllLossBackward0>)
tensor(0.2064, grad_fn=<NllLossBackward0>)
 >>> Accuracy on Training-data >> : 83.96


# Testing the model ---> use [ with torch.no_grad(): ]

In [7]:
# Score the model on the test loader. Gradient tracking is switched off
# because no backward pass happens here.
with torch.no_grad():
    corr, n_samples = 0, 0

    for images, labels in test_loader:
        # RNN_model & BRNN_model need 3-D input: reshape the batch to
        # [batch, 28, 28]. (A plain fully connected model would instead take
        # 2-D input, i.e. reshape(-1, 28*28).)
        images = images.reshape(-1, 28, 28).to(device)
        labels = labels.to(device)

        # forward
        scores_pred = BRNN_model(images)

        # Predicted class = index of the highest score in each row.
        predictions = scores_pred.max(dim=1).indices
        good_pred = (predictions == labels).sum().item()

        n_samples += labels.size(0)
        corr += good_pred

    print(f" >>> Accuracy ON Testing-data >> : {100 * corr / n_samples}")

 >>> Accuracy ON Testing-data >> : 94.29
