<a href="https://colab.research.google.com/github/41monster/AI_Courses/blob/main/2024_Spring_ML/RNN_Class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Class Of Recurrent Neural Networks

**Created By**: Sangam Khanal

### Recurrent Neural Networks From Scratch

The equation of RNNs is:
$$
h_t = tanh(x_tW_{ih}^T + b_{ih} + h_{t-1}W_{hh}^T + b_{hh})
$$


In [None]:
import math
import torch
from torch import nn
torch.manual_seed(0)


class CustomRNN(nn.Module):
    """Minimal tanh recurrent layer written with raw parameters.

    Each step computes ``tanh(x @ Wx + hidden @ Wh + b)``.

    Args:
        input_size: Number of features in each per-step input.
        output_size: Size of the hidden state (and of every per-step output).
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Creation order matters for reproducibility under a fixed seed:
        # Wx is drawn first, then Wh.
        self.Wx = nn.Parameter(torch.randn(input_size, output_size))
        self.Wh = nn.Parameter(torch.randn(output_size, output_size))
        # A single bias row, broadcast over the leading dimensions.
        self.b = nn.Parameter(torch.zeros(1, output_size))

    def forward(self, inputs, hidden):
        """Unroll the recurrence over the first axis of ``inputs``.

        Args:
            inputs: Iterable of per-step inputs whose last dimension is
                ``input_size`` (e.g. a tensor iterated along dim 0).
            hidden: Initial hidden state whose last dimension is
                ``output_size``.

        Returns:
            A tuple ``(outputs, hidden)``: the list of hidden states, one per
            step, and the final hidden state (the same object as
            ``outputs[-1]`` when at least one step was processed).
        """
        outputs = []
        for step_input in inputs:
            from_input = step_input @ self.Wx
            from_state = hidden @ self.Wh
            hidden = torch.tanh(from_input + from_state + self.b)
            outputs.append(hidden)
        return outputs, hidden

# Demo: run the scratch RNN over a random sequence of 11 steps
# (batch of 1, 59 features per step) starting from a random hidden state.
# NOTE(review): `input` shadows the builtin of the same name; the name is kept
# here because other cells may read it.
input = torch.rand(11, 1, 59)
hidden = torch.rand(1, 1, 30)
my_rnn = CustomRNN(input_size=59, output_size=30)
out = my_rnn(input, hidden)
# `out` is (per-step outputs, final hidden state). The last per-step output
# and the final hidden state are the same tensor, so both printed values match.
print(out[-1], out[0][-1])

tensor([[[-0.9966, -0.6313, -0.9843, -0.9996,  1.0000,  0.9993,  0.9812,
          -0.9825,  1.0000,  1.0000,  1.0000, -1.0000, -1.0000,  0.9998,
          -0.9989,  0.9992, -0.3938,  1.0000, -0.8379, -0.9995,  1.0000,
           0.9998,  0.6627, -1.0000, -1.0000,  1.0000,  0.9853, -0.9997,
          -0.9978, -1.0000]]], grad_fn=<TanhBackward0>) tensor([[[-0.9966, -0.6313, -0.9843, -0.9996,  1.0000,  0.9993,  0.9812,
          -0.9825,  1.0000,  1.0000,  1.0000, -1.0000, -1.0000,  0.9998,
          -0.9989,  0.9992, -0.3938,  1.0000, -0.8379, -0.9995,  1.0000,
           0.9998,  0.6627, -1.0000, -1.0000,  1.0000,  0.9853, -0.9997,
          -0.9978, -1.0000]]], grad_fn=<TanhBackward0>)


### LSTM

The LSTM equations are
$$
i_t = \sigma(W_{ii}x_t + b_{ii} + W_{hi}h_{t-1} + b_{hi}) \\
f_t = \sigma(W_{if}x_t + b_{if} + W_{hf}h_{t-1} + b_{hf}) \\
g_t = tanh(W_{ig}x_t + b_{ig} + W_{hg}h_{t-1} + b_{hg}) \\
o_t = \sigma(W_{io}x_t + b_{io} + W_{ho}h_{t-1} + b_{ho}) \\
c_t = f_t \odot c_{t-1} + i_t \odot g_t \\
h_t = o_t \odot tanh(c_t)
$$

In [None]:
class LSTMCell(nn.Module):
    """Single LSTM step built from two fused linear projections.

    ``x2h`` and ``h2h`` each produce ``4 * hidden_size`` features, laid out
    along the last axis as the input, forget, candidate and output gate
    pre-activations, in that order.

    Args:
        input_size: Number of features in the step input.
        hidden_size: Number of features in the hidden and cell states.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size

        # One projection per source; the four gates are computed in one matmul.
        self.x2h = nn.Linear(input_size, 4 * hidden_size)
        self.h2h = nn.Linear(hidden_size, 4 * hidden_size)
        self.tanh = nn.Tanh()
        self.init_param()

    def init_param(self):
        """Re-draw every parameter from U(-1/sqrt(hidden_size), 1/sqrt(hidden_size))."""
        bound = 1.0 / math.sqrt(self.hidden_size)
        with torch.no_grad():
            for weight in self.parameters():
                weight.uniform_(-bound, bound)

    def forward(self, input, states):
        """Advance the cell by one step.

        Args:
            input: Step input whose last dimension is ``input_size``.
            states: Tuple ``(h, c)`` of the previous hidden and cell states,
                each with last dimension ``hidden_size``.

        Returns:
            Tuple ``(h_next, c_next)`` with the updated hidden and cell states.
        """
        prev_h, prev_c = states
        pre_activations = self.x2h(input) + self.h2h(prev_h)
        # Four equal slices of width hidden_size along the feature axis.
        i_pre, f_pre, g_pre, o_pre = pre_activations.chunk(4, dim=-1)

        input_gate = torch.sigmoid(i_pre)
        forget_gate = torch.sigmoid(f_pre)
        candidate = self.tanh(g_pre)
        output_gate = torch.sigmoid(o_pre)

        next_c = forget_gate * prev_c + input_gate * candidate
        next_h = output_gate * self.tanh(next_c)
        return next_h, next_c

# Smoke test: one LSTM step on random tensors with leading shape (5, 4),
# 10 input features and 20 hidden features.
lstm_cell = LSTMCell(input_size=10, hidden_size=20)
inp = torch.rand(5, 4, 10)
# Previous hidden state first, then previous cell state (same draw order).
ht, ct = torch.rand(5, 4, 20), torch.rand(5, 4, 20)
h, c = lstm_cell(inp, (ht, ct))
print(h.shape, c.shape)

torch.Size([5, 4, 20]) torch.Size([5, 4, 20])


Can you add the bias in the LSTM Network?

## Let's use our neural networks

In [None]:
!curl -O https://download.pytorch.org/tutorial/data.zip; unzip data.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2814k  100 2814k    0     0  5277k      0 --:--:-- --:--:-- --:--:-- 5280k
Archive:  data.zip
   creating: data/
  inflating: data/eng-fra.txt        
   creating: data/names/
  inflating: data/names/Arabic.txt   
  inflating: data/names/Chinese.txt  
  inflating: data/names/Czech.txt    
  inflating: data/names/Dutch.txt    
  inflating: data/names/English.txt  
  inflating: data/names/French.txt   
  inflating: data/names/German.txt   
  inflating: data/names/Greek.txt    
  inflating: data/names/Irish.txt    
  inflating: data/names/Italian.txt  
  inflating: data/names/Japanese.txt  
  inflating: data/names/Korean.txt   
  inflating: data/names/Polish.txt   
  inflating: data/names/Portuguese.txt  
  inflating: data/names/Russian.txt  
  inflating: data/names/Scottish.txt  
  inflating: data/names/Spanish.txt  
  inflating

In [None]:
!pip install Unidecode

Collecting Unidecode
  Downloading Unidecode-1.3.8-py3-none-any.whl (235 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Unidecode
Successfully installed Unidecode-1.3.8


### Data Preparation

In [None]:
import os
import random
from string import ascii_letters

import torch
from torch import nn
import torch.nn.functional as F
from unidecode import unidecode

# Seed torch's global RNG; the value returned by manual_seed is discarded.
_ = torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
data_dir = "./data/names"

# Map each language (file name without extension) to a 1-element long tensor
# holding its class index.
# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# listing is sorted: otherwise the language -> index assignment (and every
# label derived from it) can differ between machines and runs.
lang2label = {
    file_name.split(".")[0]: torch.tensor([i], dtype=torch.long)
    for i, file_name in enumerate(sorted(os.listdir(data_dir)))
}
print(lang2label)

{'Portuguese': tensor([0]), 'Czech': tensor([1]), 'English': tensor([2]), 'German': tensor([3]), 'Italian': tensor([4]), 'Dutch': tensor([5]), 'Greek': tensor([6]), 'Spanish': tensor([7]), 'Chinese': tensor([8]), 'Irish': tensor([9]), 'Korean': tensor([10]), 'French': tensor([11]), 'Arabic': tensor([12]), 'Scottish': tensor([13]), 'Polish': tensor([14]), 'Russian': tensor([15]), 'Japanese': tensor([16]), 'Vietnamese': tensor([17])}


In [None]:
# Vocabulary: ASCII letters followed by a few punctuation marks; each
# character maps to its position in that string.
char2idx = {ch: pos for pos, ch in enumerate(ascii_letters + " .,:;-'")}
num_letters = len(char2idx)


def name2tensor(name):
    """One-hot encode ``name`` as a ``(len(name), num_letters)`` float tensor.

    Row ``k`` has a single 1 in the column given by ``char2idx`` for the
    ``k``-th character.

    Raises:
        KeyError: If ``name`` contains a character that is not in ``char2idx``.
    """
    columns = [char2idx[ch] for ch in name]
    one_hot = torch.zeros(len(columns), num_letters)
    for row, col in enumerate(columns):
        one_hot[row, col] = 1
    return one_hot

In [None]:
name2tensor("Hello")

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.

In [None]:
# Parallel lists: tensor_names[k] is the one-hot encoding of a name and
# target_langs[k] is the label tensor of the language file it came from.
tensor_names = []
target_langs = []

# Sorted so the sample order is reproducible (os.listdir order is arbitrary).
for file in sorted(os.listdir(data_dir)):
    lang = file.split(".")[0]
    # Decode explicitly as UTF-8 instead of relying on the platform default;
    # the names are transliterated with unidecode below, so non-ASCII text is
    # expected in these files.
    with open(os.path.join(data_dir, file), encoding="utf-8") as f:
        names = [unidecode(line.rstrip()) for line in f]
    for name in names:
        # Only the encoding step is guarded: a KeyError here means the name
        # still contains a character outside the vocabulary, so it is skipped.
        # Keeping the label lookup outside the try ensures the two lists can
        # never get out of step.
        try:
            encoded = name2tensor(name)
        except KeyError:
            continue
        tensor_names.append(encoded)
        target_langs.append(lang2label[lang])

In [None]:
from sklearn.model_selection import train_test_split

# Stratify on plain integer class ids rather than on 1-element tensors.
stratify_labels = [label.item() for label in target_langs]

# 90/10 stratified split over sample indices. random_state is fixed so the
# split is reproducible; seeding torch alone does not affect scikit-learn's
# shuffling.
train_idx, test_idx = train_test_split(
    range(len(target_langs)),
    test_size=0.1,
    shuffle=True,
    stratify=stratify_labels,
    random_state=42,
)

# Each dataset is a list of (one-hot name tensor, label tensor) pairs.
train_dataset = [
    (tensor_names[i], target_langs[i])
    for i in train_idx
]

test_dataset = [
    (tensor_names[i], target_langs[i])
    for i in test_idx
]

In [None]:
# Show the shape of the first training example (nothing is printed when the
# dataset is empty). `curr_x` stays bound afterwards.
for curr_x, _ in train_dataset[:1]:
    print(curr_x.shape)


torch.Size([7, 59])


In [None]:
# Report how many examples ended up in each split, one per line.
print(f"Train: {len(train_dataset)}", f"Test: {len(test_dataset)}", sep="\n")

Train: 18063
Test: 2007


### Creating the architecture

In [None]:
import torch.nn.functional as F
class RNNNetwork(nn.Module):
    """Sequence classifier: a single-layer RNN followed by a linear read-out.

    Args:
        num_letters: Size of each per-character input vector (RNN input size).
        hidden_size: Width of the RNN hidden state. Defaults to 100.
        num_classes: Number of output scores produced per sequence.
            Defaults to 18.
    """

    def __init__(self, num_letters, hidden_size=100, num_classes=18):
        super().__init__()
        # One recurrent layer; batched inputs are (batch, seq, feature).
        self.rnn = torch.nn.RNN(num_letters, hidden_size, 1, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) for ``x``.

        No softmax is applied: the scores are meant for a loss such as
        ``nn.CrossEntropyLoss``, which expects unnormalised logits.
        """
        _, hidden = self.rnn(x)
        # `hidden` has a leading per-layer axis of size 1 (single layer);
        # index it away to get the final hidden state.
        return self.linear(hidden[0])

In [None]:
# Sanity check: push one example through an untrained network and display
# the size of the result.
myRNN = RNNNetwork(num_letters)
myRNN(curr_x).size()

torch.Size([18])

In [None]:
# Training setup: a fresh network, Adam over all of its parameters with a
# learning rate of 1e-3, and cross-entropy on the raw output scores.
model = RNNNetwork(num_letters)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:
num_epochs = 1
print_interval = 3000

# Online training: one name per optimisation step.
for epoch in range(num_epochs):
    random.shuffle(train_dataset)
    # Sum of per-sample losses since the last report. Printing the loss of
    # whichever single sample happens to land on the interval is mostly noise,
    # so the mean over the interval is reported instead.
    running_loss = 0.0
    for i, (name, label) in enumerate(train_dataset):
        # Add a leading batch axis of size 1 for the batch_first RNN.
        name = torch.unsqueeze(name, axis=0)

        optimizer.zero_grad()
        output = model(name)
        loss = criterion(output, label)
        loss.backward()
        # Cap the total gradient norm at 1 before the update.
        nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        running_loss += loss.item()

        if (i + 1) % print_interval == 0:
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], "
                f"Step [{i + 1}/{len(train_dataset)}], "
                f"Loss: {running_loss / print_interval:.4f}"
            )
            running_loss = 0.0

Epoch [1/1], Step [3000/18063], Loss: 3.3134
Epoch [1/1], Step [6000/18063], Loss: 0.6150
Epoch [1/1], Step [9000/18063], Loss: 0.0048
Epoch [1/1], Step [12000/18063], Loss: 0.2323
Epoch [1/1], Step [15000/18063], Loss: 4.1626
Epoch [1/1], Step [18000/18063], Loss: 0.9784
