### Building blocks of RNN



Create a recurrent layer from **RNN** and perform a forward pass on an input sequence of length 3 to compute the output.

Then, manually compute the forward pass and compare the results with those of RNN.

Forward pass for Recurrent architecture. 

Using the `torch.nn` module, a recurrent layer can be defined via **RNN**, which is similar to the hidden-to-hidden recurrence.

In [None]:
import torch
import torch.nn as nn
torch.manual_seed(1)
# Single-layer RNN mapping 5 input features to 2 hidden units.
# batch_first=True means the first input dimension is the batch size.
rnn_layer = nn.RNN(input_size=5, hidden_size=2, num_layers=1, batch_first=True)
# Layer-0 parameters: input-to-hidden (ih) and hidden-to-hidden (hh)
# weights and biases, kept for the manual forward pass.
w_xh = rnn_layer.weight_ih_l0
# The attribute is `weight_hh_l0`; `rnn_layer.weight.hh_l0` raises AttributeError.
w_hh = rnn_layer.weight_hh_l0
b_xh = rnn_layer.bias_ih_l0
b_hh = rnn_layer.bias_hh_l0
print('w_xh shape:', w_xh.shape)
print('w_hh shape:', w_hh.shape)
print('b_xh shape:', b_xh.shape)
print('b_hh shape:', b_hh.shape)

In [None]:
# Three time steps with 5 features each: all ones, all twos, all threes.
x_seq = torch.tensor([[1.0]*5, [2.0]*5, [3.0]*5]).float()
# Output of the sample RNN; the input is reshaped to (batch=1, seq_len=3, features=5).
output, hn = rnn_layer(torch.reshape(x_seq, (1, 3, 5)))
# Manually computing the output, one time step at a time.
out_man = []
for t in range(3):
    xt = torch.reshape(x_seq[t], (1, 5))
    print(f'Time step {t} =>')
    print(' Input :', xt.numpy())

    # Input-to-hidden contribution: uses the input bias b_xh (not b_hh).
    ht = torch.matmul(xt, torch.transpose(w_xh, 0, 1)) + b_xh
    print(' Hidden :', ht.detach().numpy())
    # The previous output is the recurrent state; it is all zeros at t = 0.
    if t > 0:
        prev_h = out_man[t-1]
    else:
        prev_h = torch.zeros((ht.shape))
    # Add the hidden-to-hidden contribution together with its bias b_hh.
    ot = ht + torch.matmul(prev_h, torch.transpose(w_hh, 0, 1)) + b_hh
    # Hyperbolic tangent as the activation function since RNN uses it by default.
    ot = torch.tanh(ot)
    out_man.append(ot)

    print(' Output (manual):', ot.detach().numpy())
    print(' RNN output:', output[:, t].detach().numpy())
    print()
    

#### Building an RNN model

In [None]:
class RNN(nn.Module):
    """Two-layer recurrent network followed by a single-output linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: number of hidden units in each recurrent layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # Alternative recurrent cells with the same constructor arguments:
        # nn.GRU(...) or nn.LSTM(...).
        self.rnn = nn.RNN(
            input_size,
            hidden_size,
            num_layers=2,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the linear head applied to the last layer's final hidden state."""
        _, final_states = self.rnn(x)
        # final_states is indexed by layer first; take the top (last) layer.
        top_layer_state = final_states[-1]
        return self.fc(top_layer_state)
    
# Quick check: 64 input features, 32 hidden units, then a forward pass on a
# random batch shaped (batch=5, seq_len=3, features=64).
model = RNN(input_size=64, hidden_size=32)
print(model)
sample_batch = torch.randn(5, 3, 64)
model(sample_batch)

#### More on Bidirectional RNN

In [None]:
class RNN(nn.Module):
    """Bidirectional-LSTM binary classifier over padded token-index sequences.

    Args:
        vocab_size: number of entries in the embedding table (index 0 is padding).
        embed_dim: size of each embedding vector.
        rnn_hidden_size: hidden units per LSTM direction.
        fc_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, vocab_size, embed_dim, rnn_hidden_size, fc_hidden):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.rnn = nn.LSTM(embed_dim, rnn_hidden_size, batch_first=True, bidirectional=True)
        # Forward and backward final states are concatenated, hence 2 * hidden.
        # Use the `fc_hidden` argument rather than an unrelated global name.
        self.fc1 = nn.Linear(rnn_hidden_size * 2, fc_hidden)
        # Activations must be attributes of the module so forward() can use them.
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(fc_hidden, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, text, lengths):
        """Return one sigmoid probability per sequence, shape (batch, 1).

        Args:
            text: integer tensor of padded token indices, shape (batch, seq_len).
            lengths: tensor holding the true (unpadded) length of each sequence.
        """
        out = self.embedding(text)
        # pack_padded_sequence needs the lengths on the CPU; pass the values,
        # not a bound method.
        out = nn.utils.rnn.pack_padded_sequence(
            out, lengths.cpu(), enforce_sorted=False, batch_first=True
        )
        _, (hidden, cell) = self.rnn(out)
        # Last two entries of `hidden` are the final forward and backward states.
        out = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)

        return out

In [None]:
# Seed the random number generator before the model's parameters are created.
torch.manual_seed(1)
# NOTE(review): vocab_size, embed_dim, rnn_hidden_size and fc_hidden_size are
# not defined in this section — presumably set in an earlier cell; confirm.
model = RNN(vocab_size, embed_dim, rnn_hidden_size, fc_hidden_size)
# Bare expression: in a notebook cell this displays the module's repr.
model