In [2]:
import torch
import torch.nn as nn
import numpy as np
import torchinfo

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

# Recurrent Neural Network
***



In [60]:
class RNN_Scratch(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        self.waa = nn.Parameter(torch.rand(hidden_size, hidden_size), requires_grad=True).to(device)
        self.baa = nn.Parameter(torch.rand(1)).to(device)
        
        self.wax = nn.Parameter(torch.rand(hidden_size, input_size), requires_grad=True).to(device)
        self.bax = nn.Parameter(torch.rand(1)).to(device)
        
        self.way = nn.Parameter(torch.rand(hidden_size, hidden_size), requires_grad=True).to(device)
        self.bay = nn.Parameter(torch.rand(1)).to(device)

    def forward(self, x_input, hidden_input=None):
        x_input = x_input.to(device)
        
        hidden_input = torch.zeros(1, self.hidden_size).to(device) if not hidden_input else hidden_input
        h_val = torch.matmul(hidden_input, self.waa.T) + self.baa
        x_val = torch.matmul(x_input, self.wax.T) + self.bax
        # print(h_val.shape, x_val.shape)

        a_val = torch.tanh(h_val + x_val)
        z = torch.matmul(a_val, self.way.T) + self.bay
        y_val = torch.nn.Softmax(dim=1)(z)

        return a_val, a_val[-1]

# Pytorch RNN
***

**`nn.RNN(input_size, hidden_state, num_layers)`**
- `input_size`: the number of expect feature in the input x
- `hidden_size`: the number of features in the hidden state
- `num_layers`: the number of recurrent layers 
    <br></br>
1. `.forward(inputs, hidden_state=torch.zeros(1, hidden_state)))`
    - forward has two inputs, the previous activation layer and the input values
    - for the previous activation layer, when set to a dimension, create a zero vector of that dimension as its default value
    - inputs: represents the data you want to look through


2. **`Math` behind .foward()**
   - `input_size` = $x_t$
   - `input` = $[1 \text{ x } x_t]$
   - `input_bias` = $b_x$
   - `input_weights` = $w_{x}$
   - `hidden_size` = $h_t$
   - `hidden` = $[1 \text{ x } h_t]$
   - `hidden_bias` = $b_h$
   - `hidden_weights` = $w_{h}$

$$
a_t = f((x_t * w_t.T + b_x) + (h_t * w_h.T + b_h)
$$


- Input: $X = [1 \text{ x } x_t] $ and input weights $W_{ax} = [x_t \text{ x } h_t]$ Dimensions
- Hidden input: $H = [1 \text{ x } h_t]$ and hidden weights $[h_t \text{ x } h_t]$ Dimensions


In [61]:
torch.manual_seed(42)
data = torch.rand(30, 2)
data

tensor([[0.8823, 0.9150],
        [0.3829, 0.9593],
        [0.3904, 0.6009],
        [0.2566, 0.7936],
        [0.9408, 0.1332],
        [0.9346, 0.5936],
        [0.8694, 0.5677],
        [0.7411, 0.4294],
        [0.8854, 0.5739],
        [0.2666, 0.6274],
        [0.2696, 0.4414],
        [0.2969, 0.8317],
        [0.1053, 0.2695],
        [0.3588, 0.1994],
        [0.5472, 0.0062],
        [0.9516, 0.0753],
        [0.8860, 0.5832],
        [0.3376, 0.8090],
        [0.5779, 0.9040],
        [0.5547, 0.3423],
        [0.6343, 0.3644],
        [0.7104, 0.9464],
        [0.7890, 0.2814],
        [0.7886, 0.5895],
        [0.7539, 0.1952],
        [0.0050, 0.3068],
        [0.1165, 0.9103],
        [0.6440, 0.7071],
        [0.6581, 0.4913],
        [0.8913, 0.1447]])

In [62]:
rnn_scratch = RNN_Scratch(2, 1)

# rnn_scratch.wax = nn.Parameter(torch.Tensor([[-0.4683,  0.2004]])).to(device)
# rnn_scratch.waa = nn.Parameter(torch.Tensor([[-0.6782]])).to(device)
# rnn_scratch.bax = nn.Parameter(torch.Tensor([[0.4811]])).to(device)
# rnn_scratch.wax = nn.Parameter(torch.Tensor([0.5583])).to(device)
print(rnn_scratch.wax)
print(rnn_scratch.waa)
print(rnn_scratch.bax)
print(rnn_scratch.baa)
rnn_scratch.forward(data)

tensor([[0.6542, 0.3278]], device='cuda:0', grad_fn=<ToCopyBackward0>)
tensor([[0.5315]], device='cuda:0', grad_fn=<ToCopyBackward0>)
tensor([0.6532], device='cuda:0', grad_fn=<ToCopyBackward0>)
tensor([0.1587], device='cuda:0', grad_fn=<ToCopyBackward0>)


(tensor([[0.9340],
         [0.8802],
         [0.8523],
         [0.8454],
         [0.8998],
         [0.9243],
         [0.9165],
         [0.8932],
         [0.9185],
         [0.8312],
         [0.8120],
         [0.8562],
         [0.7483],
         [0.8048],
         [0.8249],
         [0.8975],
         [0.9190],
         [0.8612],
         [0.9026],
         [0.8583],
         [0.8732],
         [0.9197],
         [0.8897],
         [0.9089],
         [0.8785],
         [0.7239],
         [0.8295],
         [0.8986],
         [0.8861],
         [0.8942]], device='cuda:0', grad_fn=<TanhBackward0>),
 tensor([0.8942], device='cuda:0', grad_fn=<SelectBackward0>))

In [59]:
rnn = nn.RNN(2, 1)
print(rnn.state_dict())
rnn.forward(data) # returns the hiddent state, cell_state, and predicted_value

OrderedDict([('weight_ih_l0', tensor([[-0.6367,  0.9996]])), ('weight_hh_l0', tensor([[0.1889]])), ('bias_ih_l0', tensor([0.3082])), ('bias_hh_l0', tensor([-0.9327]))])


(tensor([[-0.2652],
         [ 0.0405],
         [-0.2588],
         [-0.0434],
         [-0.8000],
         [-0.6512],
         [-0.6253],
         [-0.6557],
         [-0.6282],
         [-0.2782],
         [-0.3864],
         [-0.0552],
         [-0.4075],
         [-0.6235],
         [-0.7949],
         [-0.8631],
         [-0.6462],
         [-0.1517],
         [-0.1170],
         [-0.5768],
         [-0.6487],
         [-0.2481],
         [-0.7126],
         [-0.5863],
         [-0.7699],
         [-0.4353],
         [ 0.1283],
         [-0.2945],
         [-0.5428],
         [-0.8177]], grad_fn=<SqueezeBackward1>),
 tensor([[-0.8177]], grad_fn=<SqueezeBackward1>))