# Day 34 - RNNs from Scratch

In [1]:
import torch
import einops
from torch import nn, optim

In [2]:
class Recurrent(nn.Module):
    """Small recurrent network mapping a scalar sequence to a scalar sequence.

    At every time step the hidden state is updated linearly from the previous
    state and the current input, and the output is a linear readout of the
    ReLU of that state:

        state_t = step(state_{t-1}) + input(x_t)
        y_t     = out(relu(state_t))

    Args:
        n_hiddens: Width of the hidden state.
    """

    def __init__(self, n_hiddens=5):
        super().__init__()
        # Keep the width on the instance so forward() never has to look it up
        # in the enclosing (notebook-global) namespace.
        self.n_hiddens = n_hiddens
        self.step = nn.Linear(n_hiddens, n_hiddens)
        self.input = nn.Linear(1, n_hiddens)
        self.out = nn.Linear(n_hiddens, 1)

    def forward(self, x):
        """Run the recurrence over every time step of ``x``.

        Args:
            x: Tensor of shape ``(batch, length)``; one scalar per time step.

        Returns:
            Tensor of shape ``(batch, length)`` holding one prediction per
            time step. The final hidden state is left in ``self.state``.
        """
        batch_size, length = x.shape
        # new_zeros follows x's device and dtype, so the module also works
        # after .to(device) / .double().
        out = x.new_zeros(batch_size, length)
        self.state = x.new_zeros(batch_size, self.n_hiddens)

        for i in range(length):
            x_i = x[:, i : i + 1]  # (batch, 1): the input at time step i
            self.state = self.step(self.state) + self.input(x_i)
            step_out = self.out(torch.relu(self.state))  # (batch, 1)
            out[:, i] = step_out.squeeze(-1)

        return out

In [3]:
# Training data: eight runs of five consecutive integers, one run per row.
# Each row is generated from its first value.
seqs = torch.tensor(
    [[first + k for k in range(5)] for first in (-11, 35, -9, 1, 4, 7, 12, 28)],
    dtype=torch.float32,
)

In [4]:
# Width of the hidden state used when constructing the network below.
n_hiddens = 128

In [5]:
# Build the model with the configured hidden width; the bare `net` on the
# last line displays the module's layer summary.
net = Recurrent(n_hiddens=n_hiddens)
net

Recurrent(
  (step): Linear(in_features=128, out_features=128, bias=True)
  (input): Linear(in_features=1, out_features=128, bias=True)
  (out): Linear(in_features=128, out_features=1, bias=True)
)

In [6]:
# Adam over every parameter of `net`, with a learning rate of 2e-3.
optimizer = optim.Adam(net.parameters(), lr=2e-3)

In [7]:
# Squared-error criterion (nn.MSELoss with its default settings); it is
# applied per time step when the loss is computed.
loss_fn = nn.MSELoss()

In [8]:
# Forward pass on the training sequences before any optimisation step has
# been taken; one prediction per (sequence, time step).
preds = net(seqs)
preds

tensor([[ 2.3238e+00,  2.3118e+00,  1.0094e+00,  6.2448e-01,  2.2082e-01],
        [ 2.5880e-02,  4.4024e+00,  7.1682e+00,  9.2964e+00,  8.1232e+00],
        [ 1.8422e+00,  1.8051e+00,  6.7044e-01,  3.2457e-01, -6.6227e-02],
        [-5.8944e-03, -1.2544e-01, -1.9340e-02,  2.3520e-01,  4.8855e-01],
        [-8.5437e-02,  2.0807e-01,  4.8758e-01,  9.9195e-01,  1.1917e+00],
        [-8.7863e-02,  6.0734e-01,  1.0873e+00,  1.7871e+00,  1.8664e+00],
        [-9.4975e-02,  1.2734e+00,  2.1597e+00,  3.1287e+00,  2.9828e+00],
        [-1.7226e-02,  3.4439e+00,  5.6567e+00,  7.4026e+00,  6.5591e+00]],
       grad_fn=<CopySlices>)

In [9]:
# Total loss = sum over time steps of the MSE between the prediction at that
# step and the target `input + 1`.
loss = sum(
    loss_fn(pred_t, seq_t + 1)
    for pred_t, seq_t in zip(preds.unbind(dim=1), seqs.unbind(dim=1))
)
loss

tensor(1372.2815, grad_fn=<AddBackward0>)

In [10]:
# One manual optimisation step, in the required order:
optimizer.zero_grad()  # clear gradients left over from any previous backward pass
loss.backward()        # backpropagate the loss computed above
optimizer.step()       # apply the Adam update to the parameters

In [11]:
from tqdm.auto import tqdm

In [12]:
# Full-batch training: 10,000 passes over the same eight sequences.
for _ in tqdm(range(10_000), desc="Epochs"):
    optimizer.zero_grad()

    preds = net(seqs)
    # Sum of per-time-step MSEs against the target `input + 1`.
    loss = sum(
        loss_fn(pred_t, seq_t + 1)
        for pred_t, seq_t in zip(preds.unbind(dim=1), seqs.unbind(dim=1))
    )

    loss.backward()
    optimizer.step()

Epochs:   0%|          | 0/10000 [00:00<?, ?it/s]

In [13]:
# Predictions on the training sequences after training; the training target
# for each element was `input + 1`.
net(seqs)

tensor([[-9.9953, -9.0004, -7.9957, -7.0001, -6.0013],
        [36.0005, 36.9999, 37.9995, 39.0005, 40.0000],
        [-8.0058, -6.9996, -6.0052, -4.9998, -3.9984],
        [ 2.0004,  3.0002,  3.9993,  4.9995,  6.0010],
        [ 4.9994,  5.9973,  7.0014,  7.9998,  8.9990],
        [ 8.0021,  9.0025,  9.9992, 11.0014, 11.9999],
        [12.9990, 14.0004, 15.0000, 15.9992, 17.0006],
        [28.9993, 29.9998, 31.0007, 31.9994, 32.9998]], grad_fn=<CopySlices>)

In [14]:
# A sequence that does not appear among the rows of `seqs`, built directly as
# a batch of one with shape (1, 5).
new_seq = torch.tensor([[18, 19, 20, 21, 22]], dtype=torch.float32)
net(new_seq)

tensor([[18.9981, 19.9996, 21.0043, 21.9967, 23.0007]], grad_fn=<CopySlices>)