## Recurrent Neural Networks

**Students:** Guillem Amat (ga98), Sebastián Soriano Pérez (ss1072)

<br>

**Importing Packages**

In [187]:
#Regular Packages
import numpy as np
import pandas as pd
import pdb
import os
from tqdm.notebook import tqdm

In [132]:
#Torch Packages
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch

<br>

### Q1: Recurrent Neural Network

In [134]:
from gen_gbu import gen_gbu

In [135]:
words, y = gen_gbu()

<br>

**Word Embedding**

In [136]:
vocabulary = set([word for sentence in words for word in sentence])
vocabulary

{'bad', 'good', 'uh'}

In [137]:
# One-hot encoding
word2vec = {word: [(word == 'good') * 1, (word == 'bad') * 1, (word == 'uh') * 1] 
            for i, word in enumerate(vocabulary, 1)}
word2vec

{'bad': [0, 1, 0], 'good': [1, 0, 0], 'uh': [0, 0, 1]}

In [138]:
X = [[word2vec[word] for word in sentence] for sentence in words]

In [139]:
print(f'Unique words: {len(word2vec)}')
print(f'First Word sentence: {words[0]}')
print(f'First Int sentence: {X[0]}')

Unique words: 3
First Word sentence: ['uh', 'uh', 'uh', 'uh', 'uh', 'uh', 'uh', 'uh', 'uh', 'good', 'uh', 'uh']
First Int sentence: [[0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [1, 0, 0], [0, 0, 1], [0, 0, 1]]


<br>

**Padding**

In [140]:
max_ = 0
for X_i in X:
    max_ = len(X_i) if len(X_i) > max_ else max_
max_

19

In [141]:
# Filling each sentence in X with vectors [0, 0, 0] to have equal length
# Loop through each sentence in X
for i in range(len(X)):
    zeros = 19 - len(X[i])
    X[i] += [[0, 0, 0] for j in range(zeros)]

<br>

**Train-Test Split**

In [142]:
from sklearn.model_selection import train_test_split

In [143]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [144]:
X_train, X_test = np.array(X_train), np.array(X_test)
y_train, y_test = np.array(y_train), np.array(y_test)

In [149]:
X_train, X_test = torch.from_numpy(X_train), torch.from_numpy(X_test)
y_train, y_test = torch.from_numpy(y_train), torch.from_numpy(y_test)
y_train, y_test = y_train.reshape(-1, 1), y_test.reshape(-1, 1)

In [151]:
X_train.shape

torch.Size([800, 19, 3])

<br>

**Recurrent Neural Network**

In [366]:
class RNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
                        
        self.linear = nn.Linear(input_dim + output_dim, output_dim, False)
        
    def forward(self, x):
        x = self.linear(x)
        return x

In [367]:
rnn = RNN(3, 1)

In [381]:
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=0.1)

In [382]:
rnn.parameters

<bound method Module.parameters of RNN(
  (linear): Linear(in_features=4, out_features=1, bias=False)
)>

In [385]:
list(rnn.parameters())

[Parameter containing:
 tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]

In [384]:
for epoch in range(50):
    y_hat = []
    for i, X_i in enumerate(X_train):
        #Loop through each word in sentence
        for j, x in enumerate(X_i):
            x = x.float()
            x_combined = torch.cat((x, torch.tensor([0], dtype=torch.float))) if j == 0 else torch.cat((x, torch.tensor([y_hat[0]], dtype=torch.float)))
            # Propagate the network forward 
            output = float(rnn(x_combined.float()))
            if j == 0:
                y_hat.append(output)
            else:
                y_hat[i] = output
    
    y_hat = torch.tensor(y_hat, dtype=torch.float, requires_grad=True)
    y_hat = y_hat.reshape(-1, 1)
    # Get the loss and take a gradient step
    loss = criterion(y_hat, y_train.float())
    print(list(rnn.parameters()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(list(rnn.parameters()))
    # Print loss
    if epoch % 5 == 0:
        print(loss)
    
    

[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
tensor(68.5523, grad_fn=<MseLossBackward>)
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]


[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
tensor(68.5523, grad_fn=<MseLossBackward>)
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]
[Parameter containing:
tensor([[0.4270, 0.4523, 0.4058, 0.2829]], requires_grad=True)]


In [349]:
list(rnn.parameters())

[Parameter containing:
 tensor([[-0.2346,  0.4588, -0.0806, -0.4404]], requires_grad=True)]

<br>

### Q2: Custom Implementation

In [122]:
W = np.array([1, -1, 0, 1])

In [123]:
def custom_rnn(X, W):
    y_hat = []
    # Loop through each sentence in X
    for i, X_i in enumerate(X):
        #Loop through each word in sentence
        for j, x in enumerate(X_i):
            x_combined = np.append(x, [0]) if j == 0 else np.append(x, y_hat[i])
            output = np.dot(x_combined, W)
            if j == 0:
                y_hat.append(output)
            else:
                y_hat[i] = output
    return y_hat

In [124]:
custom_rnn(X, W) == y

True

In [185]:
torch.from_numpy(np.array(y))

tensor([  1,   1,  -5,   0,   0,  -7,  -7,   7, -15,   0,  19,  -8,   8,   0,
          2, -13, -13,  -4,   6,  -1,  -7,  -3, -11,   0,   0,  -1,  -9,   0,
         12,  -6,   0,  18, -13,  -1,   0, -10,   0,  -1,   3,  12,  -6,   0,
          5,  -1,  -2,   2,   4,   5,  -3,  -8,  -9,  -6,   7,  17,   9,   6,
         10,  -5,  -4,  11,  -1,  -2,   0,   0,  -2,  13,   9,   0, -17,   0,
         -9,  -3,   0, -19,  -7,  -1,   0,   0,  -2,   4,   0, -13,   5,   2,
          2,  -1,   1,  -2,  13,  11, -14,  16,   0,  -4,   8,   0,   9,   1,
          3,   5,   2,   1,   1,  -6,  13,  -7, -18,   0,   0,   7,   9,  13,
         -1,  10,   6,   5,   1,  -8, -13,   0,  10,   2,  -5,   7,  -6,  -7,
         11,   1, -14,   3,   1,  17,   1,  -9,   9,   3,  -5,   6,   5,   3,
         -1, -18,   9,   2,  -9,   0,   4, -11,   6,   7,   3,  -6,  -9,  10,
         -8,   0,   4,  -5,   0, -13,  -7,  11,  -5,  -3,  10,   0,   0,   1,
         17,  10,   2,   0,  -8,   5,  -9,   7,  -3,  10,  -2,  