## Recurrent Neural Networks

**Students:** Guillem Amat (ga98), Sebastian Soriano Pérez (ss1072)

<br>

**Importing Packages**

In [2]:
#Regular Packages
import numpy as np
import pandas as pd
import pdb
import os

In [3]:
#Torch Packages
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch

In [6]:
# Working directory for the notebook (presumably where gen_gbu.py lives, since it is
# imported right after this cell). The default is the original author's machine-specific
# path; set the NLP_HOMEWORK_DIR environment variable to run the notebook elsewhere.
homework_dir = os.environ.get(
    'NLP_HOMEWORK_DIR',
    'C:\\Users\\guill\\Desktop\\Current Semester\\Natural Language Processing\\Homeworks\\Homework_7',
)
os.chdir(homework_dir)

<br>

### Q1: Recurrent Neural Network

In [4]:
from gen_gbu import gen_gbu

In [5]:
# gen_gbu() (local module) returns two objects, unpacked here as the generated word
# sequences and their sentiment values; both are sliced to the first 25 entries below.
words, sentiment = gen_gbu()

<br>

**Data Structuring**

In [7]:
# Keep only the first 25 generated sentences (each entry is one sentence, i.e. a sequence of words)
sentences = list(words[:25])

In [8]:
# Flatten the sentences into one list of tokens, and also keep the
# whole corpus as a single space-separated string
all_words = [token for tokens in sentences for token in tokens]
full_text = ' '.join(all_words)

<br>

**Word Embedding**

In [9]:
from collections import Counter

In [10]:
# Count how many times each word occurs across the selected sentences
counts = Counter(all_words)
counts

Counter({'good': 79, 'uh': 99, 'bad': 102})

In [11]:
# Vocabulary ordered from most to least frequent word
# (ties keep the order in which the words were first counted)
vocabulary = [word for word, _ in counts.most_common()]
vocabulary

['bad', 'uh', 'good']

In [12]:
# Map each word to an integer id starting at 1, so that 0 stays free
# for the padding value written by pad_features
word2int = dict(zip(vocabulary, range(1, len(vocabulary) + 1)))
word2int

{'bad': 1, 'uh': 2, 'good': 3}

In [13]:
# Encode every sentence as the list of integer ids of its words
sentences2int = [[word2int[token] for token in tokens] for tokens in sentences]

In [14]:
print(f'Unique words: {len(word2int)}')
print(f'First Word sentence: {sentences[0]}')
print(f'First Int sentence: {sentences2int[0]}')

Unique words: 3
First Word sentence: ['good', 'good', 'good', 'good', 'good', 'uh', 'bad', 'uh', 'good', 'good', 'uh', 'good', 'good', 'bad', 'uh']
First Int sentence: [3, 3, 3, 3, 3, 2, 1, 2, 3, 3, 2, 3, 3, 1, 2]


<br>

**Padding Features**

In [24]:
def pad_features(sentences2int, pad_length):
    """Left-pad (or truncate) integer-encoded sentences to a fixed length.

    Parameters
    ----------
    sentences2int : sequence of sequences of int
        One sequence of word ids per sentence.
    pad_length : int
        Number of columns of the returned array.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(sentences2int), pad_length)``.
        Sentences shorter than ``pad_length`` are right-aligned with zeros
        on the left; longer sentences keep only their first ``pad_length``
        ids. Empty sentences produce an all-zero row.
    """
    pad_list = np.zeros((len(sentences2int), pad_length), dtype=int)
    for i, row in enumerate(sentences2int):
        kept = list(row)[:pad_length]
        # Skip empty rows explicitly: a `-0:` slice would select the whole
        # row and assigning an empty array to it fails to broadcast.
        if kept:
            pad_list[i, pad_length - len(kept):] = kept
    return pad_list

In [27]:
# Sanity-check pad_features on the encoded sentences: one row per sentence and
# exactly 20 columns (the same pad length used to build X below).
# The padded array is computed here so the cell does not rely on a name left
# over from an earlier kernel session.
padded_sentences = pad_features(sentences2int, 20)
assert len(padded_sentences) == len(sentences2int)
assert padded_sentences.shape[1] == 20

In [36]:
# Feature matrix: every encoded sentence padded to 20 columns (zeros on the left)
X = pad_features(sentences2int, 20)

In [44]:
# Targets: sentiment values of the same first 25 sentences used to build X
y = np.array(sentiment[:25])

In [39]:
# Features and targets must describe the same number of sentences
assert len(X) == len(y)

In [45]:
# Inspect the target values
y

array([  7,  -6, -16,  12,  -1,  -8,  -1,   7,   1,   0, -12,   0,   3,
        -4,   1,   0,  14,   0,  -2, -18,  16,  -5,  -4,  -1,  -6])

<br>

**Train-Test Split**

In [46]:
from sklearn.model_selection import train_test_split

In [51]:
# Hold out 20% of the sentences for testing. random_state is fixed so that
# re-running the notebook yields the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [54]:
# Inspect the training split (displays the full feature and target arrays)
X_train, y_train

(array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 3, 3, 3, 2, 3, 3, 2, 2, 3],
        [0, 0, 0, 0, 0, 0, 0, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        [0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1],
        [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        [0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        [0, 0, 0, 0, 0, 0, 0, 3, 1, 2, 3, 2, 2, 1, 3, 2, 1, 1, 3, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 2, 2, 3, 3, 2],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3, 2, 3, 2, 2, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 1, 3, 1, 2, 1, 1, 2, 1],
        [0, 0, 0, 0,

In [55]:
# Wrap each (features, targets) pair of numpy arrays as a TensorDataset
train_data, test_data = (
    TensorDataset(torch.from_numpy(features), torch.from_numpy(targets))
    for features, targets in ((X_train, y_train), (X_test, y_test))
)

In [57]:
# Shuffled loaders over both splits, one sentence per batch
batch_size = 1
train_loader, test_loader = (
    DataLoader(dataset, shuffle=True, batch_size=batch_size)
    for dataset in (train_data, test_data)
)

<br>

**Recurrent Neural Network**

In [None]:
class RNN(nn.Module):
    """One recurrent step with learnable input, hidden and bias weights.

    ``forward`` returns ``x @ Wx + hidden @ Wh + b``.
    NOTE(review): no activation (e.g. tanh) is applied to the result, so the
    recurrence is purely linear -- confirm this is intended.
    """

    def __init__(self, n_inputs=None, n_neurons=None):
        """Create the weights.

        Parameters
        ----------
        n_inputs : int, optional
            Number of input features per time step.
        n_neurons : int, optional
            Size of the hidden state.

        When a size is omitted it is read from the notebook-level variable of
        the same name, which keeps the argument-less ``RNN()`` call working.

        Raises
        ------
        NameError
            If a size is omitted and no notebook-level variable provides it.
        """
        super(RNN, self).__init__()

        if n_inputs is None or n_neurons is None:
            scope = globals()
            requested = (('n_inputs', n_inputs), ('n_neurons', n_neurons))
            missing = [name for name, value in requested
                       if value is None and name not in scope]
            if missing:
                raise NameError(
                    'RNN sizes not provided and not defined globally: '
                    + ', '.join(missing)
                )
            n_inputs = scope['n_inputs'] if n_inputs is None else n_inputs
            n_neurons = scope['n_neurons'] if n_neurons is None else n_neurons

        # nn.Parameter registers the tensors with the module, so that
        # self.parameters() yields them and an optimizer can update them.
        # Plain tensor attributes are invisible to parameters().
        self.Wx = nn.Parameter(torch.randn(n_inputs, n_neurons))
        self.Wh = nn.Parameter(torch.randn(n_neurons, n_neurons))

        self.b = nn.Parameter(torch.zeros(1, n_neurons))

    def forward(self, x, hidden):
        """Return ``x @ Wx + hidden @ Wh + b``.

        torch.mm requires 2-D operands, so ``x`` must be
        ``(batch, n_inputs)`` and ``hidden`` ``(batch, n_neurons)``, with a
        dtype matching the float weights.
        """
        return torch.mm(x, self.Wx) + torch.mm(hidden, self.Wh) + self.b

In [None]:
# Instantiate the network. Called without arguments, RNN takes its layer sizes from
# the notebook-level names n_inputs and n_neurons, which must be defined before this cell runs.
rnn = RNN()

In [None]:
# Mean-squared-error loss, optimised with Adam at learning rate 0.01.
# Adam needs rnn.parameters() to be non-empty, i.e. the model's weights must be
# registered on the module as nn.Parameter.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)