# Vanilla recurrent neural network

We will use many to one RNN as we perform positive or negative sentimen analysis

Resouce:https://victorzhou.com/blog/intro-to-rnns/

In [1]:
from data import train_data, test_data
import numpy as np
from numpy.random import randn
import random

In [2]:


# Create the vocabulary.
vocab = list(set([w for text in train_data.keys() for w in text.split(' ')]))
vocab_size = len(vocab)
print('%d unique words found' % vocab_size)

# Assign indices to each word.
word_to_idx = { w: i for i, w in enumerate(vocab) }
idx_to_word = { i: w for i, w in enumerate(vocab) }
# print(word_to_idx['good'])
# print(idx_to_word[0])

18 unique words found


In [3]:
def createInputs(text):
    """
    Returns an array of one-hot vectors representing the words in the input text string.
  - text is a string
  - Each one-hot vector has shape (vocab_size, 1)
    """
    
    inputs = []
    for w in text.split(' '):
        v = np.zeros((vocab_size, 1))
        v[word_to_idx[w]] = 1
        inputs.append(v)
    return inputs

def softmax(xs):
  # Applies the Softmax Function to the input array.
  return np.exp(xs) / sum(np.exp(xs))

In [4]:
class RNN:
    def __init__(self, input_size, output_size, hidden_size=64):
        # Weights
        self.Whh = randn(hidden_size, hidden_size) / 1000
        self.Wxh = randn(hidden_size, input_size) / 1000
        self.Why = randn(output_size, hidden_size) / 1000

        # Biases
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))
    
    def forward(self, inputs):
        '''
        Perform a forward pass of the RNN using the given inputs.
        Returns the final output and hidden state.
        - inputs is an array of one hot vectors with shape (input_size, 1).
        '''
        h = np.zeros((self.Whh.shape[0], 1))

        self.last_inputs = inputs
        self.last_hs = { 0: h }

        # Perform each step of the RNN
        for i, x in enumerate(inputs):
            h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)
            self.last_hs[i + 1] = h

        # Compute the output
        y = self.Why @ h + self.by

        return y, h

    def backprop(self, d_y, learn_rate=2e-2):
        
        n = len(self.last_inputs)

        # Calculate dL/dWhy and dL/dby.
        d_Why = d_y @ self.last_hs[n].T
        d_by = d_y

        # Initialize dL/dWhh, dL/dWxh, and dL/dbh to zero.
        d_Whh = np.zeros(self.Whh.shape)
        d_Wxh = np.zeros(self.Wxh.shape)
        d_bh = np.zeros(self.bh.shape)

        # Calculate dL/dh for the last h.
        # dL/dh = dL/dy * dy/dh
        d_h = self.Why.T @ d_y

        # Backpropagate through time.
        for t in reversed(range(n)):
            # An intermediate value: dL/dh * (1 - h^2)
            temp = ((1 - self.last_hs[t + 1] ** 2) * d_h)
            
            # dL/db = dL/dh * (1 - h^2)
            d_bh += temp
            
            # dL/dWhh = dL/dh * (1 - h^2) * h_{t-1}
            d_Whh += temp @ self.last_hs[t].T
            
            # dL/dWxh = dL/dh * (1 - h^2) * x
            d_Wxh += temp @ self.last_inputs[t].T

            # Next dL/dh = dL/dh * (1 - h^2) * Whh
            d_h = self.Whh @ temp

        # Clip to prevent exploding gradients.
        for d in [d_Wxh, d_Whh, d_Why, d_bh, d_by]:
            np.clip(d, -1, 1, out=d)

        # Update weights and biases using gradient descent.
        self.Whh -= learn_rate * d_Whh
        self.Wxh -= learn_rate * d_Wxh
        self.Why -= learn_rate * d_Why
        self.bh -= learn_rate * d_bh
        self.by -= learn_rate * d_by

In [5]:
# Initialize our RNN!
rnn = RNN(vocab_size, 2)

inputs = createInputs('i am very good')
out, h = rnn.forward(inputs)
probs = softmax(out)
print(probs)

[[0.49999639]
 [0.50000361]]


In [6]:
def processData(data, backprop=True):
    items = list(data.items())
    random.shuffle(items)

    loss = 0
    num_correct = 0

    for x, y in items:
        inputs = createInputs(x)
        target = int(y)

        # Forward
        out, _ = rnn.forward(inputs)
        probs = softmax(out)

        # Calculate loss / accuracy
        loss -= np.log(probs[target])
        num_correct += int(np.argmax(probs) == target)

        if backprop:
            # Build dL/dy
            d_L_d_y = probs
            d_L_d_y[target] -= 1

            # Backward
            rnn.backprop(d_L_d_y)

    return loss / len(data), num_correct / len(data)

# Training loop
for epoch in range(3000):
    train_loss, train_acc = processData(train_data)
    if epoch % 100 == 99:
        print('--- Epoch %d' % (epoch + 1))
        print('Train:\tLoss %.3f | Accuracy: %.3f' % (train_loss, train_acc))

        test_loss, test_acc = processData(test_data, backprop=True)
        print('Test:\tLoss %.3f | Accuracy: %.3f' % (test_loss, test_acc))

--- Epoch 100
Train:	Loss 0.687 | Accuracy: 0.483
Test:	Loss 0.704 | Accuracy: 0.500
--- Epoch 200
Train:	Loss 0.670 | Accuracy: 0.690
Test:	Loss 0.735 | Accuracy: 0.550
--- Epoch 300
Train:	Loss 0.582 | Accuracy: 0.690
Test:	Loss 0.724 | Accuracy: 0.550
--- Epoch 400
Train:	Loss 0.423 | Accuracy: 0.776
Test:	Loss 0.707 | Accuracy: 0.600
--- Epoch 500
Train:	Loss 0.315 | Accuracy: 0.862
Test:	Loss 0.611 | Accuracy: 0.650
--- Epoch 600
Train:	Loss 0.103 | Accuracy: 0.966
Test:	Loss 0.698 | Accuracy: 0.850
--- Epoch 700
Train:	Loss 0.025 | Accuracy: 1.000
Test:	Loss 0.531 | Accuracy: 0.900
--- Epoch 800
Train:	Loss 0.006 | Accuracy: 1.000
Test:	Loss 0.459 | Accuracy: 0.900
--- Epoch 900
Train:	Loss 0.004 | Accuracy: 1.000
Test:	Loss 0.350 | Accuracy: 0.950
--- Epoch 1000
Train:	Loss 0.002 | Accuracy: 1.000
Test:	Loss 0.329 | Accuracy: 0.950
--- Epoch 1100
Train:	Loss 0.002 | Accuracy: 1.000
Test:	Loss 0.313 | Accuracy: 0.950
--- Epoch 1200
Train:	Loss 0.001 | Accuracy: 1.000
Test:	Loss 0

In [7]:
inputs = createInputs('i am very good')
out, h = rnn.forward(inputs)
probs = softmax(out)
print(probs)

[[9.65972376e-04]
 [9.99034028e-01]]


In [12]:
probs>.9

array([[False],
       [ True]])