In [7]:
# Training set: 58 short phrases mapped to a boolean sentiment label
# (True = positive, False = negative).
# The vocabulary is built from the words of these keys, so every word the
# model can encode must appear in at least one phrase here.
# NOTE: the insertion order of this dict is the starting order for the first
# shuffle when it is later converted to a list of (text, label) pairs, so
# reordering entries changes the seeded training run.
train_data = {
    "good": True,
    "bad": False,
    "happy": True,
    "sad": False,
    "not good": False,
    "not bad": True,
    "not happy": False,
    "not sad": True,
    "very good": True,
    "very bad": False,
    "very happy": True,
    "very sad": False,
    "i am happy": True,
    "this is good": True,
    "i am bad": False,
    "this is bad": False,
    "i am sad": False,
    "this is sad": False,
    "i am not happy": False,
    "this is not good": False,
    "i am not bad": True,
    "this is not sad": True,
    "i am very happy": True,
    "this is very good": True,
    "i am very bad": False,
    "this is very sad": False,
    "this is very happy": True,
    "i am good not bad": True,
    "this is good not bad": True,
    "i am bad not good": False,
    "i am good and happy": True,
    "this is not good and not happy": False,
    "i am not at all good": False,
    "i am not at all bad": True,
    "i am not at all happy": False,
    "this is not at all sad": True,
    "this is not at all happy": False,
    "i am good right now": True,
    "i am bad right now": False,
    "this is bad right now": False,
    "i am sad right now": False,
    "i was good earlier": True,
    "i was happy earlier": True,
    "i was bad earlier": False,
    "i was sad earlier": False,
    "i am very bad right now": False,
    "this is very good right now": True,
    "this is very sad right now": False,
    "this was bad earlier": False,
    "this was very good earlier": True,
    "this was very bad earlier": False,
    "this was very happy earlier": True,
    "this was very sad earlier": False,
    "i was good and not bad earlier": True,
    "i was not good and not happy earlier": False,
    "i am not at all bad or sad right now": True,
    "i am not at all good or happy right now": False,
    "this was not happy and not good earlier": False,
}

# Held-out evaluation set: 20 phrases mapped to a boolean sentiment label
# (True = positive, False = negative), in the same format as train_data.
# Every word used here also occurs in train_data, which is required because
# the vocabulary is built from the training phrases only.
test_data = {
    "this is happy": True,
    "i am good": True,
    "this is not happy": False,
    "i am not good": False,
    "this is not bad": True,
    "i am not sad": True,
    "i am very good": True,
    "this is very bad": False,
    "i am very sad": False,
    "this is bad not good": False,
    "this is good and happy": True,
    "i am not good and not happy": False,
    "i am not at all sad": True,
    "this is not at all good": False,
    "this is not at all bad": True,
    "this is good right now": True,
    "this is sad right now": False,
    "this is very bad right now": False,
    "this was good earlier": True,
    "i was not happy and not good earlier": False,
}

In [8]:
# Create the vocabulary: the sorted list of unique words in the training texts.
# Sorting matters for reproducibility: iterating a set of strings follows hash
# order, and string hashes are randomised per interpreter session, so an
# unsorted vocabulary would give every word a different index (and therefore a
# different row of the seeded input weights) on each kernel restart.
# dict(...) accepts both the original dict and a list of (text, label) pairs,
# so this cell still works if train_data has already been converted to a list.
vocab = sorted({w for text in dict(train_data) for w in text.split(" ")})
vocab_size = len(vocab)


In [9]:
# Map every vocabulary word to its position in `vocab`; this position is the
# row that is set to 1 in the word's one-hot vector.
word_to_idx = dict(zip(vocab, range(len(vocab))))


In [10]:
import numpy as np


def createInputs(text):
    """
    Returns a list of one-hot vectors representing the words
    in the input text string.
    - text is a string; words are separated by whitespace
    - Each one-hot vector has shape (vocab_size, 1)
    - An empty or whitespace-only string yields an empty list
    - Raises KeyError (listing the offending words) if any word is
      missing from word_to_idx
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing whitespace, so stray spaces never produce an empty ""
    # token (which split(" ") would, ending in an opaque KeyError: '').
    words = text.split()

    # Validate up front so the error names every unknown word at once.
    unknown = [w for w in words if w not in word_to_idx]
    if unknown:
        raise KeyError("words not in vocabulary: %r" % (unknown,))

    inputs = []
    for w in words:
        v = np.zeros((vocab_size, 1))
        v[word_to_idx[w]] = 1
        inputs.append(v)
    return inputs


In [11]:
import numpy as np
from numpy.random import randn
from softmax import softmax

# Seed NumPy's global random state with 42 for reproducibility: the `randn`
# calls that initialise the RNN weights draw from this global state.
np.random.seed(42)


# All vectors are column vectors by default.
# RNN training: stochastic gradient descent (one update per sequence).
# Task: sentiment analysis (many-to-one).
class RNN:
    """
    Many-to-one recurrent network: one tanh hidden layer followed by a
    softmax output, updated by gradient descent after every sequence.

    All vectors are column vectors. For an input sequence x[0..tau-1]:
        h[0] = 0
        h[t] = tanh(wi.T @ x[t-1] + wh.T @ h[t-1] + bh)    for t = 1..tau
        o    = softmax(wo.T @ h[tau] + bo)
    and the loss is the cross-entropy between o and a one-hot target y.
    """

    def __init__(self, m, d, p, lr):
        """
        - m is the number of hidden units
        - d is the number of inputs (length of each input vector)
        - p is the number of classes; labels only ever select class 0 or 1,
          so p must be at least 2
        - lr is the learning rate
        """
        self.m = m
        self.d = d
        self.p = p
        self.lr = lr
        # initialize weights with small random values
        self.wi = randn(self.d, self.m) / 1000
        self.wh = randn(self.m, self.m) / 1000
        self.wo = randn(self.m, self.p) / 1000
        # initialize bias vectors
        self.bo = np.zeros((self.p, 1))
        self.bh = np.zeros((self.m, 1))
        # running statistics; train() resets them at the start of every pass.
        # Initialised here so forward_propagation can also be called directly.
        self.n = 0  # number of correct predictions
        self.loss = 0  # total loss

    def _one_hot(self, label):
        """
        Encode a label as a (p, 1) one-hot column vector:
        a truthy label selects class 0, a falsy label selects class 1.
        """
        target = np.zeros((self.p, 1))
        target[0 if label else 1] = 1
        return target

    def forward_propagation(self, x, y):
        """
        Feed-forward phase for one sequence.
        - x is a list of (d, 1) input vectors
        - y is the (p, 1) one-hot target
        Stores x, y, tau, the hidden states h and the output o on the
        instance (backpropagation reads them) and adds this sequence's
        result to the running counters n and loss.
        """
        # remember the sample so backpropagation can be run right after
        self.x = x
        self.y = y
        self.tau = len(x)  # length of the sequence

        # h[0] is the all-zero initial state; h[t] is the state after x[t-1]
        self.h = np.zeros((self.tau + 1, self.m, 1))
        for t in range(1, self.tau + 1):
            # activation function of the hidden layer is tanh
            self.h[t] = np.tanh(
                (self.wi.T @ x[t - 1]) + (self.wh.T @ self.h[t - 1]) + self.bh
            )
        # only the last hidden state feeds the output layer (many-to-one)
        self.o = softmax((self.wo.T @ self.h[self.tau]) + self.bo)
        self.n += int(np.argmax(self.o) == np.argmax(y))

        # Cross-entropy (natural log). Probabilities are floored at the
        # smallest positive normal float so that a saturated output of
        # exactly 0 gives a finite log instead of -inf, and the 0 * -inf
        # product for non-target classes cannot turn the loss into NaN.
        safe_o = np.maximum(self.o, np.finfo(float).tiny)
        self.loss += -np.sum(np.log(safe_o) * y)

    def backpropagation(self):
        """
        Backpropagation through time for the sequence most recently passed
        to forward_propagation, followed by one gradient-descent update of
        all weights and biases.
        """
        # gradient w.r.t. the output-layer input (softmax + cross-entropy,
        # assuming `softmax` is the standard softmax); y is a one-hot vector
        dL_do = self.o - self.y

        # gradients of the output layer
        dL_dwo = self.h[self.tau] @ dL_do.T
        dL_dbo = dL_do

        # accumulators for the recurrent-layer gradients
        dL_dwh = np.zeros(self.wh.shape)
        dL_dwi = np.zeros(self.wi.shape)
        dL_dbh = np.zeros(self.bh.shape)

        # dL_da[t]: gradient w.r.t. the input of tanh at step t.
        # (1 - h**2) is the derivative of tanh expressed through its output.
        dL_da = np.zeros((self.tau + 1, self.m, 1))
        dL_da[self.tau] = (1 - self.h[self.tau] ** 2) * (self.wo @ dL_do)
        for t in range(self.tau - 1, 0, -1):
            # earlier steps affect the loss only through the next hidden state
            dL_da[t] = (1 - self.h[t] ** 2) * (self.wh @ dL_da[t + 1])

        # the same weights are used at every step, so their gradients add up
        for t in range(1, self.tau + 1):
            dL_dwh += self.h[t - 1] @ dL_da[t].T
            dL_dwi += self.x[t - 1] @ dL_da[t].T
            dL_dbh += dL_da[t]

        # update weights using gradient descent
        self.wo -= self.lr * dL_dwo
        self.wh -= self.lr * dL_dwh
        self.wi -= self.lr * dL_dwi

        # update bias vectors using gradient descent
        self.bo -= self.lr * dL_dbo
        self.bh -= self.lr * dL_dbh

    def train(self, data, test=False):
        """
        Run one pass over data.
        - data is a sized collection of (text, label) pairs; text is encoded
          with createInputs and label is interpreted by truthiness
        - test=True only evaluates (no weight updates)
        Returns (accuracy, mean loss) over the pass, or (0.0, 0.0) when data
        is empty.
        """
        self.n = 0  # number of correct predictions
        self.loss = 0  # total loss
        if len(data) == 0:
            # nothing to average over; avoid dividing by zero
            return 0.0, 0.0

        for x, y in data:
            # feed-forward phase
            self.forward_propagation(createInputs(x), self._one_hot(y))
            if not test:
                # backpropagation phase
                self.backpropagation()
        return self.n / len(data), self.loss / len(data)

In [12]:
import random

# Random seeds for reproducibility. NumPy is (re-)seeded here, immediately
# before the weights are drawn, so that re-running this cell on its own
# rebuilds exactly the same initial model.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# m is the number of hidden units
m = 64
# d is the number of inputs
d = vocab_size
# p is the number of classes
p = 2
# learning rate
lr = 2e-2

# create the RNN model
rnn = RNN(m, d, p, lr)

# maxiter is the number of epochs
maxiter = 1000

# Convert the datasets to lists of (text, label) pairs. dict(...) accepts both
# the original dicts and already-converted lists, so the cell can be re-run.
train_data = list(dict(train_data).items())
test_data = list(dict(test_data).items())

# Shuffle a working copy rather than train_data itself: train_data keeps its
# original order, so a re-run starts from the same order and (with the seeds
# above) reproduces the same sequence of updates.
train_order = list(train_data)

for i in range(maxiter):
    # shuffle the training data
    random.shuffle(train_order)
    acc, loss = rnn.train(train_order)
    # report every 100th epoch
    if i % 100 == 99:
        print("--- Epoch %d" % (i + 1))
        print("Train:\tLoss %.3f | Accuracy: %.3f" % (loss, acc))

        # test the model with forward propagation only (no weight updates)
        acc, loss = rnn.train(test_data, test=True)
        print("Test:\tLoss %.3f | Accuracy: %.3f" % (loss, acc))

--- Epoch 100
Train:	Loss 0.686 | Accuracy: 0.552
Test:	Loss 0.696 | Accuracy: 0.500
--- Epoch 200
Train:	Loss 0.657 | Accuracy: 0.672
Test:	Loss 0.708 | Accuracy: 0.600
--- Epoch 300
Train:	Loss 0.171 | Accuracy: 0.983
Test:	Loss 0.171 | Accuracy: 0.950
--- Epoch 400
Train:	Loss 0.012 | Accuracy: 1.000
Test:	Loss 0.013 | Accuracy: 1.000
--- Epoch 500
Train:	Loss 0.005 | Accuracy: 1.000
Test:	Loss 0.006 | Accuracy: 1.000
--- Epoch 600
Train:	Loss 0.003 | Accuracy: 1.000
Test:	Loss 0.004 | Accuracy: 1.000
--- Epoch 700
Train:	Loss 0.002 | Accuracy: 1.000
Test:	Loss 0.003 | Accuracy: 1.000
--- Epoch 800
Train:	Loss 0.002 | Accuracy: 1.000
Test:	Loss 0.002 | Accuracy: 1.000
--- Epoch 900
Train:	Loss 0.001 | Accuracy: 1.000
Test:	Loss 0.002 | Accuracy: 1.000
--- Epoch 1000
Train:	Loss 0.001 | Accuracy: 1.000
Test:	Loss 0.002 | Accuracy: 1.000
