### RNN example

In this notebook we will see an example of the RNN built in RNN.py. In this example we will be using the data person_names.txt to create new names.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from DNet.layers import NLLLoss, RNN
from DNet.optimizers import Adam
from DNet.model import NNet

In [2]:
PATH = Path.cwd()
PATH_DATA = PATH / 'data'

Lets set the random.seed in order to generate always the same weights.

In [3]:
np.random.seed(1)

The following functions are used to:

1. **one_hot_encoding** In order to transform letters into inputs.

2. **generate_names** Generate aleatory names.

In [4]:
def one_hot_encoding(input, size):
    """
    Do one hot encoding for a given input and size.
    
    Parameters
    ----------
    input : list
        list containing the numbers to make the 
        one hot encoding
    size : int
        Maximum size of the one hot encoding.
        
    Returns
    -------
    output : list
        List with the one hot encoding arrays.
    """
    output = []

    for index, num in enumerate(input):
        one_hot = np.zeros((size, 1))

        if (num != None):
            one_hot[num] = 1
    
        output.append(one_hot.tolist())

    return np.array(output)


def generate_names(index_to_character, model):
    """
    Generates a random names with the pretrained RNN.
    Parameters
    ----------
    index_to_character : dict
        Dictionary that relates the indexes with the letters
        to be used in order to create the name.
    Returns
    -------
    name : list
        List containing the final name predicted.
    """
    letter = None
    indexes = list(index_to_character.keys())

    letter_x = np.zeros((model.layers[0].input_dim, 1))
    name = []

    # similar to forward propagation.
    hidden = np.zeros((model.layers[0].hidden_dim , 1))

    while letter != '\n' and len(name)<15:
        
        hidden = model.layers[0].rnn_cell.forward(letter_x, hidden)
        input_softmax = model.layers[0].lineal.forward(hidden)
        y_pred = model.layers[0].softmax.forward(input_softmax)

        index = np.random.choice(indexes, p=y_pred.ravel())
        letter = index_to_character[index]

        name.append(letter)

        letter_x = np.zeros((model.layers[0].input_dim, 1))
        letter_x[index] = 1

    return "".join(name)

### Data

The data contains 18239 names.

In [5]:
input_dim = 27
output_dim = 27
hidden_dim = 50

# Load data with the names
person_names = open(PATH_DATA / 'person_names.txt', 'r').read()
person_names= person_names.lower()
characters = list(set(person_names))

character_to_index = {character:index for index,character in enumerate(sorted(characters))}
index_to_character = {index:character for index,character in enumerate(sorted(characters))}

with open(PATH_DATA / 'person_names.txt') as f:
    person_names = f.readlines()

person_names = [name.lower().strip() for name in person_names]
np.random.shuffle(person_names)

Example of some of the names contained in person_names.txt

In [6]:
print(person_names[:5])

['alysse', 'phoebe', 'jabarri', 'alban', 'shaqwana']


### The model

In [7]:
# Initialize the model
model = NNet()
# Create the model structure
model.add(RNN(input_dim, output_dim, hidden_dim))

loss = NLLLoss()
optim = Adam()

Example of prediction

In [8]:
for i in range(5):
    name = generate_names(index_to_character, model)
    print(name)

buosmjz

yxjepduggxxnwiz
khazlsrraxynzgo
jphvbaadkycqmie
easqkscywqjjmyw


In [9]:
# Train the model
costs = []
num_epochs = 30000

for epoch in range(num_epochs + 1):
    # create the X inputs and Y labels
    index = epoch % len(person_names)
    X = [None] + [character_to_index[ch] for ch in person_names[index]] 
    Y = X[1:] + [character_to_index["\n"]]

    # transform the input X and label Y into one hot enconding.
    X = one_hot_encoding(X, input_dim)
    Y = one_hot_encoding(Y, output_dim)

    model.forward(X)
    cost = model.loss(Y, loss)
    model.backward()
    model.optimize(optim)
    
    costs.append(cost)

    if epoch % 10000 == 0:
        print ("Cost after iteration %epoch: %f" %(epoch, cost))

Cost after iteration 0.000000e+00poch: 0.832073
Cost after iteration 1.000000e+04poch: 0.971043
Cost after iteration 2.000000e+04poch: 0.562794
Cost after iteration 3.000000e+04poch: 0.601051


In [10]:
for i in range(5):
    name = generate_names(index_to_character, model)
    print(name)

ketullanickei

lorita

ustal

foshanda

jicku



Really well! It seems the model can create now new names.

In [11]:
from DNet.layers import RNNCell

In [18]:
weights = np.array(
    [[ 0.81217268, -0.30587821, -0.26408588, -0.53648431],
   [ 0.43270381, -1.15076935,  0.87240588, -0.38060345]]
)

dZ = np.array([[ 1.74481176], [-0.7612069]])

In [19]:
input = np.array([[0.86540763], [-2.3015387]])
hidden = np.zeros((2, 1))
combined = np.concatenate((input, hidden), axis=0)

expected_d_hidden = np.array([[-0.13050669], [-0.26152525]])

input_dim = 2
hidden_dim = 2

recurrent = RNNCell(input_dim, hidden_dim)
recurrent.lineal.weights = weights

In [20]:
hidden = recurrent.forward(input, hidden)


In [21]:
obtained_d_hidden = recurrent.backward(
    dZ=dZ, hidden=hidden, combined=combined
)


In [22]:
obtained_d_hidden

array([[-0.10465545],
       [-0.19716043]])

In [24]:


np.testing.assert_almost_equal(expected_d_hidden, obtained_d_hidden)   

AssertionError: 
Arrays are not almost equal to 7 decimals

Mismatched elements: 2 / 2 (100%)
Max absolute difference: 0.06436482
Max relative difference: 0.32645913
 x: array([[-0.1305067],
       [-0.2615252]])
 y: array([[-0.1046554],
       [-0.1971604]])