In [41]:
# Import necessary libraries
import random
import numpy as np
from helper import softmax, get_weights


In [43]:
# Read the dinos.txt file and convert it to lower case.
# The context manager closes the file handle as soon as the text is in memory.
with open('dinos.txt', 'r') as dinos_file:
    data = dinos_file.read().lower()

# Collect the unique characters of the file, sorted so that the vocabulary
# (and the two lookup tables built from it) has the same order on every run
chars = sorted(set(data))

# Get length of the file and length of the vocabulary
data_size, vocab_size = len(data), len(chars)
print('There are %d total characters and %d unique characters in your data.' % (data_size, vocab_size))

# Define dictionary of alphabets:integer
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Define dictionary of integer:alphabets
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# Call the get_weights function from the helper file to get the model weights
# To get random weights set random=1
# To get the trained weights specify the number of iterations and set random=0
parameters = get_weights(num_iterations=15000, random=0)


There are 19910 total characters and 27 unique characters in your data.


In [42]:
# Function to predict the next set of characters which forms the dinosaur name
def rnn_model(parameters, char_to_ix, max_length=50):
    """Sample a sequence of character indices from the RNN, one step at a time.

    Sampling starts from an all-zero input vector and an all-zero hidden
    state. Every step computes the new hidden state, turns it into a
    distribution over the vocabulary, draws one index from that distribution
    and feeds the drawn index back in as a one-hot input for the next step.

    Draws come from NumPy's global random state (np.random.choice), so call
    np.random.seed beforehand if a reproducible name is needed.

    Args:
        parameters: dictionary holding the entries 'U', 'V', 'W', 'beta1' and
            'beta2'. V is multiplied with the input, U with the previous
            hidden state and W with the current hidden state; beta1 and beta2
            are the biases added to the hidden state and output respectively.
        char_to_ix: mapping from character to index. It must contain the
            newline character, which acts as the end-of-name marker.
        max_length: maximum number of characters to sample before the name
            is cut off. Defaults to 50.

    Returns:
        The list of sampled indices. The newline index is always the last
        entry and never occurs anywhere else in the list.
    """
    # Get the weights and biases from the parameters dictionary
    U, V, W = parameters['U'], parameters['V'], parameters['W']
    beta1, beta2 = parameters['beta1'], parameters['beta2']

    # Vocabulary size is read from the output bias, hidden size from U
    vocab_size = beta2.shape[0]
    n_h = U.shape[1]

    # First input and first hidden state are all zeros
    x = np.zeros((vocab_size, 1))
    h_prev = np.zeros((n_h, 1))

    # Indices of the characters sampled so far
    indices = []

    # -1 is not a valid index, so the loop body always runs at least once
    # (as long as max_length is positive)
    idx = -1

    # Index of the newline character, which terminates a name
    newline_character = char_to_ix['\n']

    # Loop until the newline character is sampled or max_length is reached
    while idx != newline_character and len(indices) < max_length:

        # New hidden state of the RNN unit
        h = np.tanh(np.dot(V, x) + np.dot(U, h_prev) + beta1)

        # Output of the RNN unit; softmax comes from the helper module and is
        # presumably a column of probabilities summing to 1, which is what
        # np.random.choice requires for p -- TODO confirm against helper
        y = softmax(np.dot(W, h) + beta2)

        # Sample the index of the next character from that distribution
        idx = np.random.choice(vocab_size, p=y.ravel())
        indices.append(idx)

        # The sampled character becomes the next one-hot input
        x = np.zeros((vocab_size, 1))
        x[idx] = 1

        # Carry the hidden state over to the next step
        h_prev = h

    # Terminate the name only if sampling stopped because of the length cap.
    # If the newline was drawn on the very last allowed step it is already
    # the final entry and must not be appended a second time.
    if idx != newline_character:
        indices.append(newline_character)

    # Return the list of indices
    return indices


In [44]:
# Sample one dinosaur name from the model as a list of character indices
generated_indices = rnn_model(parameters, char_to_ix)

# Look every index up in ix_to_char and glue the characters into one string
txt = ''.join(map(ix_to_char.__getitem__, generated_indices))

# Capitalize the first character and leave the rest untouched
txt = txt[0].upper() + txt[1:]

# Print the generated word; the name already ends with its own newline,
# so print must not add another one
print('%s' % txt, end='')

Slnoria
