In [1]:
import numpy as np
import random
import copy
from ipynb.fs.full.Utils import *

### Read the dataset

In [2]:
#Load the corpus: a text file of dinosaur names.
#The text is lowercased as it is read so that the vocabulary is case-insensitive.
with open("./dinos_names.txt") as fid:
    data = fid.read().lower()

#The vocabulary is every distinct character that occurs in the corpus
chars = list(set(data))
data_size = len(data)
vocab_size = len(chars)

print("The size of the data:",data_size)
print("The vocab size:",vocab_size)

The size of the data: 19909
The vocab size: 27


The dataset contains 19909 characters in total, of which 27 are unique: the 26 lowercase letters plus the newline character that separates the names.

In [3]:
#Put the vocabulary in a stable, sorted order (set iteration order is arbitrary)
chars.sort()

print("The unique characters are:\n",chars)

The unique characters are:
 ['\n', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [4]:
"""
Create a dictionary with mapping between:
  1. character as key and index as value
  2. index as key and character as value
"""
# character -> position in the sorted vocabulary
char_to_index = dict(zip(chars, range(len(chars))))
# position in the sorted vocabulary -> character
index_to_char = dict(enumerate(chars))

### Gradient Clipping

In [5]:
def clip(gradients,maxValue):
    """
    Clip every gradient to the range [-maxValue, maxValue].

    Arguments:
    gradients -- dictionary holding the arrays "dWaa", "dWax", "dWya", "db", "dby"
    maxValue -- threshold; entries above maxValue become maxValue and
                entries below -maxValue become -maxValue

    Returns:
    gradients -- new dictionary with the same five keys mapped to clipped
                 copies; the arrays that were passed in are left untouched
    """
    clipped = {}

    for key in ("dWaa", "dWax", "dWya", "db", "dby"):
        # No `out=` argument here: np.clip then allocates a fresh array, so the
        # caller's gradients are not modified as a hidden side effect.
        clipped[key] = np.clip(gradients[key], -maxValue, maxValue)

    return clipped

In [6]:
#Exercise clip() on random gradients scaled up so that many entries exceed the threshold
maxValue = 10

np.random.seed(3)

#The arrays are drawn in this fixed order so that the seeded values are reproducible
dWax = 10*np.random.randn(5,3)
dWaa = 10*np.random.randn(5,5)
dWya = 10*np.random.randn(2,5)
db = 10*np.random.randn(5,1)
dby = 10*np.random.randn(2,1)

gradients = clip(dict(dWax=dWax, dWaa=dWaa, dWya=dWya, db=db, dby=dby), maxValue)

#Spot-check one entry of each clipped gradient
print("gradients[\"dWaa\"][1][2] =", gradients["dWaa"][1][2])
print("gradients[\"dWax\"][3][1] =", gradients["dWax"][3][1])
print("gradients[\"dWya\"][1][2] =", gradients["dWya"][1][2])
print("gradients[\"db\"][4] =", gradients["db"][4])
print("gradients[\"dby\"][1] =", gradients["dby"][1])

gradients["dWaa"][1][2] = 10.0
gradients["dWax"][3][1] = -10.0
gradients["dWya"][1][2] = 0.2971381536101662
gradients["db"][4] = [10.]
gradients["dby"][1] = [8.45833407]


### Sampling

In [7]:
def sample(parameters, char_to_ix, max_length = 50):
    """
    Sample a sequence of character indices from the RNN, one character at a time.

    Starting from an all-zero input and an all-zero hidden state, each step
    computes the next hidden state, turns the output scores into a probability
    distribution with softmax, draws one index from it and feeds that index
    back in as a one-hot vector.

    Arguments:
    parameters -- dictionary holding the arrays "Waa", "Wax", "Wya", "b", "by"
    char_to_ix -- dictionary mapping character to index; must contain the newline character
    max_length -- maximum number of characters to draw before the sequence is
                  force-terminated (defaults to 50)

    Returns:
    indices -- list of sampled indices; it always ends with exactly one newline index

    Note: softmax is not defined in this cell; it is expected to be in scope
    already (presumably via the Utils star-import -- confirm).
    """
    #retrieve the parameters
    Waa,Wya,Wax,b,by = parameters["Waa"],parameters["Wya"],parameters["Wax"],parameters["b"],parameters["by"]
    n_a = Waa.shape[1]
    vocab_size = by.shape[0]

    newline_char = char_to_ix['\n']

    #first input and initial hidden state are both zero vectors
    x = np.zeros((vocab_size,1))
    a_prev = np.zeros((n_a,1))

    indices = []
    idx = -1

    #stop on a sampled newline, or once max_length characters have been drawn
    while(len(indices) < max_length and idx != newline_char):
        a_next = np.tanh(np.dot(Wax,x) + np.dot(Waa,a_prev) + b)
        y_pred = softmax(np.dot(Wya,a_next) + by)

        #draw the next character index according to the predicted distribution
        idx = np.random.choice(vocab_size,p=y_pred.ravel())

        indices.append(idx)

        #the sampled character becomes the next one-hot input
        x = np.zeros((vocab_size,1))
        x[idx] = 1

        a_prev = a_next

    #Terminate the sequence only if it was cut off by the length limit.
    #Checking the last sampled index (rather than the counter) avoids a
    #doubled newline when the final allowed draw is itself the newline.
    if idx != newline_char:
        indices.append(newline_char)

    return indices

In [8]:
#Exercise sample() with randomly initialised parameters
np.random.seed(2)
_ = 20
n_a = 100

#Draw order (Wax, Waa, Wya, b, by) is kept fixed so the seeded values are reproducible
Wax = np.random.randn(n_a, vocab_size)
Waa = np.random.randn(n_a, n_a)
Wya = np.random.randn(vocab_size, n_a)
b = np.random.randn(n_a, 1)
by = np.random.randn(vocab_size, 1)
parameters = dict(Wax=Wax, Waa=Waa, Wya=Wya, b=b, by=by)


indices = sample(parameters, char_to_index)
print("Sampling:")
print("list of sampled indices:\n", indices)
print("list of sampled characters:\n", [index_to_char[ix] for ix in indices])

Sampling:
list of sampled indices:
 [12, 23, 24, 14, 7, 2, 10, 23, 25, 12, 3, 24, 15, 24, 3, 20, 3, 17, 4, 0]
list of sampled characters:
 ['l', 'w', 'x', 'n', 'g', 'b', 'j', 'w', 'y', 'l', 'c', 'x', 'o', 'x', 'c', 't', 'c', 'q', 'd', '\n']


### Build the Language Model

In [9]:
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01, clip_value = 5):
    """
    Run one training step: forward pass, backward pass, gradient clipping and
    parameter update.

    Arguments:
    X -- input sequence, passed through to rnn_forward / rnn_backward
    Y -- target sequence, passed through to rnn_forward / rnn_backward
    a_prev -- hidden state to start the forward pass from
    parameters -- dictionary of model parameters
    learning_rate -- step size handed to update_parameters
    clip_value -- gradients are clipped to [-clip_value, clip_value] (defaults to 5)

    Returns:
    loss -- loss value reported by rnn_forward
    gradients -- the clipped gradients
    a[len(X)-1] -- the entry of rnn_backward's second return value at the last
                   position of X (used by the caller as the next a_prev)

    Note: the updated parameters are not returned, so callers see the update
    only if update_parameters modifies the dictionary in place (presumably it
    does -- confirm in Utils).
    """
    # Forward propagate through time
    loss, cache = rnn_forward(X, Y, a_prev, parameters)
    
    # Backpropagate through time
    gradients, a = rnn_backward(X, Y, parameters, cache)
    
    # Clip the gradients 
    gradients = clip(gradients, clip_value)
    
    # Update parameters
    parameters = update_parameters(parameters, gradients, learning_rate)
    
    return loss, gradients, a[len(X)-1]

In [10]:
#Exercise optimize() once on a short hand-written sequence
np.random.seed(1)

vocab_size = 27
n_a = 100

#initial hidden state
a_prev = np.random.randn(n_a, 1)

#weights & biases; draw order (Wax, Waa, Wya, b, by) is kept fixed for reproducibility
Wax = np.random.randn(n_a, vocab_size)
Waa = np.random.randn(n_a, n_a)
Wya = np.random.randn(vocab_size, n_a)
b = np.random.randn(n_a, 1)
by = np.random.randn(vocab_size, 1)

parameters = dict(Wax=Wax, Waa=Waa, Wya=Wya, b=b, by=by)

X = [12, 3, 5, 11, 22, 3]
Y = [4, 14, 11, 22, 25, 26]

#snapshot of the parameters taken before the optimization step
initial_parameters = copy.deepcopy(parameters)

loss, gradients, a_last = optimize(X, Y, a_prev, parameters, learning_rate = 0.01)

print("Loss =", loss)
print("gradients[\"dWaa\"][1][2] =", gradients["dWaa"][1][2])
print("np.argmax(gradients[\"dWax\"]) =", np.argmax(gradients["dWax"]))
print("gradients[\"dWya\"][1][2] =", gradients["dWya"][1][2])
print("gradients[\"db\"][4] =", gradients["db"][4])
print("gradients[\"dby\"][1] =", gradients["dby"][1])
print("a_last[4] =", a_last[4])

Loss = 126.50397572165382
gradients["dWaa"][1][2] = 0.19470931534716368
np.argmax(gradients["dWax"]) = 93
gradients["dWya"][1][2] = -0.007773876032002977
gradients["db"][4] = [-0.06809825]
gradients["dby"][1] = [0.01538192]
a_last[4] = [-1.]


### Train the model

In [11]:
def model(data_x, ix_to_char, char_to_ix, num_iterations = 35000, n_a = 50, dino_names = 7, vocab_size = 27, verbose = False):
    """
    Trains the model and generates dinosaur names. 
    
    Arguments:
    data_x -- iterable of names (strings); surrounding whitespace is stripped
              and blank entries are skipped
    ix_to_char -- dictionary mapping index to character (handed to get_sample)
    char_to_ix -- dictionary mapping character to index; must contain the
                  newline character and every character that occurs in data_x
    num_iterations -- number of optimization steps (one name per step)
    n_a -- size of the hidden state
    dino_names -- how many names to sample and print every 1000 iterations
    vocab_size -- number of distinct characters (input and output dimension)
    verbose -- currently unused; kept so existing callers keep working
    
    Returns:
    parameters -- learned parameters
    
    Raises:
    ValueError -- if data_x contains no non-blank name
    """
    #Retrieve n_x,n_y from vocab size
    n_x,n_y = vocab_size,vocab_size
    
    #Initialize the parameters
    parameters = initialize_parameters(n_a,n_x,n_y)
    
    # Initialize the hidden state of the RNN
    a_prev = np.zeros((n_a, 1))
    
    # Drop blank entries (e.g. the empty string a trailing newline leaves behind
    # after str.split): training on an empty name would only teach the network
    # to emit the end-of-name character immediately.
    examples = [name for name in (x.strip() for x in data_x) if name]
    
    if not examples:
        raise ValueError("data_x contains no non-empty names to train on")
    
    np.random.shuffle(examples)
    
    # Every target sequence ends with the newline index
    ix_newline = [char_to_ix["\n"]]
    
    for itr in range(num_iterations):
        # Walk through the shuffled names in order, wrapping around at the end
        idx = itr%len(examples)
        
        #Sample name 
        samples_chars = examples[idx]
        
        #Sample name represented in indices of chars
        sample_indices = [char_to_ix[c] for c in samples_chars]
        
        # Inputs are the name shifted right by one step; the leading None is a
        # placeholder for the first time step (presumably treated as a zero
        # input by rnn_forward -- confirm in Utils).
        X = [None] + sample_indices
        
        # Targets are the name itself followed by the newline terminator
        Y = sample_indices + ix_newline

        # Perform one optimization step: Forward-prop -> Backward-prop -> Clip -> Update parameters
        # learning rate is 0.01
        curr_loss, gradients, a_prev = optimize(X, Y, a_prev, parameters, learning_rate = 0.01)
        
        if itr % 1000 == 0:
            
            print('Iteration: %d, Loss: %f' % (itr, curr_loss) + '\n')
            
            # The number of dinosaur names to print
            for name in range(dino_names):
                
                # Sample indices
                sampled_indices = sample(parameters, char_to_ix)
                
                # Get sample name from the sampled indices
                last_dino_name = get_sample(sampled_indices, ix_to_char)
                
                print(last_dino_name.replace('\n', ''))
                
            print('\n')
        
    return parameters

In [12]:
#Train on the names (one per line of the corpus) for 22001 iterations,
#so that the periodic progress report also fires at iteration 22000
parameters = model(
    data.split("\n"),
    index_to_char,
    char_to_index,
    num_iterations = 22001,
)

Iteration: 0, Loss: 39.551161

Vcszjftdxvghrupmpmnecenjsrtgawiuxrpuxsnmygtkynaqed
Kzwjxuzbwwgxnvwxv
Yfhgugyabdlsstrqiquxtplox
Jv
Tudbnfzzapaqkfrjshkdx
Ikkyrvwfndmswduewrxidasgsrjaebgcbspcrzkavhuggnzqru
Afyfiibajyszhrr


Iteration: 1000, Loss: 34.023345

Icieobnaurus
Ehecosewun

Kapaaaurusus
Plus
Rrhhih
Esfam


Iteration: 2000, Loss: 20.119713

Ierumodengvtaccenus
Ilosaurus
Racorosadrus
Xanurus
Qun
Citdinibepmonophus
Lhesbnipgerogdon


Iteration: 3000, Loss: 29.855341


Thtan
Terolaus
Snkamenabopiius
Auxusifnaanpatosaurus
Rnnosaurun
Usaurus


Iteration: 4000, Loss: 29.458625

Angatagoratavyorosshlosgnanatops
Elongoslos
Brolihioplosaurus
Hisereatorwus
Naonaszosaurus
Irhyodorox
Opkossanniothis


Iteration: 5000, Loss: 42.375442

Sanrinosaurus
Saurasyeredosaurus
Ongopopne
Guriangoniden
Ugobantantenys
Dylpenagosaurus
Ldyrenonis


Iteration: 6000, Loss: 20.569226

Hurahosaurus
Vaptrosaurus
Arraprosaurus
Pesharhus
Xinosaurus
Nalohologypheroptorantar
Pyononla


Iteration: 7000, Loss: 16.789906