In [2]:
import numpy as np
import random
import copy
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import time

from tensorflow.keras.optimizers import Adam

In [25]:
data = open('dino.txt', 'r').read()
data= data.lower()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('%d total %d unique' % (data_size, vocab_size))

19909 total 27 unique


In [8]:
chars = sorted(chars)
print(chars)

#TODO: Add another charachter, the empty charachter that just follows newlines
#Instead of having empty strings there that are guarantee to increase the loss function

char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

['\n', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [9]:
dataList = data.split()
np.random.shuffle(dataList)

mlen = 0
for dino in dataList:
    if len(dino) > mlen:
        mlen = len(dino)

n_x = n_y = len(chars)
m = len(dataList)
T_x = T_y = mlen + 1
X = np.zeros((n_x, m, T_x), dtype = np.float32)
Y = np.zeros((n_y, m, T_y), dtype = np.float32)

for ex in range(len(dataList)):
    for tx in range(len(dataList[ex])):
        ch = char_to_ix[dataList[ex][tx]]
        #X[0, :, :] = None, since we want it to generate dino names from scratch
        X[ch, ex, tx + 1] = 1
        Y[ch, ex, tx] = 1
    for i in range(tx + 1, 27):
        if i != 26:
            X[char_to_ix['\n'], ex, i + 1] = 1
        Y[char_to_ix['\n'], ex, i] = 1

#Set final value of y to \n to represent EOS token
# Y[char_to_ix['\n'], :, T_y - 1] += 1
        
X = tf.constant(X, dtype = tf.float32)
Y = tf.constant(Y, dtype = tf.float32)

In [10]:
def clip(gradients, maxValue):
    
    dWaa, dWax, dWya, dba, dby = gradients['dWaa'], gradients['dWax'], gradients['dWya'], gradients['dba'], gradients['dby']
   
    for gradient in [dWax, dWaa, dWya, dba, dby]:
        gradient = tf.clip_by_value(gradient, -1 * maxValue, maxValue)
        
    gradients = {"dWaa": dWaa, "dWax": dWax, "dWya": dWya, "dba": dba, "dby": dby}
    
    return gradients

def sample(parameters, char_to_ix):
    
    Waa, Wax, Wya, by, ba = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['ba']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]
    
    x = tf.zeros([vocab_size, 1], dtype = tf.float32)
    a_prev = tf.zeros([n_a, 1], dtype = tf.float32)
    
    indices = []
    
    idx = -1
    
    counter = 0
    newline_character = char_to_ix['\n']
    
    while (idx != newline_character and counter != 50):
        
        a, y = rnn_single_cell(Waa, Wax, Wya, ba, by, a_prev, x)

        idx = np.random.choice(range(y.shape[0]), p = y.numpy().ravel())
        
        indices.append(idx)
        
        x = np.zeros((vocab_size, 1), dtype = np.float32)
        x[idx] = 1
        x = tf.constant(x, dtype = tf.float32)
        
        a_prev = a
        
        counter +=1

    if (counter == 50):
        indices.append(char_to_ix['\n'])
    
    return indices

def indices_to_str(indices, ix_to_char):
    dinoname = ''
    for ix in indices:
        dinoname += ix_to_char[ix]
        
    return dinoname

In [22]:
def initialize_parameters(n_a, n_x, n_y):
    
    parameters = {
        "Waa" : tf.Variable(tf.random.normal([n_a, n_a], dtype = tf.float32)),
        "Wax" : tf.Variable(tf.random.normal([n_a, n_x], dtype = tf.float32)),
        "Wya" : tf.Variable(tf.random.normal([n_y, n_a], dtype = tf.float32)),
        "ba" : tf.Variable(tf.zeros([n_a, 1], dtype = tf.float32)),
        "by" : tf.Variable(tf.zeros([n_y, 1], dtype = tf.float32))
    }
    
    return parameters

@tf.function
def rnn_single_cell(Waa, Wax, Wya, ba, by, a_prev, x):
    a_next = tf.math.tanh(Waa @ a_prev + Wax @ x + ba)
    y_hat = tf.nn.softmax(Wya @ a_next + by, axis = 0)
    
    return a_next, y_hat

def optimize(X, Y, a_prev, parameters, optimizer, vocab_size = 27):
    
    Waa, Wax, Wya = parameters["Waa"], parameters["Wax"], parameters["Wya"]
    ba, by = parameters["ba"], parameters["by"]
    
    n_x, m, T_x = X.shape
    n_y, m, T_y = Y.shape
    
    loss = tf.Variable(0, dtype = tf.float32)
    
    gradients = {}
    
    #Forward pass
    with tf.GradientTape(watch_accessed_variables = False) as tape:
        tape.watch(Waa)
        tape.watch(Wax)
        tape.watch(Wya)
        tape.watch(ba)
        tape.watch(by)
        tape.watch(loss)
        
        #T_x = T_y so only need to do T_x times
        for t in range(T_x):
            
            a_prev, y_hat = rnn_single_cell(Waa, Wax, Wya, ba, by, a_prev, X[:, :, t])
            
            #print(y_hat)
            #print(Y[:, :, t])
            loss = loss - (tf.multiply(Y[:, :, t], tf.math.log(y_hat)) + tf.multiply(1 - Y[:, :, t], tf.math.log(1 - y_hat + 0.0000001)))
            #print(loss)
                           
        loss = tf.reduce_sum(loss / m, axis = [0, 1])
    
    
    gradients = tape.gradient(loss, [Waa, Wax, Wya, ba, by])
    for i in range(len(gradients)):
        gradients[i] = tf.clip_by_value(gradients[i], -10, 10)
    optimizer.apply_gradients(zip(gradients, [Waa, Wax, Wya, ba, by]))
    

    
    return loss, parameters
        

In [19]:
X.shape

TensorShape([27, 1664, 27])

In [33]:
def model(X, Y, 
          ix_to_char, 
          char_to_ix, 
          num_epochs = 35000, 
          n_a = 50, 
          learning_rate = 0.01, 
          minibatch_size = None, 
          num_dinos = 3, 
          parameters = None):
    '''
    Inputs:
    X: Input data: tf.Constant of dimentions (n_x, m, T_x) 
    Y: Labels: tf.Constant of dimentions (n_y, m, T_y)
    ix_to_char: Dictionary that converts numbers to their respective charachters
    char_to_ix: Reverse of ix_to_char
    n_a: Size of activation
    minibatch_size: If set to None performs batch gradient descent
    num_dinos: Number of dinos to sample every 2000 iterations
    parameters: Default none, if continuing training from other params then put them in here
    
    Outputs: parameters - Dictionary containing weights required to sample
    '''
    
    n_x, m, T_x = X.shape
    
    n_y, m, T_y = Y.shape
    
    if not parameters:
        parameters = initialize_parameters(n_a, n_x, n_y)
    
    if m % minibatch_size != 0:
        print("Minibatch size must divide m")
        return
    
    if not minibatch_size or minibatch_size > m:
        minibatch_size = m
    
    num_mbatches = np.ceil(m / minibatch_size)
    
    last_loss = 0
    
    a0 = tf.Variable(np.zeros((n_a, minibatch_size)), dtype = tf.float32)
    
    start_time = time.time()
    
    optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
    
    for j in range(num_epochs):
        for mb in range(int(num_mbatches)):
#             idx = j % (num_mbatches)
        
            start = int(mb * minibatch_size)
            stop = int((mb + 1) * minibatch_size)
        
            if stop > m:
                stop = m

            Xt = X[:, start:stop, :]
            Yt = Y[:, start:stop, :]

            #a_prev = tf.clip_by_value(a_prev, 0, 0)

            loss, parameters = optimize(Xt, Yt, a0, parameters, optimizer, vocab_size = T_x)
        
        
        if j % 5 == 0:
            print('Epoch: %d, Loss: %f' % (j, loss) + '\n')
            print("Seconds elapsed: {:.2f}".format(time.time() - start_time))

            print("Sampled Dino Names: ")
            for dino in range(num_dinos):
                    sampled_indices = sample(parameters, char_to_ix)
                    dino = indices_to_str(sampled_indices, ix_to_char)
                    print(dino)
            
            last_loss = loss
        
    return parameters

In [34]:
params = model(X, Y, ix_to_char, char_to_ix, num_epochs = 3000, n_a = 50, minibatch_size = 256, num_dinos = 5, parameters = None, learning_rate = 0.03)

Minibatch size must divide m


In [25]:
dinoParams = open("dinoParams.json", "wb")

pickle.dump(params, dinoParams)

dinoParams.close()

In [27]:
with open('dinoParams.json', 'rb') as f:
    params = pickle.load(f)

In [29]:
for i in range(100):
    print(indices_to_str(sample(params, char_to_ix), ix_to_char))

ossisaurus

dachilysaurus

alcatengsaurus

delrasaurus

braganodanodia

dyihodsaurus

tarisaurus

girotevong

sygostisaurus

aurhanus

saangosaurus

pahrhenus

centengosaurus

asandmus

tonklosaurra

astrinstteraaa

sinolrnathya

neajisceps

poreamrnloptops

cartoceratops

cinkyristes

conklendosaurus

colthalosaurus

haptinus

dytnosaurus

heraavenator

haplodrylongosaurus

cogamosaurus

prolur

netokoflis

deaaplansaurus

sitonccanlurosaurus

nechetoa

ligen

venighyn

doshonnimys

xitakrsaurus

laaplovecerator

goraatacosaurus

camproniphalus

liyrilsaurus

eucnosaurus

prorosaurus

rulongus

eorijtia

caudosteus

yevadasaurus

khicdinsaurus

roicosauruslogagsaurus

ayrisaurus

iptochatops

hemilsus

anctodonlar

horusenatin

nypfooplodrnthotinophossan

matosaurus

apelfihusaurus

pronyuynol

astanosaurus

zhetanoplosus

totosaurus

vuephonasaurus

unengapoa

danataratos

yurenkasaurus

darporns

juehansaurus

tapatosaurus

juioter

stalovenathus

siacsuin

botanimosaurus

baaethora

In [44]:
Waa, Wax, Wya, ba, by = params["Waa"], params["Wax"], params["Wya"], params["ba"], params["by"]

newline_char = char_to_ix['\n']

idx = -1

a_prev = np.zeros((params["Waa"].shape[0], 1), dtype = np.float32)
x = np.zeros((params["Wax"].shape[1], 1), dtype = np.float32)
indices = []

counter = 0
    
while (idx != newline_char):

    a, y = rnn_single_cell(Waa, Wax, Wya, ba, by, a_prev, x)

    idx = np.argmax(y.numpy(), axis = 0)[0]

    indices.append(idx)

    x = np.zeros((vocab_size, 1), dtype = np.float32)
    x[idx] = 1
    x = tf.constant(x, dtype = tf.float32)

    a_prev = a

    counter +=1

print(indices_to_str(indices, ix_to_char))
    

alnosaurus

