In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from utility import *

In [2]:
data=open('./dinos.txt').read()
data=data.lower()
chars=sorted(list(set(data)))
vocab_size=len(chars)
print(chars)
len(data.split('\n'))

['\n', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


1536

In [3]:
examples=[e.strip() for e in data.split('\n')]

In [4]:
char_to_ix={c:i for i,c in enumerate(chars)}
ix_to_char={i:c for i,c in enumerate(chars)}

In [5]:
def clip(gradients,maxval):
    dWax=gradients['dWax']
    dWya=gradients['dWya']
    dWaa=gradients['dWaa']
    dba=gradients['dba']
    dby=gradients['dby']
    
    for grad in [dWax,dWya,dWaa,dba,dby]:
        np.clip(grad,-maxval,maxval,out=grad)
        
    gradients['dWax']=dWax
    gradients['dWya']=dWya
    gradients['dWaa']=dWaa
    gradients['dba']=dba
    gradients['dby']=dby
    
    return gradients

In [7]:
def sample(parameters,max_size=30):
    Waa=parameters['Waa']
    Wax=parameters['Wax']
    Wya=parameters['Wya']
    ba=parameters['ba']
    by=parameters['by']
    
    n_a=Waa.shape[0]
    
    x=np.zeros((vocab_size,1))
    a_prev=np.zeros((n_a,1))
    
    indices=[]
    idx=-1
    
    while(idx!=char_to_ix['\n'] and len(indices)<=max_size):
        a_t=np.tanh(np.dot(Wax,x)+np.dot(Waa,a_prev)+ba)
        y=softmax(np.dot(Wya,a_t)+by)
        
        a_prev=a_t
        
        idx=np.random.choice(np.arange(len(y.ravel())),p=y.ravel())
        indices.append(idx)
        
        x=np.zeros((vocab_size,1))
        x[idx]=1
    
    if len(indices)>max_size:
        indices.append(char_to_ix['\n'])
    
    
    return indices

In [8]:
def optimize(X,Y,parameters,a_prev,lr):
    loss,cache=rnn_forward(X,Y,parameters,a_prev)
    
    gradients,a=rnn_backward(X,Y,parameters,cache)
    
#     print("Gradients after backward pass:")
#     for key, value in gradients.items():
#         print(f"{key}: {value}")
    
    gradients=clip(gradients,5)
    
#     print("Gradients after clipping:")
#     for key, value in gradients.items():
#         print(f"{key}: {value}")
    
    parameters=update_parameters(parameters,gradients,lr)
    
    return loss,a[:,len(X)].reshape(-1,1),parameters,gradients

In [9]:
def model(data,char_to_ix,ix_to_char,epochs=22000,learning_rate=0.01,n_a=64,sampling_size=7,vocab_size=27):
    n_x,n_y=vocab_size,vocab_size
    
    parameters=initialize(n_x,n_a,n_y)
    
    examples=[e.strip() for e in data]
    
    a_prev=np.zeros((n_a,1))
    
    for i in range(epochs):
        example_i=examples[i%len(examples)]
        
        single_example=[char_to_ix[c] for c in example_i]
        X=[None]+single_example
        
        Y=single_example+[char_to_ix['\n']]
        
        loss,a_prev,parameters,gradients=optimize(X,Y,parameters,a_prev,learning_rate)
        
        if i%2000==0:
            print(f'Current iteration: {i} Current loss {loss}')
            for j in range(sampling_size):
                sampled_indices=sample(parameters)
                word=get_sampled_indices(sampled_indices,ix_to_char)
                print(word.replace('\n',''))
            print('\n')
        
    return parameters

In [10]:
parameters=model(data.split('\n'),char_to_ix,ix_to_char)

Current iteration: 0 Current loss 46.139977228199065
Pdgfbgklxvgermxrcwvihvmxotbnyla
Qvivxkhok
C
R
Oedpmsjcivjqobtp
Diekudqzvetobjujrs
Ushpvplojyj


Current iteration: 2000 Current loss 28.42074870746157
Orlolonaus
Kraptur
Ros
Paptosaurus
Keros

Ymomourus


Current iteration: 4000 Current loss 25.262555742584038
Us
Tosaurus
Iangoidene
Oraman
Homutopn
Laoceaceor
Sanblongops


Current iteration: 6000 Current loss 26.23057348957724
Us
Saurus
Csamaiaur
Tin
Toggttacor
Enditeratataonus
On


Current iteration: 8000 Current loss 12.837914680376004

Cs
Scigoulelopeunaprreuloobioslcor
Rolnimus
Bus
Us
S


Current iteration: 10000 Current loss 23.241564695829474
Lovindratrengis
Leditvunylengsaurus
Tuligurisaurus
Perss
Anchis
Eroinesaurus
Inmiss


Current iteration: 12000 Current loss 47.90220700213956
Saurus
Saurus
Addls
Ri
Lolristrinbjelhasidisaurus
Ia
Vin


Current iteration: 14000 Current loss 31.43159923406772
Gauaurus
Candatataababiamkats
Rahthahbavaebiia
Lipaanosaurus
Miatmaaabhabemahadraani