In [1]:
import numpy as np
import tensorflow as tf
import random 

In [2]:
text_char = open("holy_grail.txt",'r').read()

In [3]:
# Generate the vocabulary.
vocab = list(set(text_char))
vocab_size = len(vocab)
print(vocab)
print("vocab size: {}".format(vocab_size))

text_len = len(text_char)
print("text lenght: {}".format(text_len))

['5', ',', 'x', 'E', 'k', 'N', '`', '0', 'o', 'p', 'R', 'c', 'F', 'V', 'g', ']', 'z', 'J', '2', 'S', '3', 't', 's', 'i', '7', 'Q', 'D', 'L', 'I', 'u', 'H', '-', 'G', 'n', 'K', 'q', 'B', '"', 'Z', 'P', 'y', 'j', 'A', '\n', 'U', '!', 'M', ')', ' ', 'Y', 'v', '8', '?', 'm', '6', 'T', 'W', '4', '.', 'w', 'C', 'a', 'O', 'l', '[', ':', 'h', '(', '9', 'e', "'", 'f', 'r', '#', 'b', 'd', '1', ';']
vocab size: 78
text lenght: 60061


In [4]:
# Create dictionaries to switch between the indices of the characters and the characters themselves.
char_to_idx = { ch:i for i,ch in enumerate(vocab) }
idx_to_char = { i:ch for i,ch in enumerate(vocab) }

# Translate the text to indices.
text_idx = [char_to_idx[c] for c in text_char]

In [5]:
# Set the lenght of sequences we want to train on.
seq_len = 25

# Generate the dataset.
input_data = []
target_data = []
for i in range(text_len-seq_len):
    input_data.append(text_idx[i:i+seq_len])
    target_data.append(text_idx[i+1:i+seq_len+1])

In [6]:
tf.reset_default_graph()
# Create the TensorFlow dataset.
dataset = tf.data.Dataset.from_tensor_slices((input_data,target_data))

# We do not train on batches or shuffle the dataset.
iterator = tf.data.Iterator.from_structure(dataset.output_types,dataset.output_shapes)
iterator_init_op = iterator.make_initializer(dataset)

In [7]:
# Get the input from the generator.
next_batch = iterator.get_next()
input_data = next_batch[0]
target_data = next_batch[1]

# Create one hot tensors.
input_one_hot = tf.one_hot(input_data, depth=vocab_size)
target_one_hot = tf.one_hot(target_data, depth=vocab_size)

# Initialize the placeholder for the hidden state.
hidden_size = 100
init_hs = tf.placeholder(shape=[1, hidden_size], dtype=tf.float32)    

In [8]:
# Build the model.
with tf.variable_scope("RNN", reuse=tf.AUTO_REUSE) as scope:
    
    # Set hidden state.
    hs_t = init_hs
    # Initialize list to save the hidden states and outputs of the sequence.
    hs = []
    ys = []
    
    
    # Initialize all weights and biases.
    initializer = tf.random_normal_initializer(stddev=0.1)
    Wxh = tf.get_variable("Wxh", [vocab_size, hidden_size], initializer=initializer)
    Whh = tf.get_variable("Whh", [hidden_size, hidden_size], initializer=initializer)
    Why = tf.get_variable("Why", [hidden_size, vocab_size], initializer=initializer)
    bh  = tf.get_variable("bh", [hidden_size], initializer=initializer)
    by = tf.get_variable("by", [vocab_size], initializer=initializer)
    
    # Unfold the RNN for as many steps as our sequence is long.
    for t in range(seq_len):
                
        # Read out the ith input and the ith target character
        xs_t = input_one_hot[t,:]
        xs_t = tf.expand_dims(xs_t, axis=0)
        ts_t = target_one_hot[t,:]
        
        # Compute the new hidden state.
        hs_t = tf.tanh(tf.matmul(xs_t, Wxh) + tf.matmul(hs_t, Whh) + bh)
        # Compute the new output.
        ys_t = tf.matmul(hs_t, Why) + by
        # Store hidden state and output.
        hs.append(hs_t)
        ys.append(ys_t)
        
# The RNN is done. 
# Save the hidden state for feeding it to the next sub sequence.
hs_remember = hs[0]
print(ys)
# Compute the softmax of the very last prediction for sampling.
output_softmax = tf.nn.softmax(ys[-1])
# Compute the loss of all the outputs.
outputs = tf.concat(ys, axis=0)
print(outputs)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=target_one_hot, logits=outputs))

[<tf.Tensor 'RNN/add_2:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_5:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_8:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_11:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_14:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_17:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_20:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_23:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_26:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_29:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_32:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_35:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_38:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_41:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_44:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_47:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_50:0' shape=(1, 78) dtype=float32>, <tf.Tensor 'RNN/add_53:0' shape=(1, 78) dtype=floa

In [9]:
# Optimizer.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
training_step = optimizer.minimize(loss)

In [10]:
with tf.Session() as sess:
    
    sess.run(tf.global_variables_initializer())
    
    for epoch in range(100):
    
        # Set time step counter.
        t = 0
        # Load the dataset into the iterator.
        sess.run(iterator_init_op)
        hs_remember_val = np.zeros([1,hidden_size])

        # Go through the dataset until its empty.
        while True:
            try:
                # If we are in the first time step intialize the hidden state with zeros.
                #if t == 0:
                #    hs_remember_val = np.zeros([1,hidden_size])
                
                # Feed in the last hidden state.
                # Read out the loss value for printing.
                # Read out the hidden state for the next forward step.
                # Do the training step.
                hs_remember_val, loss_val, _ = sess.run([hs_remember, loss, training_step], feed_dict={init_hs: hs_remember_val})
                
                # Increment the time step.
                t += 1

            # Stop if iterator is empty.
            except tf.errors.OutOfRangeError:
                break
                
          
        # After each epoch we print the loss value.
        print("Epoch: {}, Loss: {:f}".format(epoch, loss_val))
        
        # The main validation procedure we will use is sampling a text from our model to see how good we 
        # approximate the original dataset.
        
        # How many characters would we like to sample?
        sample_len = 200
        
        # Get a random starting sequence from our training dataset.
        start_idx = random.randint(0, len(text_idx) - seq_len)
        seq_idx = text_idx[start_idx:start_idx + seq_len]      
      
        # List to store the characters sampled by our model.
        sample_seq_idx        = []
        sample_hs_remember_val = np.zeros([1,hidden_size])

        # Sample as many characters as we would like.
        for n in range(sample_len):
            
            # To feed the starting sequence into our model we first need to put it into a dataset.
            # As we do not compare anything here we need some fake target values.
            fake_target = np.zeros([1,25], dtype=np.int32)
            sample_dataset = tf.data.Dataset.from_tensor_slices(([seq_idx], fake_target))
            # Load this dataset into the iterator.
            sess.run(iterator.make_initializer(sample_dataset))
    
            # Now we need to read out two things. The softmax output for sampling a character and the hidden state to
            # feed it in again.
            sample_output_softmax_val, sample_hs_remember_val = sess.run([output_softmax, hs_remember],
                                                                       feed_dict={init_hs: sample_hs_remember_val})

            
            
            # Sample a character from the softmax output distribution and append it.
            sample = np.random.choice(range(vocab_size), p=sample_output_softmax_val.ravel())
            sample_seq_idx.append(sample)
            # Update the start sequence for sampling the next character
            seq_idx = seq_idx[1:] + [sample]
        
      
        # Print sample.
        sample_txt = ''.join(idx_to_char[idx] for idx in sample_seq_idx)
        print('----\n %s \n----\n' % (sample_txt,))       

Epoch: 0, Loss: 2.837211
----
 kEab
 bARTHUR:  Sy rhakk
  ARTHUR:  Sa sapr
  ARTHUR:  Stard mealacn dantacisemallote it
  ARTHUR:  Ist!
  BEDEMDRDM:  Yot,!  ARTHED:  Yad!  IADEETHUR:  Int!
  ARTHUR:  Sartitl! ckdeatenkctspannous an 
----



KeyboardInterrupt: 