In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt 

# Creating the dataset

In [2]:
# Length (in characters) of every input/target window.
sequence_length = 25

# Read the corpus; the context manager closes the file handle once the text is loaded.
with open("holy_grail.txt", 'r') as text_file:
    text = text_file.read()

# Map every distinct character to an integer id.
# The characters are sorted before numbering because set iteration order is not
# stable across kernel restarts, which would make the encoding irreproducible.
characters = set(text)
numbers = range(0, len(characters))
characters_numbers = zip(sorted(characters), numbers)
dictionary = dict(characters_numbers)

# Encode the whole text as a list of integer ids.
text_list = list(text)
number_text = [dictionary[character] for character in text_list]

# At least one (input, target) pair is needed, i.e. one window plus one extra character.
if len(number_text) <= sequence_length:
    raise ValueError(
        "holy_grail.txt must contain more than %d characters, got %d"
        % (sequence_length, len(number_text))
    )

# Cut the encoded text into overlapping windows that start one character apart.
# The "+ 1" keeps the final window, which ends exactly at the last character.
sequence_list = [
    number_text[start:start + sequence_length]
    for start in range(len(number_text) - sequence_length + 1)
]

# Each target window is its input window shifted one character to the right,
# so inputs are all windows but the last and targets are all windows but the first.
input_list = sequence_list[:-1]
target_list = sequence_list[1:]
data = np.array(input_list)
labels = np.array(target_list)

# Clear the default graph, then wrap the (input, target) arrays in a dataset
# that yields one window pair per element.
tf.reset_default_graph()
dataset = tf.data.Dataset.from_tensor_slices((data, labels))

# The iterator is defined only by the element types and shapes, so it can be
# (re)initialized from any dataset that has the same structure.
iterator = tf.data.Iterator.from_structure(
    output_types=dataset.output_types,
    output_shapes=dataset.output_shapes,
)

# Op that points the iterator at the training dataset, and the tensors that
# deliver the next (input, target) pair.
initialize_iterator = iterator.make_initializer(dataset)
next_batch = iterator.get_next()

## Building the model

In [3]:
# Size of the vocabulary (number of distinct characters) and of the RNN hidden state.
vocabulary_size = len(dictionary)
hidden_size = 100

# Fetch one (input, target) pair of integer-encoded character sequences from the iterator.
input_data = next_batch[0]
input_data = tf.cast(input_data, tf.int32)
labels = next_batch[1]
labels = tf.cast(labels, tf.int32)

# One-hot encode inputs and targets: one row per character in the sequence.
one_hot_data = tf.one_hot(input_data, vocabulary_size)
one_hot_labels = tf.one_hot(labels, vocabulary_size)

# Initial hidden state for the first timestep; fed from outside so the state
# can be carried over from one session run to the next.
hidden_state_placeholder = tf.placeholder(tf.float32, shape=(1, hidden_size))

# Per-timestep hidden states and logits.
hidden_states = []
logits = []

# Weights and biases of the vanilla RNN cell and of the output projection.
wxh = tf.Variable(tf.random_normal([vocabulary_size, hidden_size], stddev=0.1))
whh = tf.Variable(tf.random_normal([hidden_size, hidden_size], stddev=0.1))
why = tf.Variable(tf.random_normal([hidden_size, vocabulary_size], stddev=0.1))
bh = tf.Variable(tf.random_normal([1, hidden_size], stddev=0.1))
by = tf.Variable(tf.random_normal([1, vocabulary_size], stddev=0.1))

# Unroll the RNN over the sequence. tf.unstack needs the sequence length to be
# statically known, which it is because the iterator carries the dataset's shapes.
# Every step must consume the hidden state produced by the previous step;
# only the very first step starts from the placeholder.
previous_hidden_state = hidden_state_placeholder
for one_hot_character in tf.unstack(one_hot_data, axis=0):
    input_character = tf.reshape(one_hot_character, [1, vocabulary_size])
    next_hidden_state = tf.tanh(input_character @ wxh + previous_hidden_state @ whh + bh)
    hidden_states.append(next_hidden_state)
    logit = next_hidden_state @ why + by
    logits.append(logit)
    previous_hidden_state = next_hidden_state

# Results of the final timestep: the state to carry over and the
# next-character distribution used for sampling.
last_hidden_state = hidden_states[-1]
last_logits = logits[-1]
output = tf.nn.softmax(last_logits)

# Loss: mean cross entropy between the per-timestep logits (stacked row-wise)
# and the one-hot targets.
outputs = tf.concat(logits, axis=0)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_labels, logits=outputs)
loss = tf.reduce_mean(cross_entropy)

# Optimizer and training op.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
training_step = optimizer.minimize(loss)

## Training and Sampling

In [None]:
# Number of passes over the training data and number of characters sampled after each pass.
epochs = 10
sample_length = 200

# `dictionary` maps character -> number; decoding sampled numbers needs the inverse mapping.
number_to_character = {number: character for character, number in dictionary.items()}

# Shape of the hidden state the model expects, taken from the placeholder itself.
hidden_state_shape = hidden_state_placeholder.shape.as_list()

# Build the sampling input pipeline ONCE, before the session starts. Creating a new
# dataset and initializer on every sampling step would add ops to the graph each time.
# The placeholders carry the current window; the targets are not used for sampling
# but are required to match the iterator's (input, target) structure.
sample_input_placeholder = tf.placeholder(iterator.output_types[0], shape=iterator.output_shapes[0])
sample_target_placeholder = tf.placeholder(iterator.output_types[1], shape=iterator.output_shapes[1])
sample_dataset = tf.data.Dataset.from_tensors((sample_input_placeholder, sample_target_placeholder))
initialize_sample_iterator = iterator.make_initializer(sample_dataset)

with tf.Session() as sess:

    # Initialize all model variables.
    sess.run(tf.global_variables_initializer())

    # Counts training steps across all epochs.
    global_step = 0

    for epoch in range(epochs):
        # Every epoch starts from a zero hidden state.
        hidden_state = np.zeros(hidden_state_shape)
        # Stays None if the dataset yields no elements, so the report below cannot hit an unbound name.
        loss_val = None
        # Point the iterator at the training data for this epoch.
        sess.run(initialize_iterator)
        # Consume the dataset once; the iterator signals the end with OutOfRangeError.
        while True:
            try:
                # One training step; the final hidden state is fed back in on the next step.
                _, hidden_state, loss_val = sess.run(
                    [training_step, last_hidden_state, loss],
                    feed_dict={hidden_state_placeholder: hidden_state},
                )
                global_step = global_step + 1
            except tf.errors.OutOfRangeError:
                break

        # SAMPLING

        print('Epoch: ', epoch, ', Step: ', global_step, ', Loss: ', loss_val)

        # Pick a random training window as the seed. It is copied because the window
        # is modified below and must not alter the list stored in `input_list`.
        random_number = np.random.choice(len(input_list))
        random_subsequence = list(input_list[random_number])
        # Targets are only needed to satisfy the iterator structure.
        random_targets = target_list[random_number]

        # All sampled character ids, in order.
        all_sampled_characters = []
        for _ in range(sample_length):
            # Load the current window into the iterator.
            sess.run(
                initialize_sample_iterator,
                feed_dict={
                    sample_input_placeholder: random_subsequence,
                    sample_target_placeholder: random_targets,
                },
            )
            # Run the RNN over the window. The state goes in through the placeholder;
            # feeding `last_hidden_state` directly would bypass the RNN and ignore the window.
            subsequence_softmax_output, hidden_state = sess.run(
                [output, last_hidden_state],
                feed_dict={hidden_state_placeholder: hidden_state},
            )
            # The softmax output has shape (1, vocabulary); np.random.choice needs a 1-D
            # vector that sums to 1 in float64, so take the row and renormalize.
            probabilities = subsequence_softmax_output[0].astype(np.float64)
            probabilities /= probabilities.sum()
            sample_character = int(np.random.choice(a=len(dictionary), p=probabilities))
            all_sampled_characters.append(sample_character)
            # Slide the window: drop the oldest character, append the sampled one.
            random_subsequence.append(sample_character)
            random_subsequence.pop(0)

        # Decode the sampled ids back to characters and print them as one line.
        print(''.join(number_to_character[number] for number in all_sampled_characters))