In [1]:
'''Text Generation using LSTM cells. Model trained on Harry Potter and the Order of the Phoenix'''

import re
import collections
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
import io
import time

# Wall-clock reference taken when this cell runs; used later to report elapsed time.
start_time = time.time()
def elapsed(sec):
    """Format a duration given in seconds as a string with a unit suffix.

    Values below one minute are reported in seconds, values below one hour in
    minutes, and everything else in hours. The number is converted with
    ``str`` and is not rounded.
    """
    if sec < 60:
        return str(sec) + " sec"
    # Pick the divisor and its label together for the two scaled units.
    divisor, unit = (60, " min") if sec < (60 * 60) else (60 * 60, " hr")
    return str(sec / divisor) + unit

#Target log path
logs_path = '/tmp/tensorflow/rnn_words'

#Reading and preprocessing data
def read_data(fname):
    """Read a text file and tokenize it into a 1-D NumPy array of strings.

    Processing steps:
      1. Line breaks are replaced by a space, so the last word of one line is
         not fused with the first word of the next line.
      2. Apostrophes are removed ("don't" -> "dont").
      3. A space is inserted before any of ``! ? , @ . : ; -`` that directly
         follows a word character, so the punctuation mark becomes its own
         token.
      4. The text is split on whitespace.

    Note: because line breaks now separate tokens, the token stream (and thus
    the vocabulary built from it) differs from the previous behaviour for any
    file that contains line breaks; checkpoints trained on the old vocabulary
    need to be retrained.

    Args:
        fname: Path of the text file to read.

    Returns:
        1-D ``numpy.ndarray`` of string tokens in reading order; an empty
        string array when the file contains no tokens.
    """
    with open(fname) as f:
        content = f.read()
    content = content.replace('\n', ' ')  # Line breaks separate words
    content = content.replace("'", "")
    # Raw string: "\w" inside a normal string literal is an invalid escape
    # sequence. The apostrophe is omitted from the character class because all
    # apostrophes were removed on the previous line.
    content = re.sub(r"(?<=\w)([!?,@.:;-])", r' \1', content)  # Spacing out special characters
    # dtype=str keeps the result a string array even when there are no tokens.
    return np.array(content.split(), dtype=str)

def build_dataset(words):
    """Build word -> id and id -> word lookup tables from a token sequence.

    Ids follow descending frequency: the most common word receives id 0, the
    next most common id 1, and so on.

    Returns:
        A ``(dictionary, reverse_dictionary)`` tuple mapping word -> id and
        id -> word respectively.
    """
    # most_common() yields (word, count) pairs, most frequent first.
    ranked = collections.Counter(words).most_common()
    dictionary = {word: rank for rank, (word, _) in enumerate(ranked)}
    reverse_dictionary = {rank: word for word, rank in dictionary.items()}
    return dictionary, reverse_dictionary

# Tokenize the corpus and build the word <-> id lookup tables used below.
train_data=read_data("HP.txt")
dictionary, reverse_dictionary = build_dataset(train_data)


# Parameters
learning_rate = 0.003       # learning rate handed to the RMSProp optimizer
training_iters = 100000     # number of iterations of the training loop
display_step = 100          # report / trace / checkpoint every this many iterations
batch_size=256              # examples per minibatch
time_steps=3                # consecutive words fed to the network per example
in_size=1                   # features per time step (each word is fed as its integer id)
vocab_size=len(dictionary)  # number of distinct tokens; width of the output layer
num_layers=3                # number of recurrent cells stacked in the network
# number of units in RNN cell
n_hidden = 512

def variable_summaries(var):
    """Register TensorBoard summaries for a tensor.

    Adds scalar summaries for the mean, standard deviation, maximum and
    minimum of ``var`` plus a histogram of its values, all created under a
    'summaries' name scope.
    """
    with tf.name_scope('summaries'):
        avg = tf.reduce_mean(var)
        tf.summary.scalar('mean', avg)
        with tf.name_scope('stddev'):
            squared_dev = tf.square(var - avg)
            spread = tf.sqrt(tf.reduce_mean(squared_dev))
        tf.summary.scalar('stddev', spread)
        largest = tf.reduce_max(var)
        tf.summary.scalar('max', largest)
        smallest = tf.reduce_min(var)
        tf.summary.scalar('min', smallest)
        tf.summary.histogram('histogram', var)

# tf Graph input
# X holds the input windows: batch X time_steps X in_size. The batch dimension is
# left dynamic so the same graph serves training batches and single-example
# generation.
X = tf.placeholder("float", [None, time_steps, in_size])
# y holds the one-hot target word for each example: batch X vocab_size. The batch
# dimension is dynamic to match X (it was previously pinned to batch_size, which
# prevented feeding targets for any other batch size).
y = tf.placeholder("float", [None, vocab_size])

# RNN output node weights and biases
# The output layer projects the final recurrent output (n_hidden units) onto
# one logit per vocabulary entry.
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, vocab_size])) # Weights_shape = hidden_units X vocab_size
}
biases = {
    'out': tf.Variable(tf.random_normal([vocab_size]))
}

def LSTM_cell(output_keep_prob=0.8):
    """Create one recurrent layer: a NASCell with dropout applied to its outputs.

    Despite the function name, the cell built here is
    ``tf.contrib.rnn.NASCell`` with ``n_hidden`` units, wrapped in a
    ``DropoutWrapper``.

    Args:
        output_keep_prob: Probability of keeping each output unit, as a float
            or a scalar tensor. Defaults to 0.8, the value that used to be
            hard-coded. With a constant value, dropout is applied on every run
            of the graph, including text generation; pass a tensor (for
            example a placeholder with a default) to be able to feed 1.0 at
            inference time.

    Returns:
        The dropout-wrapped RNN cell.
    """
    cell = tf.contrib.rnn.NASCell(n_hidden, reuse=tf.get_variable_scope().reuse)
    return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=output_keep_prob)

    
def RNN(X,weights,biases):
    """Build the stacked recurrent network and return the output-layer logits.

    ``num_layers`` cells from ``LSTM_cell()`` are stacked with
    ``MultiRNNCell`` and unrolled over ``X`` with ``tf.nn.dynamic_rnn``. The
    recurrent output of the last time step is projected through
    ``weights['out']`` and ``biases['out']``. TensorBoard summaries are
    registered for the output weights, the biases and the resulting logits.
    """
    layers = [LSTM_cell() for _ in range(num_layers)]
    stacked_cell = rnn.MultiRNNCell(layers)

    # step_outputs has shape batch X time_steps X n_hidden; the final state
    # returned by dynamic_rnn is not needed here.
    step_outputs, _ = tf.nn.dynamic_rnn(stacked_cell, X, dtype=tf.float32)

    # Keep only the output produced at the last time step.
    final_output = step_outputs[:, -1, :]
    logits = tf.matmul(final_output, weights['out']) + biases['out']

    with tf.name_scope('Weights'):
        variable_summaries(weights['out'])

    with tf.name_scope('Biases'):
        variable_summaries(biases['out'])

    with tf.name_scope('Activations'):
        tf.summary.histogram('Activations', logits)

    return logits

# Logits over the vocabulary for the word following each input window.
pred=RNN(X,weights,biases)

#Cost,optimizer,accuracy
# Cost is the softmax cross-entropy between the logits and the one-hot targets,
# averaged over the batch; it is also exported as a TensorBoard scalar.
with tf.name_scope("Cross_Entropy_Cost"):
    cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred,labels=y))
    tf.summary.scalar('Cross_Entropy_Cost',cost)

# Running `optimizer` performs one RMSProp update that minimizes `cost`.
optimizer=tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
# An example counts as correct when the highest-scoring logit is at the target index.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))

# Fraction of correct examples in the batch.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
tf.summary.scalar('Accuracy',accuracy)
# Initializing the variables
# Created after the optimizer so that variables the optimizer adds are included.
init = tf.global_variables_initializer()

# Single op that evaluates every summary registered above.
merged = tf.summary.merge_all()
# Event-file writer for TensorBoard, writing under logs_path.
writer = tf.summary.FileWriter(logs_path)
# Saver covering all global variables; used to checkpoint and restore the model.
saver = tf.train.Saver(tf.global_variables())
# Session configuration shared by the cells below.
config = tf.ConfigProto()
# Let TensorFlow grow its GPU memory allocation on demand instead of reserving it all up front.
config.gpu_options.allow_growth=True

In [None]:
# Launch the graph and train the model.
#
# Every iteration draws `batch_size` random windows of `time_steps` consecutive
# words from the corpus. The input is the window's word ids; the target is the
# one-hot encoding of the word that immediately follows the window.

# Largest window start that still leaves room for the target word.
max_offset = len(train_data) - (time_steps + 1)
if max_offset < 0:
    raise ValueError("Corpus has %d words but at least %d are required."
                     % (len(train_data), time_steps + 1))

with tf.Session(config=config) as session:
    session.run(init)
    writer.add_graph(session.graph)

    # Running totals since the last report, and the number of steps they cover,
    # so the printed averages are correct even for the very first report.
    acc_total = 0
    loss_total = 0
    steps_since_report = 0

    for step in range(training_iters):
        # Generate a minibatch. Add some randomness on selection process.
        batch_x = np.zeros((batch_size, time_steps, in_size))
        batch_y = np.zeros((batch_size, vocab_size))
        for batch in range(batch_size):
            # Sample uniformly over the valid window starts (randint is inclusive).
            offset = random.randint(0, max_offset)
            batch_x[batch] = np.reshape(
                [dictionary[str(train_data[i])] for i in range(offset, offset + time_steps)],
                [time_steps, in_size])
            # One-hot encoding of the word right after the window
            batch_y[batch, dictionary[str(train_data[offset + time_steps])]] = 1.0

        # On display steps the single training run is traced. Previously a second
        # run was issued, which applied an extra optimizer update on the same
        # batch and wrote a duplicate summary for the step.
        is_display_step = step % display_step == 0
        run_kwargs = {}
        if is_display_step:
            run_metadata = tf.RunMetadata()
            run_kwargs = {
                'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                'run_metadata': run_metadata,
            }

        summary, _, acc, loss = session.run(
            [merged, optimizer, accuracy, cost],
            feed_dict={X: batch_x, y: batch_y},
            **run_kwargs)
        writer.add_summary(summary, step)

        # Accumulate loss and accuracy until the next report
        loss_total += loss
        acc_total += acc
        steps_since_report += 1

        if is_display_step:
            print("Iter= " + str(step) + ", Average Loss= " + \
                  "{:.6f}".format(loss_total/steps_since_report) + ", Average Accuracy= " + \
                  "{:.2f}%".format(100*acc_total/steps_since_report))

            writer.add_run_metadata(run_metadata, 'step%03d' % step)
            print('Adding run metadata for', step)
            acc_total = 0
            loss_total = 0
            steps_since_report = 0
            saver.save(session,"./saver/model.ckpt")
            print("Model saved ")

    # Checkpoint the final weights too; otherwise the updates made after the
    # last display step would never be saved.
    saver.save(session,"./saver/model.ckpt")
    print("Model saved ")
    # Make sure all pending summaries reach disk.
    writer.flush()

In [None]:
# Restore the saved checkpoint and generate text interactively.
#
# The user types exactly `time_steps` words; the model then predicts the next
# word 100 times, each time sliding the input window forward by one word.
# Submitting an empty line ends the session.
#
# NOTE(review): LSTM_cell() is called without arguments when the graph is built,
# so dropout stays active for these predictions as well.
with tf.Session(config=config) as session:
    session.run(init)
    saver.restore(session,"./saver/model.ckpt")
    print("Optimization Finished!")
    print("Elapsed time: ", elapsed(time.time() - start_time))
    print("Run on command line.")
    print("\ttensorboard --logdir=%s" % (logs_path))
    print("Point your web browser to: http://localhost:6006/")

    # The prompt previously printed a literal "%s" because it was never formatted.
    prompt = "%s words: " % time_steps
    while True:
        sentence = input(prompt).strip()
        if not sentence:
            break  # empty input ends the loop
        # split() without an argument collapses repeated spaces.
        words = sentence.split()
        if len(words) != time_steps:
            print("Please enter exactly %d words." % time_steps)
            continue
        try:
            symbols_in_keys = [dictionary[word] for word in words]
        except KeyError as e:
            # The original `except e:` raised NameError instead of reporting this.
            print("Word not in vocabulary: %s" % e)
            continue

        for _ in range(100):
            keys = np.reshape(np.array(symbols_in_keys), [-1, time_steps, in_size])
            onehot_pred = session.run(pred, feed_dict={X:keys})
            # Use NumPy for the argmax: calling tf.argmax here would add a new op
            # to the graph on every iteration.
            onehot_pred_index = int(np.argmax(onehot_pred, axis=1)[0])
            sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
            # Slide the window: drop the oldest word, append the prediction.
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(onehot_pred_index)
        print(sentence)

INFO:tensorflow:Restoring parameters from ./saver/model.ckpt
Optimization Finished!
Elapsed time:  27.374006509780884 sec
Run on command line.
	tensorboard --logdir=/tmp/tensorflow/rnn_words
Point your web browser to: http://localhost:6006/
%s words: Harry Potter said
Harry Potter said George , looking , said Harry , his Dereliction , and he was not at , I dont , said Harry , trying at the door . Harry was not a Longbottom of the Weasley . He had just the the of tryouts of the Ministry , said Harry , trying at the back of his head , but he had been a , said Hagrid , looking , said Harry . He , who was a passing , said Harry , his a voice , then , said Harry , and he was sure to be , who was
%s words: Albus Dumbledore and
Albus Dumbledore and the others , said Harry , his eyes , but the door , said Harry , trying , said Harry , trying , said Hermione , looking , said Harry , trying his head . I think think to be , who was now to narrow the door . . . I was - a very , then , said Harry ,