# Import

In [1]:
'''
A Recurrent Neural Network (LSTM) implementation example using TensorFlow
Next word prediction after n_input words learned from text file
A story is automatically generated if the predicted word is fed back as input
Based on Rowel Atienza's code
'''

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import rnn

import numpy as np
import random
import collections

import time

# Record Elapsed time

In [2]:
def time_elapsed(t):
    """Render a duration given in seconds as a short human-readable string.

    Args:
        t: elapsed time in seconds.

    Returns:
        The value expressed in seconds (below one minute), minutes (below
        one hour) or hours, followed by the matching unit label.
    """
    # Pick the scaled amount and its label first, then format once.
    if t < 60:
        amount, unit = t, "second(s)"
    elif t < (60 * 60):
        amount, unit = t / 60, "minute(s)"
    else:
        amount, unit = t / (60 * 60), "hour(s)"

    return str(amount) + " " + unit

# Build Dataset

In [3]:
# read word data from files
def read_data(fname):
    """Load a text file and return all of its words as a flat NumPy array.

    Args:
        fname: path of the text file to read.

    Returns:
        A 1-D ``np.ndarray`` holding every whitespace-separated token of
        the file, in reading order.
    """
    with open(fname) as handle:
        lines = handle.readlines()

    tokens = []
    for line in lines:
        # Stripping removes indentation and the trailing newline; blank
        # lines contribute no tokens at all.
        tokens.extend(line.strip().split())

    return np.array(tokens)


#module for building dictionary and reverse dictionary.
def build_dataset(words):
    """Build word <-> integer-id lookup tables from a sequence of words.

    Ids follow the order returned by ``Counter.most_common()``: the most
    frequent word gets id 0, the next one id 1, and so on.

    Args:
        words: iterable of hashable tokens.

    Returns:
        A ``(dictionary, reverse_dictionary)`` tuple where ``dictionary``
        maps each word to its id and ``reverse_dictionary`` maps each id
        back to its word.
    """
    # most_common() with no argument lists every distinct element, from
    # the most to the least frequent.
    ranked = collections.Counter(words).most_common()

    dictionary = {word: rank for rank, (word, _) in enumerate(ranked)}
    reverse_dictionary = {rank: word for word, rank in dictionary.items()}
    return dictionary, reverse_dictionary

# Function for Building RNN Layers

In [4]:
def module_RNN(x, weights, biases):
    """Build the recurrent network and return the output-layer result for the last step.

    Reads the module-level settings ``n_input`` (sequence length) and
    ``n_hidden`` (units in the recurrent cell).

    Args:
        x: input tensor; it is reshaped to ``[-1, n_input]`` before use.
        weights: dict whose ``'out'`` entry is the output projection matrix.
        biases: dict whose ``'out'`` entry is the output bias.

    Returns:
        ``matmul(last_output, weights['out']) + biases['out']`` where
        ``last_output`` is the final element of the outputs produced by
        ``rnn.static_rnn``.
    """
    # Flatten to rows of n_input values; -1 lets the row count be inferred.
    flat_inputs = tf.reshape(x, [-1, n_input])

    # Cut along axis 1 into n_input pieces, giving a list with one entry
    # per position in the sequence
    # (e.g. [A] [slave] [named] -> [9] [10] [37])
    step_inputs = tf.split(flat_inputs, n_input, 1)

    # Recurrent cell: a single LSTM layer with n_hidden units.
    # Alternatives that can be swapped in here:
    #   plain RNN cell:
    #     rnn.BasicRNNCell(n_hidden)
    #   2-layer LSTM:
    #     rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden),rnn.BasicLSTMCell(n_hidden)])
    #   3-layer LSTM:
    #     rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden),rnn.BasicLSTMCell(n_hidden),rnn.BasicLSTMCell(n_hidden)])
    # (more layers can be stacked by building the list with a comprehension)
    lstm_cell = rnn.BasicLSTMCell(n_hidden)

    # Create the recurrent network described by the cell; the returned
    # final state is not needed here.
    step_outputs, _ = rnn.static_rnn(lstm_cell, step_inputs, dtype=tf.float32)

    # There are n_input outputs, but only the last one feeds the output layer.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']

# Parameters and Load Data

In [5]:
# Parameters
# step size handed to the optimizer
learning_rate = 0.001

# number of training iterations to run (a longer setting is kept for reference)
#training_iters = 100000
training_iters = 5000

# report averaged loss/accuracy once every display_step iterations
display_step = 10
# number of consecutive words fed into the network per sample
n_input = 3

# number of words to generate after the n_input seed words
n_output = 5


# number of units in the RNN cell (a larger setting is kept for reference)
#n_hidden = 60
n_hidden = 4


# Text file containing words for training (alternative corpora are commented out)
training_file = 'simpleStory1.txt'
#training_file = 'simpleStory2.txt'
#training_file = 'androcles.txt'

# flat array of every whitespace-separated token found in the file
training_data = read_data(training_file)
print("Loaded training data...")

# word -> id and id -> word lookups; ids follow descending word frequency
dictionary, reverse_dictionary = build_dataset(training_data)
# number of distinct words, i.e. the number of output classes
vocab_size = len(dictionary)

Loaded training data...


# Main Function

In [None]:
# TensorFlow Graph input
# x holds the word ids of each sample ([batch, n_input, 1]);
# y holds the one-hot encoded target word ([batch, vocab_size]).
x = tf.placeholder("float", [None, n_input, 1])
y = tf.placeholder("float", [None, vocab_size])


# Logs_path
# NOTE(review): this is an absolute, machine-specific location; point it at a
# directory that exists on your machine before running elsewhere.
logs_path = 'C:/Users/Ali/logs/RNNs'
writer  = tf.summary.FileWriter(logs_path)

# RNN output node weights and biases
weights = {'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))}
biases  = {'out': tf.Variable(tf.random_normal([vocab_size]))}

# Unnormalised scores (logits) for every vocabulary word, produced by the
# wrapped RNN layers. The softmax is applied inside the loss below, so these
# are NOT probabilities yet.
pred = module_RNN(x, weights, biases)

# Loss and optimizer
# softmax_cross_entropy_with_logits is deprecated; the _v2 op is its
# replacement. Wrapping the labels in stop_gradient keeps the old behaviour
# of never back-propagating into the labels.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=tf.stop_gradient(y)))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Model evaluation: a sample counts as correct when the highest-scoring
# word matches the position of the 1 in the one-hot label.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

# Reference point for the elapsed-time report printed after training
start_time = time.time()

# Launch the graph: train for training_iters steps, then let the user type
# n_input seed words and print the n_output words the model predicts next.
with tf.Session() as session:
    session.run(init)
    step = 0
    # Start at a small random position so runs do not all see the same windows
    offset = random.randint(0, n_input+1)
    # A sample needs n_input words plus the following label word
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0

    writer.add_graph(session.graph)
    # Write the graph to disk right away instead of waiting for the writer's
    # periodic flush, so it is available even if the run is interrupted.
    writer.flush()

    while step < training_iters:

        # Generate a mini-batch
        # Add some randomness on selection process:
        # wrap around once the window would run past the end of the data
        if offset > (len(training_data)-end_offset):
            offset = random.randint(0, n_input+1)

        # Ids of the n_input words starting at offset, shaped [1, n_input, 1]
        symbols_in_keys = [[dictionary[str(training_data[i])]] for i in range(offset, offset+n_input)]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

        # One-hot encoding of the word that follows the window, shaped [1, vocab_size]
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

        # Run one optimisation step and fetch accuracy, loss and raw scores
        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred],
                                                feed_dict={x: symbols_in_keys, y: symbols_out_onehot})

        loss_total += loss
        acc_total += acc

        # Display the trend of loss and accuracy
        if (step+1) % display_step == 0:
            print("Iter = " + str(step+1) + ", average loss= " + \
                  "{:0.6f}".format(loss_total/display_step) + ", average accuracy= " + \
                  "{:0.2f}%".format(100*acc_total/display_step))
            # Reset the running sums for the next reporting window
            acc_total = 0
            loss_total = 0

            # Brief view of input words, true next word and predicted next word
            symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
            symbols_out = training_data[offset + n_input]
            # onehot_pred is already a NumPy array, so take the argmax with
            # NumPy. Calling tf.argmax(...).eval() here would add a new op to
            # the graph on every report, growing it without bound.
            symbols_out_pred = reverse_dictionary[int(np.argmax(onehot_pred, axis=1)[0])]

            print("%s - [%s] vs [%s]" % (symbols_in, symbols_out, symbols_out_pred))
        step += 1
        offset += (n_input+1)
    print("Optimization Finished!")

    t_elapsed = time.time() - start_time
    print("Elapsed time: ", time_elapsed(t_elapsed))

    # Take a simple experiment using customized input.
    # An empty line (or end of input) leaves the loop so the cell can finish.
    print("(enter an empty line to stop)")
    while True:

        prompt = "insert %s words: " % n_input
        try:
            sentence = input(prompt)
        except EOFError:
            break
        sentence = sentence.strip()
        if not sentence:
            break
        # split() without arguments tolerates repeated spaces and tabs
        words = sentence.split()

        if len(words) != n_input:
            continue

        # Only the vocabulary lookup may legitimately fail; any other error
        # is a real bug and is allowed to surface.
        try:
            symbols_in_keys = [dictionary[str(word)] for word in words]
        except KeyError:
            print("cannot be found in dictionary")
            continue

        # Predict n_output words, feeding each prediction back as input
        for _ in range(n_output):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
            onehot_pred = session.run(pred, feed_dict={x: keys})
            onehot_pred_index = int(np.argmax(onehot_pred, axis=1)[0])
            sentence = "%s %s" % (sentence, reverse_dictionary[onehot_pred_index])
            # Slide the window: drop the oldest word, append the new one
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(onehot_pred_index)

        print(sentence)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Iter = 10, average loss= 2.661386, average accuracy= 10.00%
['Sarah', 'had', 'dinner'] - [with] vs [Steve]
Iter = 20, average loss= 3.029405, average accuracy= 20.00%
['had', 'breakfast', 'with'] - [David] vs [Steve]
Iter = 30, average loss= 2.746608, average ac

['with', 'Sarah', '.'] - [Sarah] vs [.]
Iter = 650, average loss= 2.393001, average accuracy= 0.00%
['lunch', 'with', 'Sarah'] - [.] vs [with]
Iter = 660, average loss= 1.648762, average accuracy= 30.00%
['lunch', 'with', 'Sarah'] - [.] vs [with]
Iter = 670, average loss= 1.875926, average accuracy= 30.00%
['.', 'Steve', 'had'] - [breakfast] vs [.]
Iter = 680, average loss= 1.813057, average accuracy= 30.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 690, average loss= 2.012819, average accuracy= 10.00%
['breakfast', 'with', 'David'] - [.] vs [with]
Iter = 700, average loss= 1.578828, average accuracy= 40.00%
['dinner', 'with', 'Steve'] - [.] vs [with]
Iter = 710, average loss= 1.956154, average accuracy= 20.00%
['dinner', 'with', 'Steve'] - [.] vs [with]
Iter = 720, average loss= 1.736931, average accuracy= 40.00%
['.', 'Sarah', 'had'] - [dinner] vs [.]
Iter = 730, average loss= 1.803130, average accuracy= 20.00%
['with', 'Sarah', '.'] - [Sarah] vs [.]
Iter = 740, average lo

Iter = 1440, average loss= 1.525982, average accuracy= 30.00%
['dinner', 'with', 'Steve'] - [.] vs [with]
Iter = 1450, average loss= 1.337168, average accuracy= 50.00%
['.', 'Sarah', 'had'] - [dinner] vs [.]
Iter = 1460, average loss= 1.759434, average accuracy= 20.00%
['David', 'had', 'lunch'] - [with] vs [with]
Iter = 1470, average loss= 1.168724, average accuracy= 50.00%
['breakfast', 'with', 'David'] - [.] vs [.]
Iter = 1480, average loss= 1.595197, average accuracy= 20.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 1490, average loss= 1.097615, average accuracy= 80.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 1500, average loss= 1.331238, average accuracy= 50.00%
['had', 'lunch', 'with'] - [Sarah] vs [.]
Iter = 1510, average loss= 2.332646, average accuracy= 0.00%
['with', 'Sarah', '.'] - [Sarah] vs [.]
Iter = 1520, average loss= 1.507725, average accuracy= 40.00%
['with', 'Sarah', '.'] - [Sarah] vs [.]
Iter = 1530, average loss= 1.738695, average accuracy

['lunch', 'with', 'Sarah'] - [.] vs [.]
Iter = 2260, average loss= 0.879632, average accuracy= 100.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 2270, average loss= 1.243387, average accuracy= 60.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 2280, average loss= 1.236588, average accuracy= 60.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 2290, average loss= 0.786995, average accuracy= 80.00%
['Sarah', '.', 'Sarah'] - [had] vs [.]
Iter = 2300, average loss= 1.758764, average accuracy= 30.00%
['.', 'Sarah', 'had'] - [dinner] vs [dinner]
Iter = 2310, average loss= 1.566204, average accuracy= 50.00%
['lunch', 'with', 'Sarah'] - [.] vs [.]
Iter = 2320, average loss= 1.347494, average accuracy= 60.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 2330, average loss= 1.551125, average accuracy= 60.00%
['.', 'Sarah', 'had'] - [dinner] vs [dinner]
Iter = 2340, average loss= 1.671390, average accuracy= 50.00%
['breakfast', 'with', 'David'] - [.] vs [.]
Iter = 2350, ave

Iter = 3050, average loss= 0.708908, average accuracy= 100.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 3060, average loss= 1.138654, average accuracy= 70.00%
['David', 'had', 'lunch'] - [with] vs [with]
Iter = 3070, average loss= 1.069995, average accuracy= 70.00%
['Sarah', '.', 'Sarah'] - [had] vs [had]
Iter = 3080, average loss= 0.637402, average accuracy= 100.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 3090, average loss= 1.201359, average accuracy= 70.00%
['Sarah', '.', 'Sarah'] - [had] vs [had]
Iter = 3100, average loss= 0.566855, average accuracy= 100.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 3110, average loss= 1.920852, average accuracy= 20.00%
['with', 'Steve', '.'] - [Steve] vs [.]
Iter = 3120, average loss= 1.039433, average accuracy= 70.00%
['had', 'dinner', 'with'] - [Steve] vs [.]
Iter = 3130, average loss= 0.890024, average accuracy= 90.00%
['lunch', 'with', 'Sarah'] - [.] vs [.]
Iter = 3140, average loss= 1.558353, average accur

Iter = 3840, average loss= 1.821340, average accuracy= 20.00%
['had', 'dinner', 'with'] - [Steve] vs [.]
Iter = 3850, average loss= 1.248534, average accuracy= 60.00%
['Sarah', '.', 'Sarah'] - [had] vs [had]
Iter = 3860, average loss= 1.502311, average accuracy= 20.00%
['had', 'dinner', 'with'] - [Steve] vs [.]
Iter = 3870, average loss= 1.009261, average accuracy= 60.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 3880, average loss= 1.054859, average accuracy= 60.00%
['with', 'Sarah', '.'] - [Sarah] vs [had]
Iter = 3890, average loss= 0.685867, average accuracy= 80.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 3900, average loss= 0.578317, average accuracy= 100.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 3910, average loss= 0.910204, average accuracy= 70.00%
['Sarah', '.', 'Sarah'] - [had] vs [had]
Iter = 3920, average loss= 0.434011, average accuracy= 100.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 3930, average loss= 1.078316, ave

['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 4640, average loss= 0.752380, average accuracy= 70.00%
['Sarah', '.', 'Sarah'] - [had] vs [had]
Iter = 4650, average loss= 0.970721, average accuracy= 60.00%
['had', 'lunch', 'with'] - [Sarah] vs [.]
Iter = 4660, average loss= 0.723756, average accuracy= 80.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 4670, average loss= 0.774722, average accuracy= 70.00%
['.', 'Steve', 'had'] - [breakfast] vs [dinner]
Iter = 4680, average loss= 0.338538, average accuracy= 100.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 4690, average loss= 0.943904, average accuracy= 70.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 4700, average loss= 0.373706, average accuracy= 100.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 4710, average loss= 1.155722, average accuracy= 40.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 4720, average loss= 0.328171, average accuracy= 100.00%
['dinner', 'with', 'Steve