# Deep LSTMs with Word2vec using RNN API

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
%matplotlib inline
import collections
import math
import numpy as np
import os
import random
import tensorflow as tf
import nltk
import zipfile
from matplotlib import pylab
from six.moves import range
from six.moves.urllib.request import urlretrieve
import tensorflow as tf
from tensorflow.contrib import rnn
import csv

ModuleNotFoundError: No module named 'tensorflow.contrib'

## Downloading Stories
Stories are automatically downloaded from https://www.cs.cmu.edu/~spok/grimmtmp/, if not detected in the disk. The total size of stories is around ~500KB. The dataset consists of 100 stories.

In [4]:
url = 'https://www.cs.cmu.edu/~spok/grimmtmp/'

# Create a directory if needed
dir_name = 'stories'
if not os.path.exists(dir_name):
    os.mkdir(dir_name)
    
def maybe_download(filename):
  """Download a file if not present"""
  print('Downloading file: ', dir_name+ os.sep+filename)
    
  if not os.path.exists(dir_name+os.sep+filename):
    filename, _ = urlretrieve(url + filename, dir_name+os.sep+filename)
  else:
    print('Not downloading. File already exists.')
  statinfo = os.stat(dir_name+os.sep+filename)
  
  return filename

num_files = 100
filenames = [format(i, '03d')+'.txt' for i in range(1,num_files+1)]

for fn in filenames:
    maybe_download(fn)

Downloading file:  stories\001.txt
Not downloading. File already exists.
Downloading file:  stories\002.txt
Not downloading. File already exists.
Downloading file:  stories\003.txt
Not downloading. File already exists.
Downloading file:  stories\004.txt
Not downloading. File already exists.
Downloading file:  stories\005.txt
Not downloading. File already exists.
Downloading file:  stories\006.txt
Not downloading. File already exists.
Downloading file:  stories\007.txt
Not downloading. File already exists.
Downloading file:  stories\008.txt
Not downloading. File already exists.
Downloading file:  stories\009.txt
Not downloading. File already exists.
Downloading file:  stories\010.txt
Not downloading. File already exists.
Downloading file:  stories\011.txt
Not downloading. File already exists.
Downloading file:  stories\012.txt
Not downloading. File already exists.
Downloading file:  stories\013.txt
Not downloading. File already exists.
Downloading file:  stories\014.txt
Not downloading.

In [5]:
for i in range(len(filenames)):
    file_exists = os.path.isfile(os.path.join(dir_name,filenames[i]))
    assert file_exists
print('%d files found.'%len(filenames))

100 files found.


## Reading data
Data will be stored in a list of lists where the each list represents a document and document is a list of words. We will then break the text into words.

In [6]:
def read_data(filename):
  
  with open(filename) as f:
    data = tf.compat.as_str(f.read())
    data = data.lower()
    data = nltk.word_tokenize(data)
    
  return data

documents = []
global documents
for i in range(num_files):    
    print('\nProcessing file %s'%os.path.join(dir_name,filenames[i]))
    
    words = read_data(os.path.join(dir_name,filenames[i]))
    
    documents.append(words)
    print('Data size (Characters) (Document %d) %d' %(i,len(words)))
    print('Sample string (Document %d) %s'%(i,words[:50]))


Processing file stories\001.txt
Data size (Characters) (Document 0) 1693
Sample string (Document 0) ['in', 'olden', 'times', 'when', 'wishing', 'still', 'helped', 'one', ',', 'there', 'lived', 'a', 'king', 'whose', 'daughters', 'were', 'all', 'beautiful', ',', 'but', 'the', 'youngest', 'was', 'so', 'beautiful', 'that', 'the', 'sun', 'itself', ',', 'which', 'has', 'seen', 'so', 'much', ',', 'was', 'astonished', 'whenever', 'it', 'shone', 'in', 'her', 'face', '.', 'close', 'by', 'the', 'king', "'s"]

Processing file stories\002.txt
Data size (Characters) (Document 1) 2167
Sample string (Document 1) ['hard', 'by', 'a', 'great', 'forest', 'dwelt', 'a', 'wood-cutter', 'with', 'his', 'wife', ',', 'who', 'had', 'an', 'only', 'child', ',', 'a', 'little', 'girl', 'three', 'years', 'old', '.', 'they', 'were', 'so', 'poor', ',', 'however', ',', 'that', 'they', 'no', 'longer', 'had', 'daily', 'bread', ',', 'and', 'did', 'not', 'know', 'how', 'to', 'get', 'food', 'for', 'her']

Processing file sto

Processing file stories\018.txt
Data size (Characters) (Document 17) 1370
Sample string (Document 17) ['there', 'was', 'once', 'a', 'widow', 'who', 'had', 'two', 'daughters', '-', 'one', 'of', 'whom', 'was', 'pretty', 'and', 'industrious', ',', 'whilst', 'the', 'other', 'was', 'ugly', 'and', 'idle', '.', 'but', 'she', 'was', 'much', 'fonder', 'of', 'the', 'ugly', 'and', 'idle', 'one', ',', 'because', 'she', 'was', 'her', 'own', 'daughter', '.', 'and', 'the', 'other', ',', 'who']

Processing file stories\019.txt
Data size (Characters) (Document 18) 1071
Sample string (Document 18) ['there', 'was', 'once', 'a', 'man', 'who', 'had', 'seven', 'sons', ',', 'and', 'still', 'he', 'had', 'no', 'daughter', ',', 'however', 'much', 'he', 'wished', 'for', 'one', '.', 'at', 'length', 'his', 'wife', 'again', 'gave', 'him', 'hope', 'of', 'a', 'child', ',', 'and', 'when', 'it', 'came', 'into', 'the', 'world', 'it', 'was', 'a', 'girl', '.', 'the', 'joy']

Processing file stories\020.txt
Data size (Char

Data size (Characters) (Document 35) 3766
Sample string (Document 35) ['it', 'is', 'now', 'long', 'ago', ',', 'quite', 'two', 'thousand', 'years', ',', 'since', 'there', 'was', 'a', 'rich', 'man', 'who', 'had', 'a', 'beautiful', 'and', 'pious', 'wife', ',', 'and', 'they', 'loved', 'each', 'other', 'dearly', '.', 'they', 'had', ',', 'however', ',', 'no', 'children', ',', 'though', 'they', 'wished', 'for', 'them', 'very', 'much', ',', 'and', 'the']

Processing file stories\037.txt
Data size (Characters) (Document 36) 984
Sample string (Document 36) ['a', 'farmer', 'once', 'had', 'a', 'faithful', 'dog', 'called', 'sultan', ',', 'who', 'had', 'grown', 'old', ',', 'and', 'lost', 'all', 'his', 'teeth', ',', 'so', 'that', 'he', 'could', 'no', 'longer', 'hold', 'on', 'to', 'anything', '.', 'one', 'day', 'the', 'farmer', 'was', 'standing', 'with', 'his', 'wife', 'before', 'the', 'house-door', ',', 'and', 'said', ',', 'to-morrow', 'i']

Processing file stories\038.txt
Data size (Characters) (Doc


Processing file stories\056.txt
Data size (Characters) (Document 55) 2386
Sample string (Document 55) ['there', 'was', 'once', 'a', 'man', 'who', 'understood', 'all', 'kinds', 'of', 'arts', '.', 'he', 'served', 'in', 'war', ',', 'and', 'behaved', 'well', 'and', 'bravely', ',', 'but', 'when', 'the', 'war', 'was', 'over', 'he', 'received', 'his', 'dismissal', ',', 'and', 'three', 'farthings', 'for', 'his', 'expenses', 'on', 'the', 'way', '.', 'wait', ',', 'said', 'he', ',', 'i']

Processing file stories\057.txt
Data size (Characters) (Document 56) 441
Sample string (Document 56) ['the', 'she-wolf', 'brought', 'into', 'the', 'world', 'a', 'young', 'one', ',', 'and', 'invited', 'the', 'fox', 'to', 'be', 'godfather', '.', 'after', 'all', ',', 'he', 'is', 'a', 'near', 'relative', 'of', 'ours', ',', 'said', 'she', ',', 'he', 'has', 'a', 'good', 'understanding', ',', 'and', 'much', 'talent', ',', 'he', 'can', 'instruct', 'my', 'little', 'son', ',', 'and']

Processing file stories\058.txt
Data

Data size (Characters) (Document 72) 2708
Sample string (Document 72) ['there', 'was', 'once', 'a', 'king', 'who', 'had', 'an', 'illness', ',', 'and', 'no', 'one', 'believed', 'that', 'he', 'would', 'come', 'out', 'of', 'it', 'with', 'his', 'life', '.', 'he', 'had', 'three', 'sons', 'who', 'were', 'much', 'distressed', 'about', 'it', ',', 'and', 'went', 'down', 'into', 'the', 'palace-garden', 'and', 'wept', '.', 'there', 'they', 'met', 'an', 'old']

Processing file stories\074.txt
Data size (Characters) (Document 73) 2203
Sample string (Document 73) ['there', 'was', 'once', 'a', 'poor', 'woodcutter', 'who', 'toiled', 'from', 'early', 'morning', 'till', 'late', 'at', 'night', '.', 'when', 'at', 'last', 'he', 'had', 'laid', 'by', 'some', 'money', 'he', 'said', 'to', 'his', 'boy', ',', '``', 'you', 'are', 'my', 'only', 'child', ',', 'i', 'will', 'spend', 'the', 'money', 'which', 'i', 'have', 'earned', 'with', 'the', 'sweat']

Processing file stories\075.txt
Data size (Characters) (Documen

Sample string (Document 95) ['once', 'upon', 'a', 'time', 'lived', 'a', 'man', 'and', 'a', 'woman', 'who', 'so', 'long', 'as', 'they', 'were', 'rich', 'had', 'no', 'children', ',', 'but', 'when', 'they', 'were', 'poor', 'they', 'got', 'a', 'little', 'boy', '.', 'they', 'could', 'find', 'no', 'godfather', 'for', 'him', ',', 'so', 'the', 'man', 'said', 'he', 'would', 'just', 'go', 'to', 'another']

Processing file stories\097.txt
Data size (Characters) (Document 96) 2613
Sample string (Document 96) ['in', 'the', 'days', 'when', 'wishing', 'was', 'still', 'of', 'some', 'use', ',', 'a', 'king', "'s", 'son', 'was', 'bewitched', 'by', 'an', 'old', 'witch', ',', 'and', 'shut', 'up', 'in', 'an', 'iron', 'stove', 'in', 'a', 'forest', '.', 'there', 'he', 'passed', 'many', 'years', ',', 'and', 'no', 'one', 'could', 'rescue', 'him', '.', 'then', 'a', 'king', "'s"]

Processing file stories\098.txt
Data size (Characters) (Document 97) 1980
Sample string (Document 97) ['there', 'was', 'once', 'a', 'p

## Building the Dictionaries (Bigrams)
Builds the following. To understand each of these elements, let us also assume the text "I like to go to school"

* `dictionary`: maps a string word to an ID (e.g. {I:0, like:1, to:2, go:3, school:4})
* `reverse_dictionary`: maps an ID to a string word (e.g. {0:I, 1:like, 2:to, 3:go, 4:school}
* `count`: List of list of (word, frequency) elements (e.g. [(I,1),(like,1),(to,2),(go,1),(school,1)]
* `data` : Contain the string of text we read, where string words are replaced with word IDs (e.g. [0, 1, 2, 3, 2, 4])

It also introduces an additional special token `UNK` to denote rare words to are too rare to make use of.

In [7]:
def build_dataset(documents):
    chars = []
    # This is going to be a list of lists
    # Where the outer list denote each document
    # and the inner lists denote words in a given document
    data_list = []
  
    for d in documents:
        chars.extend(d)
    print('%d Words found.'%len(chars))
    count = []
    # Get the word sorted by their frequency (Highest comes first)
    count.extend(collections.Counter(chars).most_common())
    
    # Create an ID for each word by giving the current length of the dictionary
    # And adding that item to the dictionary
    # Start with 'UNK' that is assigned to too rare words
    dictionary = dict({'UNK':0})
    for char, c in count:
        # Only add a bigram to dictionary if its frequency is more than 10
        if c > 10:
            dictionary[char] = len(dictionary)    
    
    unk_count = 0
    # Traverse through all the text we have
    # to replace each string word with the ID of the word
    for d in documents:
        data = list()
        for char in d:
            # If word is in the dictionary use the word ID,
            # else use the ID of the special token "UNK"
            if char in dictionary:
                index = dictionary[char]        
            else:
                index = dictionary['UNK']
                unk_count += 1
            data.append(index)
            
        data_list.append(data)
        
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 
    return data_list, count, dictionary, reverse_dictionary

global data_list, count, dictionary, reverse_dictionary,vocabulary_size

# Print some statistics about data
data_list, count, dictionary, reverse_dictionary = build_dataset(documents)
print('Most common words (+UNK)', count[:5])
print('Least common words (+UNK)', count[-15:])
print('Sample data', data_list[0][:10])
print('Sample data', data_list[1][:10])
print('Vocabulary: ',len(dictionary))
vocabulary_size = len(dictionary)
del documents  # To reduce memory.

202036 Words found.
Most common words (+UNK) [(',', 17882), ('the', 11960), ('and', 9325), ('.', 7633), ('to', 4520)]
Least common words (+UNK) [('wet', 1), ('iii', 1), ('sheltered', 1), ('yelled', 1), ('oats', 1), ('armchair', 1), ('peaceably', 1), ('kitchen-sink', 1), ('fretted', 1), ('kernels', 1), ('youths', 1), ('hammers', 1), ('minutest', 1), ('tenderly', 1), ('craftily', 1)]
Sample data [12, 0, 776, 25, 1173, 118, 826, 41, 1, 44]
Sample data [577, 60, 7, 83, 119, 0, 7, 0, 22, 19]
Vocabulary:  1322


## Generating Batches of Data
The following object generates a batch of data which will be used to train the RNN. More specifically the generator breaks a given sequence of words into `batch_size` segments. We also maintain a cursor for each segment. So whenever we create a batch of data, we sample one item from each segment and update the cursor of each segment. Note that the input words (as well as labels) are not one-hot encoded anymore as we need the word index to do embedding lookups.

In [8]:
class DataGeneratorSeq(object):
    
    def __init__(self,text,batch_size,num_unroll):
        # Text where a bigram is denoted by its ID
        self._text = text
        # Number of bigrams in the text
        self._text_size = len(self._text)
        # Number of datapoints in a batch of data
        self._batch_size = batch_size
        # Num unroll is the number of steps we unroll the RNN in a single training step
        # This relates to the truncated backpropagation we discuss in Chapter 6 text
        self._num_unroll = num_unroll
        # We break the text in to several segments and the batch of data is sampled by
        # sampling a single item from a single segment
        self._segments = self._text_size//self._batch_size
        self._cursor = [offset * self._segments for offset in range(self._batch_size)]
        
    def next_batch(self):
        '''
        Generates a single batch of data
        '''
        # Train inputs (one-hot-encoded) and train outputs (one-hot-encoded)
        batch_data = np.zeros((self._batch_size),dtype=np.float32)
        batch_labels = np.zeros((self._batch_size),dtype=np.float32)
        
        # Fill in the batch datapoint by datapoint
        for b in range(self._batch_size):
            # If the cursor of a given segment exceeds the segment length
            # we reset the cursor back to the beginning of that segment
            if self._cursor[b]+1>=self._text_size:
                self._cursor[b] = b * self._segments
            
            # Add the text at the cursor as the input
            batch_data[b] = self._text[self._cursor[b]]
            # Add the preceding word as the label to be predicted
            batch_labels[b]= self._text[self._cursor[b]+1]                      
            # Update the cursor
            self._cursor[b] = (self._cursor[b]+1)%self._text_size
                    
        return batch_data,batch_labels
        
    def unroll_batches(self):
        '''
        This produces a list of num_unroll batches
        as required by a single step of training of the RNN
        '''
        unroll_data,unroll_labels = [],[]
        for ui in range(self._num_unroll):
            data, labels = self.next_batch()            
            unroll_data.append(data)
            unroll_labels.append(labels)
        
        return unroll_data, unroll_labels
    
    def reset_indices(self):
        '''
        Used to reset all the cursors if needed
        '''
        self._cursor = [offset * self._segments for offset in range(self._batch_size)]
        
# Running a tiny set to see if things are correct
dg = DataGeneratorSeq(data_list[0][25:50],5,5)
u_data, u_labels = dg.unroll_batches()

# Iterate through each data batch in the unrolled set of batches
for ui,(dat,lbl) in enumerate(zip(u_data,u_labels)):   
    print('\n\nUnrolled index %d'%ui)
    dat_ind = dat
    lbl_ind = lbl
    print('\tInputs:')
    for sing_dat in dat_ind:
        print('\t%s (%d)'%(reverse_dictionary[sing_dat],sing_dat),end=", ")
    print('\n\tOutput:')
    for sing_lbl in lbl_ind:        
        print('\t%s (%d)'%(reverse_dictionary[sing_lbl],sing_lbl),end=", ")



Unrolled index 0
	Inputs:
	that (14), 	which (56), 	, (1), 	shone (792), 	close (822), 
	Output:
	the (2), 	has (114), 	was (8), 	in (12), 	by (60), 

Unrolled index 1
	Inputs:
	the (2), 	has (114), 	was (8), 	in (12), 	by (60), 
	Output:
	sun (407), 	seen (283), 	astonished (869), 	her (17), 	the (2), 

Unrolled index 2
	Inputs:
	sun (407), 	seen (283), 	astonished (869), 	her (17), 	the (2), 
	Output:
	itself (720), 	so (33), 	whenever (1319), 	face (360), 	king (34), 

Unrolled index 3
	Inputs:
	itself (720), 	so (33), 	whenever (1319), 	face (360), 	king (34), 
	Output:
	, (1), 	much (165), 	it (9), 	. (4), 	's (53), 

Unrolled index 4
	Inputs:
	, (1), 	much (165), 	it (9), 	. (4), 	close (822), 
	Output:
	which (56), 	, (1), 	shone (792), 	close (822), 	by (60), 

## Defining the LSTM

This is a standard LSTM. The LSTM has 5 main components.
* Cell state
* Hidden state
* Input gate
* Forget gate
* Output gate

Each gate has three sets of weights (1 set for the current input, 1 set for the previous hidden state and 1 bias)

## Defining hyperparameters

Here we define several hyperparameters and are very similar to the ones we defined in Chapter 6. However additionally we use dropout; a technique that helps to avoid overfitting.

In [9]:
# Number of neurons in the hidden state variables
num_nodes = [64, 48, 32]

# Number of data points in a batch we process
batch_size = 32

# Number of time steps we unroll for during optimization
num_unrollings = 50

dropout = 0.2 # We use dropout

# Use this in the CSV filename when saving
# when using dropout
filename_extension = ''
if dropout>0.0:
    filename_extension = '_dropout'
    
filename_to_save = 'lstm_word2vec'+filename_extension+'.csv' # use to save perplexity values

## Defining Inputs and Outputs

In the code we define two different types of inputs. 
* Training inputs (The stories we downloaded) (batch_size > 1 with unrolling)
* Validation inputs (An unseen validation dataset) (bach_size =1, no unrolling)
* Test inputs (New story we are going to generate) (batch_size=1, no unrolling)

In [25]:
tf.reset_default_graph()

# Training Input data.
train_inputs, train_labels = [],[]
train_labels_ohe = []
# Defining unrolled training inputs
for ui in range(num_unrollings):
    train_inputs.append(tf.placeholder(tf.int32, shape=[batch_size],name='train_inputs_%d'%ui))
    train_labels.append(tf.placeholder(tf.int32, shape=[batch_size], name = 'train_labels_%d'%ui))
    train_labels_ohe.append(tf.one_hot(train_labels[ui], vocabulary_size))
    
# Validation data placeholders
valid_inputs = tf.placeholder(tf.int32, shape=[1],name='valid_inputs')
valid_labels = tf.placeholder(tf.int32, shape=[1], name = 'valid_labels')
valid_labels_ohe = tf.one_hot(valid_labels, vocabulary_size)

# Text generation: batch 1, no unrolling.
test_input = tf.placeholder(tf.int32, shape=[1],name='test_input')

## Loading Word Embeddings to TensorFlow
We load the previously learned and stored embeddings to TensorFlow and define tensors to hold embeddings

In [26]:
## If you want to change the embedding matrix to something you newly generated,
## Simply change embeddings.npy to embeddings-tmp.npy
embed_mat = np.load('embeddings.npy')
embeddings_size = embed_mat.shape[1]

embed_init = tf.constant(embed_mat)
embeddings = tf.Variable(embed_init,name='embeddings')

# Defining embedding lookup operations for all the unrolled
# trianing inputs
train_inputs_embeds = []
for ui in range(num_unrollings):
    # We use expand_dims to add an additional axis
    # As this is needed later for LSTM cell computation
    train_inputs_embeds.append(tf.expand_dims(tf.nn.embedding_lookup(embeddings,train_inputs[ui]),0))

# Defining embedding lookup for operations for all the validation data
valid_inputs_embeds = tf.nn.embedding_lookup(embeddings,valid_inputs)

# Defining embedding lookup for operations for all the testing data
test_input_embeds = tf.nn.embedding_lookup(embeddings, test_input)

## Defining Model Parameters

Now we define model parameters. Compared to RNNs, LSTMs have a large number of parameters. Each gate (input, forget, memory and output) has three different sets of parameters.

In [27]:
print('Defining softmax weights and biases')
# Softmax Classifier weights and biases.
w = tf.Variable(tf.truncated_normal([num_nodes[-1], vocabulary_size], stddev=0.01))
b = tf.Variable(tf.random_uniform([vocabulary_size],0.0,0.01))

print('Defining the LSTM cell')
# Defining a deep LSTM from Tensorflow RNN API

# First we define a list of LSTM cells
# num_nodes here is a sequence of hidden layer sizes
cells = [tf.nn.rnn_cell.LSTMCell(n) for n in num_nodes]

# We now define a dropout wrapper for each LSTM cell
dropout_cells = [
    rnn.DropoutWrapper(
        cell=lstm, input_keep_prob=1.0,
        output_keep_prob=1.0-dropout, state_keep_prob=1.0,
        variational_recurrent=True, 
        input_size=tf.TensorShape([embeddings_size]),
        dtype=tf.float32
    ) for lstm in cells
]

# We first define a MultiRNNCell Object that uses the 
# Dropout wrapper (for training)
stacked_dropout_cell = tf.nn.rnn_cell.MultiRNNCell(dropout_cells)
# Here we define a MultiRNNCell that does not use dropout
# Validation and Testing
stacked_cell = tf.nn.rnn_cell.MultiRNNCell(cells)


# Note: There exists the EmbeddingWrapper in RNN API to automate the embedding_lookup but,
# in many cases it may be more efficient to not use this wrapper, but instead concatenate the whole sequence of 
# your inputs in time, do the embedding on this batch-concatenated sequence, then split it and feed into your RNN.


Defining softmax weights and biases
Defining the LSTM cell


## Defining LSTM Computations
Here first we define the LSTM cell computations as a consice function. Then we use this function to define training and test-time inference logic.

In [28]:
print('LSTM calculations for unrolled inputs and outputs')
# =========================================================
# Training inference logic

# Initial state of the LSTM memory.
initial_state = stacked_dropout_cell.zero_state(batch_size, dtype=tf.float32)

# Defining the LSTM cell computations (training)
train_outputs, initial_state = tf.nn.dynamic_rnn(
    stacked_dropout_cell, tf.concat(train_inputs_embeds,axis=0), 
    time_major=True, initial_state=initial_state
)

# Reshape the final outputs to [num_unrollings*batch_size, num_nodes]
final_output = tf.reshape(train_outputs,[-1,num_nodes[-1]])

# Computing logits
logits = tf.matmul(final_output, w) + b
# Computing predictions
train_prediction = tf.nn.softmax(logits)

# Reshape logits to time-major fashion [num_unrollings, batch_size, vocabulary_size]
time_major_train_logits = tf.reshape(logits,[num_unrollings,batch_size,-1])

# We create train labels in a time major fashion [num_unrollings, batch_size, vocabulary_size]
# so that this could be used with the loss function
time_major_train_labels = tf.reshape(tf.concat(train_labels,axis=0),[num_unrollings,batch_size])

# Perplexity related operation
train_perplexity_without_exp = tf.reduce_sum(tf.concat(train_labels_ohe,0)*-tf.log(train_prediction+1e-10))/(num_unrollings*batch_size)

# =========================================================
# Validation inference logic

# Separate state for validation data
initial_valid_state = stacked_cell.zero_state(1, dtype=tf.float32)

# Validation input related LSTM computation
valid_outputs, initial_valid_state = tf.nn.dynamic_rnn(
    stacked_cell, tf.expand_dims(valid_inputs_embeds,0), 
    time_major=True, initial_state=initial_valid_state
)

# Reshape the final outputs to [1, num_nodes]
final_valid_output = tf.reshape(valid_outputs,[-1,num_nodes[-1]])

# Computing logits
valid_logits = tf.matmul(final_valid_output, w) + b
# Computing predictions
valid_prediction = tf.nn.softmax(valid_logits)

# Perplexity related operation
valid_perplexity_without_exp = tf.reduce_sum(valid_labels_ohe*-tf.log(valid_prediction+1e-10))

LSTM calculations for unrolled inputs and outputs


## Calculating LSTM Loss
We calculate the training loss of the LSTM here. It's a typical cross entropy loss calculated over all the scores we obtained for training data (`loss`) and averaged and summed in a specific way.

In [29]:
# We use the sequence-to-sequence loss function to define the loss
# We calculate the average across the batches
# But get the sum across the sequence length
loss = tf.contrib.seq2seq.sequence_loss(
    logits = tf.transpose(time_major_train_logits,[1,0,2]),
    targets = tf.transpose(time_major_train_labels),
    weights= tf.ones([batch_size, num_unrollings], dtype=tf.float32),
    average_across_timesteps=False,
    average_across_batch=True
)

loss = tf.reduce_sum(loss)

## Defining Learning Rate and the Optimizer with Gradient Clipping
Here we define the learning rate and the optimizer we're going to use. We will be using the Adam optimizer as it is one of the best optimizers out there. Furthermore we use gradient clipping to prevent any gradient explosions.

In [30]:
# Used for decaying learning rate
gstep = tf.Variable(0, trainable=False)

# Running this operation will cause the value of gstep
# to increase, while in turn reducing the learning rate
inc_gstep = tf.assign(gstep, gstep+1)

# Adam Optimizer. And gradient clipping.
tf_learning_rate = tf.train.exponential_decay(0.001,gstep,decay_steps=1, decay_rate=0.5)

print('Defining optimizer')
optimizer = tf.train.AdamOptimizer(tf_learning_rate)
gradients, v = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
optimizer = optimizer.apply_gradients(
    zip(gradients, v))

inc_gstep = tf.assign(gstep, gstep+1)

Defining optimizer


## LSTM with Beam-Search

Here we alter the previously defined prediction related TensorFlow operations to employ beam-search. Beam search is a way of predicting several time steps ahead. Concretely instead of predicting the best prediction we have at a given time step, we get predictions for several time steps and get the sequence of highest joint probability.

In [31]:
beam_length = 5
beam_neighbors = 5

# We redefine the sample generation with beam search
sample_beam_inputs = [tf.placeholder(tf.int32, shape=[1]) for _ in range(beam_neighbors)]
# Embedding lookups for each beam
sampel_beam_input_embeds = [tf.nn.embedding_lookup(embeddings,b) for b in sample_beam_inputs]

best_beam_index = tf.placeholder(shape=None, dtype=tf.int32)
best_neighbor_beam_indices = tf.placeholder(shape=[beam_neighbors], dtype=tf.int32)

# We have [num_layers, beam_neighbors] shape state variable set
# Maintains output of each beam
saved_sample_beam_output = [[tf.Variable(tf.zeros([1, n])) for _ in range(beam_neighbors)] for n in num_nodes]
# Maintains the state of each beam
saved_sample_beam_state = [[tf.Variable(tf.zeros([1, n])) for _ in range(beam_neighbors)] for n in num_nodes] 

# Resetting the sample beam states (should be done at the beginning of each text snippet generation)
reset_sample_beam_state = tf.group(
    *[[saved_sample_beam_output[ni][vi].assign(tf.zeros([1, n]))  for vi in range(beam_neighbors)] for ni,n in enumerate(num_nodes)],
    *[[saved_sample_beam_state[ni][vi].assign(tf.zeros([1, n])) for vi in range(beam_neighbors)] for ni,n in enumerate(num_nodes)] 
)

# We stack them to perform gather operation below
# These should be of size [beam_neighbors, 1, num_nodes]
stacked_beam_outputs = [tf.stack(saved_sample_beam_output[n]) for n in range(len(num_nodes))]
stacked_beam_states = [tf.stack(saved_sample_beam_state[n]) for n in range(len(num_nodes))]

# The beam states for each beam (there are beam_neighbor-many beams) needs to be updated at every depth of tree
# Consider an example where you have 3 classes where we get the best two neighbors (marked with star)
#     a`      b*       c  
#   / | \   / | \    / | \
#  a  b c  a* b` c  a  b  c
# Since both the candidates from level 2 comes from the parent b
# We need to update both states/outputs from saved_sample_beam_state/output to have index 1 (corresponding to parent b)

# Our update_sample_beam_state gets very complicated
# Because we have to do this for every beam neighbor 
# as well as every layer
update_sample_beam_state = tf.group(
    *[
        [saved_sample_beam_output[n][vi].assign(
            tf.gather_nd(stacked_beam_outputs[n],[best_neighbor_beam_indices[vi]])) for vi in range(beam_neighbors)
          for n in range(len(num_nodes))]
    ],
    *[
        [saved_sample_beam_state[n][vi].assign(
            tf.gather_nd(stacked_beam_states[n],[best_neighbor_beam_indices[vi]])) for vi in range(beam_neighbors)
          for n in range(len(num_nodes))]
    ]
)

# This needs to be of shap [beam_neighbors, num_layers]
sample_beam_outputs, sample_beam_states = [],[] 

# This needs to be of shape [beam_neighbors, num_layers]
# and each item is a LSTMStateTuple
# We calculate lstm_cell state and output for each beam
tmp_state_tuple = []
for vi in range(beam_neighbors):
    single_beam_state_tuple = []
    for ni in range(len(num_nodes)):
        single_beam_state_tuple.append(
            tf.nn.rnn_cell.LSTMStateTuple(saved_sample_beam_output[ni][vi], saved_sample_beam_state[ni][vi])
        )
    tmp_state_tuple.append(single_beam_state_tuple)


for vi in range(beam_neighbors):
    # We cannot use tf.nn.dynamic_rnn as we need to manipulate
    # LSTM state a lot. So even though it is lot of work
    # It is easier to do state manipulation externelly 
    # when using beam search
    final_output, tmp_state_tuple[vi] = stacked_cell.call(
        sampel_beam_input_embeds[vi], tmp_state_tuple[vi]
    )
    
    # We need to be care how we populate sample_beam_outputs
    # and sample_beam_state
    # They both need to be of size [beam_neighbors, num_layers]
    sample_beam_outputs.append([])
    sample_beam_states.append([])
    for ni in range(len(num_nodes)):
        sample_beam_outputs[-1].append(tmp_state_tuple[vi][ni][0])
        sample_beam_states[-1].append(tmp_state_tuple[vi][ni][1])

    
# This store predictions made for each beam neighbor position
sample_beam_predictions = []

# Used to update the LSTM cell for each neighbor 
# Just as normally we do during generation
beam_update_ops = tf.group(
    [[saved_sample_beam_output[ni][vi].assign(sample_beam_outputs[vi][ni]) for vi in range(beam_neighbors)]
                            for ni in range(len(num_nodes))],
    [[saved_sample_beam_state[ni][vi].assign(sample_beam_states[vi][ni]) for vi in range(beam_neighbors)]
                            for ni in range(len(num_nodes))]
)

# Get the predictions out
# For a given set of beams, outputs a list of prediction vectors of size beam_neighbors
# each beam having the predictions for full vocabulary
for vi in range(beam_neighbors):
    with tf.control_dependencies([beam_update_ops]):
        sample_beam_predictions.append(tf.nn.softmax(tf.nn.xw_plus_b(sample_beam_outputs[vi][-1], w, b)))
        

[1, 48]


## LSTM + Word2vec with Beam-Search

Here we alter the previously defined prediction related TensorFlow operations to employ beam-search. Beam search is a way of predicting several time steps ahead. Concretely instead of predicting the best prediction we have at a given time step, we get predictions for several time steps and get the sequence of highest joint probability.

### Learning rate Decay Logic

Here we define the logic to decrease learning rate whenever the validation perplexity does not decrease

In [32]:
# Learning rate decay related
# If valid perpelxity does not decrease
# continuously for this many epochs
# decrease the learning rate
decay_threshold = 5
# Keep counting perplexity increases
decay_count = 0

min_perplexity = 1e10

# Learning rate decay logic
def decay_learning_rate(session, v_perplexity):
  global decay_threshold, decay_count, min_perplexity  
  # Decay learning rate
  if v_perplexity < min_perplexity:
    decay_count = 0
    min_perplexity= v_perplexity
  else:
    decay_count += 1

  if decay_count >= decay_threshold:
    print('\t Reducing learning rate')
    decay_count = 0
    session.run(inc_gstep)

### Defining the Beam Prediction Logic
Here we define function that takes in the session as an argument and output a beam of predictions

In [33]:
test_word = None

def get_beam_prediction(session):
    
    global test_word
    global sample_beam_predictions, update_sample_beam_state
    
    # Generating words within a segment with Beam Search
    # To make some calculations clearer, we use the example as follows
    # We have three classes with beam_neighbors=2 (best candidate denoted by *, second best candidate denoted by `)
    # For simplicity we assume best candidate always have probability of 0.5 in output prediction
    # second best has 0.2 output prediction
    #           a`                   b*                   c                <--- root level
    #    /     |     \         /     |     \        /     |     \   
    #   a      b      c       a*     b`     c      a      b      c         <--- depth 1
    # / | \  / | \  / | \   / | \  / | \  / | \  / | \  / | \  / | \
    # a b c  a b c  a b c   a*b c  a`b c  a b c  a b c  a b c  a b c       <--- depth 2
    # So the best beams at depth 2 would be
    # b-a-a and b-b-a

    

    # Calculate the candidates at the root level
    feed_dict = {}
    for b_n_i in range(beam_neighbors):
        feed_dict.update({sample_beam_inputs[b_n_i]: [test_word]})

    # We calculate sample predictions for all neighbors with the same starting word/character
    # This is important to update the state for all instances of beam search
    sample_preds_root = session.run(sample_beam_predictions, feed_dict = feed_dict)  
    sample_preds_root = sample_preds_root[0]

    # indices of top-k candidates
    # b and a in our example (root level)
    this_level_candidates_sorted =  (np.argsort(sample_preds_root,axis=1).ravel()[::-1]).tolist() # indices of top-k candidates
    this_level_candidates = []
    for c in this_level_candidates_sorted:
        if len(this_level_candidates)==beam_neighbors:
            break
        if c!=0:
            this_level_candidates.append(c)

    this_level_candidates = np.array(this_level_candidates)

    # probabilities of top-k candidates
    # 0.5 and 0.2
    this_level_probs = sample_preds_root[0,this_level_candidates] #probabilities of top-k candidates

    # Update test sequence produced by each beam from the root level calculation
    # Test sequence looks like for our example (at root)
    # [b,a]
    test_sequences = ['' for _ in range(beam_neighbors)]
    for b_n_i in range(beam_neighbors):
        test_sequences[b_n_i] += reverse_dictionary[this_level_candidates[b_n_i]] + ' '

    # Make the calculations for the rest of the depth of the beam search tree
    for b_i in range(beam_length-1):
        test_words = [] # candidate words for each beam
        pred_words = [] # Predicted words of each beam

        # computing feed_dict for the beam search (except root)
        # feed dict should contain the best words/chars/bigrams found by the previous level of search

        # For level 1 in our example this would be
        # sample_beam_inputs[0]: b, sample_beam_inputs[1]:a
        feed_dict = {}
        for p_idx, pred_i in enumerate(this_level_candidates):                    
            # Updating the feed_dict for getting next predictions
            test_words.append(this_level_candidates[p_idx])

            feed_dict.update({sample_beam_inputs[p_idx]:[test_words[p_idx]]})

        # Calculating predictions for all neighbors in beams
        # This is a list of vectors where each vector is the prediction vector for a certain beam
        # For level 1 in our example, the prediction values for 
        #      b             a  (previous beam search results)
        # [a,  b,  c],  [a,  b,  c] (current level predictions) would be
        # [0.1,0.1,0.1],[0.5,0.2,0]
        sample_preds_all_neighbors = session.run(sample_beam_predictions, feed_dict=feed_dict)

        # Create a single vector with 
        # Making our example [0.1,0.1,0.1,0.5,0.2,0] 
        sample_preds_all_neighbors_concat = np.concatenate(sample_preds_all_neighbors,axis=1)

        # Update this_level_candidates to be used for the next iteration
        # And update the probabilities for each beam
        # In our example these would be [3,4] (indices with maximum value from above vector)
        # We also use a simple trick to avoid UNK (word id 0) being predicted 
        this_level_candidates_sorted = np.argsort(sample_preds_all_neighbors_concat.ravel())[::-1]
        this_level_candidates = []
        for c in this_level_candidates_sorted:
            if len(this_level_candidates)==beam_neighbors:
                break
            if c!=0 and c%vocabulary_size != 0 :
                this_level_candidates.append(c)

        this_level_candidates = np.array(this_level_candidates)

        # In the example this would be [1,1]
        parent_beam_indices = this_level_candidates//vocabulary_size

        # normalize this_level_candidates to fall between [0,vocabulary_size]
        # In this example this would be [0,1]
        this_level_candidates = (this_level_candidates%vocabulary_size).tolist()

        # Here we update the final state of each beam to be
        # the state that was at the index 1. Because for both the candidates at this level the parent is 
        # at index 1 (that is b from root level)
        session.run(update_sample_beam_state, feed_dict={best_neighbor_beam_indices: parent_beam_indices})

        # Here we update the joint probabilities of each beam and add the newly found candidates to the sequence
        tmp_this_level_probs = np.asarray(this_level_probs)
        tmp_test_sequences = list(test_sequences)

        for b_n_i in range(beam_neighbors):
            # We make the b_n_i element of this_level_probs to be the probability of parents
            # In the example the parent indices are [1,1]
            # So this_level_probs become [0.5,0.5]
            this_level_probs[b_n_i] = tmp_this_level_probs[parent_beam_indices[b_n_i]]

            # Next we multipyle these by the probabilities of the best candidates from current level 
            # [0.5*0.5, 0.5*0.2] = [0.25,0.1]
            this_level_probs[b_n_i] *= sample_preds_all_neighbors[parent_beam_indices[b_n_i]][0,this_level_candidates[b_n_i]]

            # Make the b_n_i element of test_sequences to be the correct parent of the current best candidates
            # In the example this becomes [b, b]
            test_sequences[b_n_i] = tmp_test_sequences[parent_beam_indices[b_n_i]]

            # Now we append the current best candidates
            # In this example this becomes [ba,bb]
            test_sequences[b_n_i] += reverse_dictionary[this_level_candidates[b_n_i]] + ' '

            # Create one-hot-encoded representation for each candidate
            pred_words.append(this_level_candidates[b_n_i])

    # Calculate best beam id based on the highest beam probability
    # Using the highest beam probability always lead to very monotonic text
    # Let us sample one randomly where one being sampled is decided by the likelihood of that beam
    rand_cand_ids = np.argsort(this_level_probs)[-3:]
    rand_cand_probs = this_level_probs[rand_cand_ids]/np.sum(this_level_probs[rand_cand_ids])
    random_id = np.random.choice(rand_cand_ids,p=rand_cand_probs)

    best_beam_id = parent_beam_indices[random_id]
    # Update state and output variables for test prediction
    session.run(update_sample_beam_state,feed_dict={best_neighbor_beam_indices:[best_beam_id for _ in range(beam_neighbors)]})

    # Make the last word/character/bigram from the best beam
    test_word = pred_words[best_beam_id]
        
    return test_sequences[best_beam_id]

### Running Training, Validation and Generation

We traing the LSTM on existing training data, check the validaiton perplexity on an unseen chunk of text and generate a fresh segment of text

In [36]:
num_steps = 251
steps_per_document = 100
docs_per_step = 10
valid_summary = 1
train_doc_count = 100

In [41]:
beam_nodes = []

train_perplexity_ot = []
valid_perplexity_ot = []
session = tf.InteractiveSession()

tf.global_variables_initializer().run()

print('Initialized')
average_loss = 0

# We use the first 10 documents that has 
# more than (num_steps+1)*steps_per_document bigrams for creating the validation dataset

# Identify the first 10 documents following the above condition
long_doc_ids = []
for di in range(num_files):
  if len(data_list[di])>1000:
    long_doc_ids.append(di)
  if len(long_doc_ids)==10:
    break

# Generating data
data_gens = []
valid_gens = []
for fi in range(num_files):
  # Get all the bigrams if the document id is not in the validation document ids
  if fi not in long_doc_ids:
    data_gens.append(DataGeneratorSeq(data_list[fi],batch_size,num_unrollings))
  # if the document is in the validation doc ids, only get up to the 
  # last steps_per_document bigrams and use the last steps_per_document bigrams as validation data
  else:
    data_gens.append(DataGeneratorSeq(data_list[fi][:-steps_per_document],batch_size,num_unrollings))
    # Defining the validation data generator
    valid_gens.append(DataGeneratorSeq(data_list[fi][-steps_per_document:],1,1))

valid_docs = len(valid_gens)
assert valid_docs>0

feed_dict = {}
for step in range(num_steps):
    print('Training (Step: %d)'%step)
    for di in np.random.permutation(train_doc_count)[:docs_per_step]:            

        for doc_step_id in range(steps_per_document):
            
            u_data, u_labels = data_gens[di].unroll_batches()
            for ui,(dat,lbl) in enumerate(zip(u_data,u_labels)):            
                feed_dict[train_inputs[ui]] = dat
                feed_dict[train_labels[ui]] = lbl
                #print(['( %s; %s ) '%(reverse_dictionary[tid],reverse_dictionary[til]) for tid,til in zip(np.argmax(dat,axis=1),np.argmax(lbl,axis=1))])
            
            feed_dict.update({tf_learning_rate:0.0005})
            _, l, step_perplexity = session.run([optimizer, loss, train_perplexity_without_exp], 
                                                       feed_dict=feed_dict)
            
            average_loss += step_perplexity
        
        print('(%d).'%di,end='')
    print('')    
    
    if (step+1) % valid_summary == 0:
      
      average_loss = average_loss / (docs_per_step*steps_per_document*valid_summary)
      # The average loss is an estimate of the loss over the last 2000 batches.
      print('Average loss at step %d: %f' % (step+1, average_loss))
      print('\tPerplexity at step %d: %f' %(step+1, np.exp(average_loss)))
      train_perplexity_ot.append(np.exp(average_loss))
      average_loss = 0 # reset loss
      
      valid_loss = 0 # reset loss
        
      # calculate valid perplexity
      for v_doc_id in range(valid_docs):
          # Remember we process things as bigrams
          # So need to divide by 2
          for v_step in range(steps_per_document//2):
            uvalid_data,uvalid_labels = valid_gens[v_doc_id].unroll_batches()        

            # Run validation phase related TensorFlow operations       
            v_perp = session.run(
                valid_perplexity_without_exp,
                feed_dict = {valid_inputs:uvalid_data[0],valid_labels: uvalid_labels[0]}
            )

            valid_loss += v_perp
            
          # Reset validation data generator cursor
          valid_gens[v_doc_id].reset_indices() 
      print()      
      v_perplexity = np.exp(valid_loss/(steps_per_document*valid_docs//2))
      print("Valid Perplexity: %.2f\n"%v_perplexity)
      valid_perplexity_ot.append(v_perplexity)
          
      decay_learning_rate(session, v_perplexity)
    
      # Generating new text ...
      # We will be generating one segment having 500 bigrams
      # Feel free to generate several segments by changing
      # the value of segments_to_generate
      print('Generated Text after epoch %d ... '%step)  
      segments_to_generate = 1
      chars_in_segment = 500//beam_length
    
      for _ in range(segments_to_generate):
        print('======================== New text Segment ==========================')
        # first word randomly generated
        test_word = data_list[np.random.randint(0,num_files)][np.random.randint(0,100)]
        print("",reverse_dictionary[test_word],end=' ')
        
        # Generating words within a segment with Beam Search
        for _ in range(chars_in_segment):
            test_sequence = get_beam_prediction(session)
            print(test_sequence,end=' ')    
        print(" ")
        session.run([reset_sample_beam_state])
        
        print('====================================================================')
        
      print("")

session.close()

with open('lstm_beam_search_word2vec_rnn_api.csv', 'wt') as f:
    writer = csv.writer(f,delimiter=',')
    writer.writerow(train_perplexity_ot)
    writer.writerow(valid_perplexity_ot)

Initialized
Training (Step: 0)
(1).(62).(82).(67).(57).(87).(58).(10).(30).(6).
Average loss at step 1: 5.133673
	Perplexity at step 1: 169.639030

Valid Perplexity: 232.29

Generated Text after epoch 0 ... 
 UNK her , , , ,  , , , , ,  . , , , ,  . , , , ,  , , , , ,  , , , , ,  , , , , ,  and , , , ,  , , , , ,  . , , , ,  her , , , ,  her , , , ,  and , , , ,  the , , , ,  the , , , ,  and , , , ,  and , , , ,  and , , , ,  . , , , ,  . , , , ,  and , , , ,  and , , , ,  her , , , ,  , , , , ,  . , , , ,  the , , , ,  . , , , ,  the , , , ,  the , , , ,  , , , , ,  . , , , ,  the , , , ,  . , , , ,  and , , , ,  and , , , ,  her , , , ,  the , , , ,  the , , , ,  . , , , ,  and , , , ,  , , , , ,  . , , , ,  , , , , ,  . , , , ,  . , , , ,  the , , , ,  and , , , ,  and , , , ,  the , , , ,  . , , , ,  the , , , ,  her , , , ,  the , , , ,  the , , , ,  her , , , ,  the , , , ,  the , , , ,  and , , , ,  , , , , ,  , , , , ,  the , , , ,  the , , , ,  , , , , ,  the , , , ,  . , , ,

 , , and and , ,  , , , , ,  to , , , ,  , , , , ,  , , , , ,  to , , , ,  , , , , ,  's , , , ,  , , , , ,  's , , , ,  's , , , ,  . , , , ,  , , , , ,  to , , , ,  to , , , ,  to , , , ,  . , , , ,  of , , , ,  , , , , ,  of , , , ,  of , , , ,  to , , , ,  to , , , ,  to , , , ,  to , , , ,  . , , , ,  of , , , ,  , , , , ,  , , , , ,  . , , , ,  's , , , ,  to , , , ,  to , , , ,  , , , , ,  . , , , ,  's , , , ,  of , , , ,  's , , , ,  of , , , ,  's , , , ,  to , , , ,  , , , , ,  , , , , ,  to , , , ,  's , , , ,  , , , , ,  to , , , ,  . , , , ,  , , , , ,  of , , , ,  to , , , ,  . , , , ,  . , , , ,  to , , , ,  to , , , ,  . , , , ,  of , , , ,  of , , , ,  of , , , ,  of , , , ,  . , , , ,  . , , , ,  of , , , ,  . , , , ,  , , , , ,  . , , , ,  . , , , ,  's , , , ,  of , , , ,  , , , , ,  . , , , ,  . , , , ,  . , , , ,  's , , , ,  . , , , ,  , , , , ,  's , , , ,  , , , , ,  of , , , ,  of , , , ,  . , , , ,  . , , , ,  . , , , ,  , , , , ,  's , , , ,  's , , , ,  . 



that in in in in  that in in in in  to in in in in  with in in in in  that in in in in  that that that in in  with in in in in  that in in in in  that in in in in  that in in in in  that in in in in  that in in in in  to in in in in  that in in in in  that in in in in  that in in in in  to in in in in  that in in in in  that in in in in  that in in in in  to in in in in  that in in in in  that in in in in  that in in in in  to in in in in  that in in in in  that in in in in  that in in in in  to in in in in  that in in in in  that in in in in  that in in in in  in in in in in  with in in in in  that in in in in  that in in in in  that in in in in  in in in in in  to in in in in  to in in in in  , in in in in  , in in in in  that in in in in  that in in in in  in in in in in  that in in in in  , in in in in  that in in in in  with in in in in  , in in in in  to in in in in  with in in in in  that in in in in  that in in in in  in in in in in  that in in in in  , in in in in  in in in in

(6).(62).(22).(55).(65).(11).(82).(16).(79).(50).
Average loss at step 18: 3.871798
	Perplexity at step 18: 48.028668

Valid Perplexity: 87.65

	 Reducing learning rate
Generated Text after epoch 17 ... 
 , , and that that that  that that that that that  as that that that that  that that that that that  that that that that that  that that that that that  as that that that that  as that that that that  as that that that that  so that that that that  in that that that that  it that that that that  so that that that that  that that that that that  that that that that that  so that that that that  that that that that that  in that that that that  so that that that that  so that that that that  so that that that that  that that that that that  so that that that that  in that that that that  that that that that that  so that that that that  that that that that that  that that that that that  in that that that that  that that that that that  in that that that that  that that that that that  s

 human silver , and and and  she , that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that tha

 . hans that he , the  , and , , he  . , he , ,  and , , , ,  that , , , ,  he , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  was , , , ,  was . , , ,  . , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  was , , , ,  i , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  was , , , ,  i , , , ,  , , , , ,  , , , , ,  that , , , ,  he , , , ,  , , , , ,  , , , , ,  , , , , ,  i , , , ,  he , , , ,  i , , , ,  that , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  was , , , ,  i , , , ,  , , , , ,  , , , , ,  was , , , ,  i , , , ,  that , , , ,  , , , , ,  , , , , ,  that , , , ,  , , , , ,  he , , , ,  , , , , ,  that , , , ,  , , , , ,  , , , , ,  that , , , ,  i , , , ,  , , , , ,  , , , , ,  i , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  that , , , ,  he , , , ,  , , , , ,  , , , , ,  that , , , ,  he , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  , , , , ,  i , , , ,  he , , , ,  that , , , ,


Valid Perplexity: 81.77

Generated Text after epoch 35 ... 
 him , and he and that  and that that that he  , that that he and  on it and and and  and that that that that  that that that that that  he that that that she  he that that that that  she and that that he  , and that that that  she and that that that  that that that that he  he that that that that  she and that that the  one , that that that  that that that that that  he that that that that  he that that that he  , that and and the  one and which that that  and and that that he  that that that that the  that that that that the  that that that that that  he that that that the  man , that that that  he that that he and  that that that that he  he that that that that  he that that that he  he that that that that  she and that that he  , that and and that  he that that that he  that that that that that  that that that that he  he that that that he  , that that he and  all that that that that  that that that that the  that that th

 UNK . he and he and  that the father , who  , and that that the  well , and he the  one and and and the  one and and that that  all the good i have  it for you , and  you have the good man  . he , and the  good , and he he  he and that that the  one , and that that  that that that that that  that that you you you  you as as as i  do , i can not  so so you i have  that and i have no  little and and and and  that that that it the  well . he and the  good , and he and  and and that that the  father , and he and  and that that that the  one , and that that  for it , and the  father and and and he  , and that that that  that the good you can  come and and and he  in the other time ,  said the father , and  , he , and that  the father , and the  father and and and and  all that that that that  the good i have the  father you and and so  good you i can no  and and but and as  as as as i i  as as i have ,  you have the good and  for you i have the  good blow , and the  mother and and he he  i


Valid Perplexity: 85.36

	 Reducing learning rate
Generated Text after epoch 47 ... 
 was the king which the king  , and that that the  little king , that that  that that that that a  night , the king was  a king and that the  king , and he the  other and and that that  the king , and the  next morning that that the  night that that he the  night that that he that  it and the king and  that the king and the  father and and all the  king and that that that  he that that that the  night that that that the  one , and he that  all that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  the night you you you  not no one to not  be so one that you  that that that you you  have no one . the  father said , i have  no one

 them . and she and the  maiden and and and and  one and the other and  which that that that the  other which the little maiden  which the little maiden was  home and and the maiden  , and that that the  other which the little maiden  which the little maiden came  , and and and that  he that that that the  other which that that the  night the little daughter which  that that that the little  king came to meet the  door and the other which  which that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  that that that that that  she that that the little  girl came and and and  it that that that that  that that that that the  little maiden came home and  and and the maiden and  and and and that that  that that that that that  she that the little maiden  , and the little maiden  and and the maiden and  which that that that the  little maiden came home and  and and the maiden and  and the other and

(30).(24).(92).(45).(88).(34).(67).(75).(17).(26).
Average loss at step 56: 3.384006
	Perplexity at step 56: 29.488678

Valid Perplexity: 76.75

	 Reducing learning rate
Generated Text after epoch 55 ... 
 all a great stone and and  it that that that that  that the little man was  all so that that she  which that that that the  little man was all a  wicked and and which it  that that the fox came  out and and that the  little girl and not not  leave . the man said  , i have not so  long as you do ,  and i not not so  that that that i not  no one , and not  not long for you that  that i have not so  well . and she the  other and and that the  little girl came home and  not that , she the  two maiden and and the  father and and that that  that that that that she  he that that he the  son which that that that  was all all and the  young man and that the  little girl and not not  leave . the man said  , i have not so  long as you do ,  and i not not so  so long and i have  that for you . t

 you will not not soon leave  it in the table and  do you do you so  so as the old woman  he said that the old  old woman was all so  great which that as the  old man said , i  do not know what the  old king , and i  said , i will not  soon soon soon so that  as the old woman was  very very very that the  whole table which he the  old man and and that  as the old woman came  to the end and the  beautiful maiden which that the  beautiful man and that that  he that that that that  that he was so very  the maiden that as the  little woman which that the  beautiful man was so very  one for him the table  which the old man was  and and that that the  beautiful maiden which that a  beautiful man and the maiden  who the beautiful maiden which  he that there and a  little man and the maiden  in the whole table which  he that and and the  beautiful maiden which that that  as the old woman was  once and her a great  very which which which that  as the old woman was  once and all a great  very wh

 a tree . the king and  that the king was and  that the king which the  children which that that the  king was so little one  that the old woman was  not and cut the little  tree which the king which  it and and the king  was so little one and  that that the king which  it and and that the  old woman said , that  i do what my dear  wife , the king ,  and the king which the  queen and the other and  so that that the king  was so great which the  king and that the king  which the king which the  daughter and and that that  that the king was and  but the king and the  king and the other and  the other and that the  old man was and and  it and that the king  was so great which she  and the king and that  it was not a little  and and that the king  came home and the king  and the old maiden was  and and that the king  was so great the children  which that it was a  children which was the children  which the king which the  queen and the children which  all so that the king  were so great wh

 the miller which the wolf which  that the little tailor which  which the other which the  boy which the other which  which the other which the  old woman which the other  that the wolf which the  old man came and the  little tailor which the old  woman which the old woman  had not the other one  . the king and the  king and the other man  came home and put the  door which the wolf which  all the great which the  wolf which the other which  the other which which the  old man came and the  little tailor which the other  that he was to keep  the door and the other  that the wolf which the  other which the other which  that the other which the  wolf which the other which  he the other which the  wolf which the other and  the other which the wolf  which the other and the  boy and he the other  that the wolf was so  so that the wolf which  all the great which the  old man which he all  at each other and said  that the king was so  dead that that he was  all so long and the  other which the 

(37).(61).(7).(54).(25).(8).(52).(82).(94).(62).
Average loss at step 73: 3.180877
	Perplexity at step 73: 24.067853

Valid Perplexity: 79.98

Generated Text after epoch 72 ... 
 the old woman said and the  old woman was home and  eat that and do not  be as my wife .  and the king and the  old woman came home and  see that it which the  king's daughter were all and  that the old man were  on a great stone which  all in a stone and  they and that that the  father was so glass which  that the king said that  that the old woman was  not and pass which that  it that that that the  queen was not that the  queen was not for the  wedding . the man was  for all and which the  son which the son which  which the daughter which which  were so dead which the  third which the king's daughter  said , i not not  that you and my father  , and that that that  i not not that my  wife which that that the  king was so dead and  life and pass which the  third and the king which  all in a large which  that 

 might see how the little man  which the son was so  dead that the little man  came and put the other  light and the little man  that the king which the  devil which the son was  full which the other which  the other , and that  that the devil which the  son which the youngest which  it and no one and  it for them and see  how the king which the  old woman was so little  that the fox which the  devil which the son which  the youngest which he was  a little fellow and the  tree . the man came  to her and ate and  put and her little bed  his which , and the  little man and the man  lay for the little man  that the man was so  handsome that the man was  a beautiful bird and the  youngest which the father which  the son said , ``  what do you do ,  how i do that my  child which my father have  the king's man , how  do you do that my  dear father was a little  bird and the little man  said , i do not  know that , and have  not a little fellow for  him . '' the mother  went and put her littl

 money and the king which the  old woman came home and  let him go home and  fetch the little and the  maiden which the son which  the son which that the  king which the son which  the son which the son  , and the son which  the son which which the  son which which which the  queen which which the son  which which the son which  came and the son which  all the son the son  which which the son which  which all in a little  ring which which the son  , and the son which  which the son which which  the son which the son  that the son came to  her , said the king  , and the king and  son which the son which  it and the son and  that the king which the  son which the son which  the son which which the  beautiful which the son which  the son came and the  beautiful maiden , and the  king which the son which  the son and the son  the son which that the  king which that the son  , and the son which  that the son , which  no one and the king  where the king which that  it and the son that  were 

 who no little coat and which  she the daughter and the  old woman came home and  eat and drink and the  father and mother which the  old woman came home and  eat and meet it and  said , i not not  not my wife has not  a little glass and my  human form and my little  table which my wife are  not and do my wife  have her wife and my  little glass , you are  the roast meat and my  human shape and little stone  which my little wife has  the roast meat which my  good sister which you not  come and do my wedding  for my human form and  eat and eat and my  little glass has not my  roast glass my little glass  shall not be my silver  stone and my little glass  pieces and my little wife  are home and eat and  eat and drink and my  wife are them and my  little wife which a little  glass which that that that  that the little wife were  a glass . that that  the devil came home and  cut the glass which that  that the little maiden were  divided his little and and  the maiden and each other  to pas

 said the king , and that  you do not know that  . the king said that  that that he were a  white daughter , and that  he were not so long  that that that the king  and not that the king  which he were to be  so great a great stone  , that that the king  was a king which the  king which he were to  go home and put the  pair which that that that  he were a king which  the king which he were  there , and that the  fox which the king which  he were to be an  king and the king which  he were to be a  king which the king which  he were to be a  king and the king which  that he were a king  and the king which that  they were a king which  they were to be a  king which the king which  she were to be a  king and the king which  he were to be a  king which the king which  he were to be a  king and the king which  they were to be a  glass which the king which  that that that the king  which he were to be  an king and the king  , that that the king  , and that that the  old man said that the  fat

 for his wife , and the  king which the son which  the daughter which the son  the son which were all  very so that it and  not that the man were  all all and the king  and death that the man  came home and said the  king , and that that  i not not the dragon  which are a great stone  and the king and what  you do that you are  an wife , and that  what that that you have  the king , and that  that that that the king  are all dead and do  what that that you have  the king which are a  little meat and a great  cry which the king which  that the woman were all  all so long as the  little white horse and the  old woman said , i  do not know what that  do you have a great  man which you have a  stone and you do not  know that that you have  the king and not not  the man , and that  that the king and not  the man , and the  son and the king which  that the woman came and  be all and the king  , and that that the  king which the king and  what that the man came  for his wife and the  king and

 boy , the king was so  beautiful that the king which  no one were so long  that the old woman were  a great deal which which  one one and the three  wife and all and the  daughter which he was to  see that , the king  and not that , the  woman said that the king  had him , and that  the king , and the  old woman was all on  it as long as it  as it as that the  king which had to be  hold as all as the  king and not that ,  and that the king was  so handsome that the king  was so handsome that the  old woman were all so  very long that the old  soldier said , `` that  that that no one are  me , do not know  it and the king which  would not be so very  long as it as that  it as all as the  devil said , `` that  that that i not not  not as fine as as  long as it as that  your wife . '' the  king said that the king  and the king , and  the king , and the  three man , and the  daughter , and that the  maiden was so handsome that  the king which had to  pass that the king was  sitting as all

 rich that it would not be  hold for a little light  for the little bed and  it in it . the  man gave him a little  little light and its bed  , and began to sing  which it in the little  pieces which which the daughter  and the daughter which it  the daughter , and the  daughter which it in the  wood which the daughter which  began to sing that the  man came home and sing  that the little bed ,  until it hold for the  four bed , and when  it it at all the  night the child came and  rest and rest which it  and all a little bed  its little bed and which  his wife wife which it  at last the child came  home and laid it so  a bed and it it  so long that the child  also and the daughter which  the daughter the daughter which  at each other , the  king and the son and  his daughter , and it  began to stop that the  little brother came and and  that the king which were  son the daughter , and  the daughter and it at  this all which came in  a little bed which the  queen which the daughter whi

 man to gretel . what do  what do you have .  what do you do .  hans said , hans home  . '' the girl said  to her , what have  it like be . but  what do you take that  for me . what have  the devil , you have  nothing . what do you  take . what do you  know . what do you  have . what do you  do . what do you  give me . what do  what do you have .  what do you do .  that do n't take it  to my wife . gretel  , hans . what do  n't what do do that  it is to be my  silver wife . what do  n't do what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  give you . i have  nothing but death . ''  the man said , ``  hans . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  do . what do you  know that . '' the  old man had 

 apple . the man said ,  no , i have no  father and death , and  that the little man said  , i will not be  your wife . and the  king said , i will  see what i have to  see that my name is  dead . and that the  little man said , i  must not have not no  long that that do you  be my wife . the  little man said , i  know that that is the  duck . the man said  , i must not have  my daughter . i have  my daughter to be my  husband . the man said  the father , i will  not be my bride ,  i will not be my  wife . that that i  will not be my wife  again . the man said  and that the little man  were in , and the  father and the daughter said  , i must go home  and you my wife ,  that i will not be  my bride , i have  my wife . the man  said , i will not  be my bride , i  will not be my bride  , i will not be  your wife . and the  man said , i will  see what i have to  be my wife , and  if you have not no  answer , and said ,  that is not my wife  . the man said ,  that is not my wife  , and tha

 will do you that , and  that the king and the  queen and the king and  no one , and the  old woman , and the  queen and the king ,  and the king and the  young king and the king  's son and the king  began to cry , and  the young king said ,  i will not eat and  lie so that my little  little ones , and the  young king and the king  , and the king and  no one , and the  king , and the king  , and the king ,  said the king , and  the king and the king  's daughter , and the  old woman which the king  and the king and the  king and the king and  no one , and the  old woman , and the  old woman and the king  and the king and the  old woman which the king  and the king and the  king and the king and  his daughter , and the  old woman which the king  , and the king ,  until the king and the  son said , `` father  that i do not know  it and do the old  king , and now not  be my wife , and  it is not so fine  if my father , and  the king and the king  's daughter , and the  old woman which th

 the man and the bride ,  but the two brothers went  out and lighted the king  which it at all her  rest and the shoe ,  and at last the child  had rest which the two  other sisters , and at  last the child had rest  full which the son and  his son and the son  's son went home and  kissed her and went and  seek the wife and the  son at last the child  had rest which it at  an end , it had  an bride and the son  , and it had no  wife , it had no  wife , it had no  wife , the two woman  came home and threw her  down and the king and  she went home and put  his horse home and went  to the king and had  the shoe and the shoe  came to her wife which  fell at each other ,  but the two brothers went  to her father , and  went and put her foot  home and put her hand  in his bed and went  home and put her foot  into the bed and went  and put her arm and  the king and had not  no one to be an  rest and the shoe ,  he had no wife the  maiden went home and put  him on the shoe which  came on the 

 near him , and the king  said , `` that i  will take it home and  i have my bride ,  and i will take you  to my human shape .  '' the miller said ,  '' yes , and the  king and the king had  nothing but the old man  . the man , and  he at last the old  old women were full of  wood , and when the  girl saw the old man  with it to the king  . the king 's son  and the old woman had  to eat , the king  asked what the king was  the king , and the  king , and the old  maiden , and the king  must not hold it ,  and told him the old  man , the old witch  were full , ate the  old woman , and the  maiden , and the king  went home and went and  and the king 's son  and the old woman was  all a little of the  water , and the king  must not hold the little  of anything and said ,  take it home , but  i will not take it  home . '' the girl  with the old woman ,  which he had to eat  to the old man ,  the king , however ,  should not hold it ,  the king said , ``  my child , what do  i do that that i

 away with the gold which the  sun had not no one  any one for her and  see that it should not  get out , and had  not an end of a  gold and which he was  in great sleep . the  little tailor was just so  a long long one could  that it should be an  old woman 's son ,  she was not so as  he had no human form  for the king , and  give her to return to  the door which had no  wife any one and be  all . and that the  old woman had to be  laid on the cock ,  and went to the door  and the daughter and the  door , the old woman  had her son , and  at last the king rejoiced  the king , and the  whole castle , and had  her daughter to her .  and as the king had  her son and the son  which had passed and the  king began to stop the  bed , and himself himself  to an end , and  her father gave her all  to the castle which the  door was the bride and  but a handsome man ,  the king , however ,  did not know how to  go and cried , i  will not go home ,  or not that you are  so long that that the  li

 UNK . `` that is no  one be my wife .  '' and when the tailor  heard that , the tailor  ran down and the tailor  again , and the princess  came home and cut it  in a bed and the  king , who was so  as that as a little  woman and the tailor ,  and the tailor went into  the stable the little tailor  fell on the fire ,  the window and laid it  for her wife for the  table , and the tailor  fell in a bed and  bed for her . the  second was for a little  man which she was to  drive home home and the  girl was full , and  as the tailor heard that  for the tailor , and  he went and put her  little and the little light  once in the bed and  heard that and that the  girl , who went and  cut the light and the  tailor , and she went  home and threw it into  her and the tailor ,  when the tailor saw the  man and the king ,  however , should not be  too so that it was  not so long as it  fell in his bed and  she put the little light  for her little having life  it for her and the  queen , and the ta

 she would not get on a  great glass which might be  an bride , who was  so beautiful that it ,  cried the angel , and  he went home and asked  that it should be as  as that as that as  no one knew him .  it must be as if  she would not be an  cause , and it must  not be too so as  they will not be as  that as that that you  can not find it ,  for you shall not be  no one for me ,  and the angel , i  will not have you for  you . '' and now  that came and said that  no one knew them ,  the prince said , i  must not be as no  long as it shall be  your wife . but the  girl said , i will  get for me . ''  and went and put the  whole sea , and went  home and put it so  until the door . the  bride went home and went  into the forest , and  at length , and went  in and said , i  must not be as no  dream , but that i  can not be as that  for me , but that  shall you come home and  eat , i will not  be my wife and the  water , i will not  be my wife and it  do not find it .  '' the duck had no 

 . and the king said ,  i will not return to  it , you are so  so beautiful that that that  , the king is so  well that you are so  so beautiful that you are  my wife , and you  be so beautiful that you  shall not learn that ,  it is not my wife  . the king said ,  yes , that must not  return to that the whole  court , and will not  learn what you are so  long , that that the  whole court , and will  go home home home and  do that you are so  beautiful that you are so  beautiful that you are not  so well . you are  not so fine that my  husband are not to be  your wife . the king  and that that the king  would not come home and  tell him the whole which  , however , would be  all in a place and  they said , that are  not a fine fine trade  , you , dear father  , the king 's daughter  god , that do you  come home home home and  take you home and die  again . '' the bear  said that the king was  so beautiful that the king  said , i will not  return to you , and  tell you the king and  hav

 , the king and his daughter  to death . and soon  after the king 's daughter  , the man was so  long as if it would  not have an end ,  that the king 's daughter  , and the king ,  and the king , who  would not come home and  keep on the ground and  put a little cloth which  which had no one power  all him , and the  faithful john said , dear  man , i will not  have it for you and  be my bride , and  shall not be my wife  but it and you have  my bride , and you  have no one for me  that you have no one  but my wife , i  am my bride to be  my wife , and the  old man says , i  know that i will not  be my bride , and  will not be my bride  for me , i will  not have the whole ones  my wife in its life  or my wife , and  you will be my bride  . and the man said  , i will not be  no one for me ,  that i know that you  must be my bride to  die . i will not  have it for you and  be my bride , and  can not know what it  shall be my bride and  do , but that you  may not an end ,  you shall be m

 UNK . `` if you not  you have it . the  youth and the son went  and went home home and  brought it home and said  to her , she will  do you not , and  have the old woman ,  if you are my bride  , and will not be  sure , you will not  eat home . '' the  man went home and took  her for a glass which  which she would not be  hold of a great lake  which she will not hear  what that is for a  day , and the old  man is not i. and  fell to her little bed  which she should not be  hold of a great lake  . if the old woman  were all still in his  cellar and cut the glass  gold and it in the  cellar and see what it  would be my wife .  the man and the old  maiden were all still alive  until she would not be  no one , she will  eat home and see what  i will have it ,  or if you will not  be my bride , and  must not be sure to  be my bride , and  will not be my wife  for them . the woman  began to cry , and  at last the child had  come home and see what  it should be an bride  's son . the mother 

 a great glass of straw .  for her wife had the  wife it for her little  child , and the little  girl would be present as  the gold , and the  little dwarf said , i  am here , or i  will be my bride ,  until the mother says ,  i will be my bride  the child . you will  take me out , and  then that as it shall  be my bride , and  what are you here for  my wife , and you  do not know what that  are you for it ,  or you shall be still  too that that i do  you for me , and  it is not here ,  i will go home ,  for the arm always ,  until that was all to  the reward , and the  arm always came home and  drink and if she would  do it for it .  the girl went home and  kissed her little bed until  it was all to rest  all the earth that the  girl had rest which it  had no rest until she  did not come . it  agreed that , i will  go home , or you  can be my bride ,  for it should be ''  and the girl went home  again , the girl went  into her bed and cried  to her , i will  be my bride , and  if you 

 covered him home and said ,  '' dear brother , dear  father and that you will  not be my bride ,  for the fox , however  . said that the old  king was there in the  shape which it should be  be there . and when  the old witch were so  long until the old witch  had to play , and  the old witch saw that  a little man had become  a glass . the old  girl , however , were  so beautiful that the old  old witch were full which  was all there and a  duck for her and the  old man was there in  its bed , and it  hold as that as the  man said , i will  receive it so long that  it which i will not  die for you , and  not not be my bride  , and it was a  bride , and it was  all there and the whole  forest , and the old  devil was so beautiful that  the old man were so  only that they would not  know it , and went  into her bed and saw  it so that the old  witch saw it there and  no one knew how the  children come home and be  the bride , said the  king , and the old  man are now no one  for them ,

 's children , and went home  and kissed him , and  ran for her so long  until the old woman went  home and cut her a  thousand glass its glass which  that the little tailor ,  however , as she was  all in a great glass  and half a glass ,  until the girl went home  so a great glass of  half a glass which the  children saw a great glass  which that it at last  began to cry and a  handsome man , and the  girl threw behind her a  glass which which the children  had laid her dead so  bed until the glass a  great glass of bread and  cut the great stones ,  but the king went home  and cut a great glass  one which for a glass  deal one pieces . the  old woman went home and  cut a great glass which  that it was a great  dove , and at last  one of joy which had  in a glass glass which  glass for a great glass  , and the girl was  over and cut a glass  its glass which a glass  joy , and the queen  again at each other and  saw a great deal which  for a great deal which  money in a glass which  p

KeyboardInterrupt: 