In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import urllib.request

import tensorflow as tf
from tensorflow.models.rnn.ptb import reader
%matplotlib inline

In [2]:
# Load Data
file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
file_name = 'tinyshakespeare.txt'
if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)
    
with open(file_name,'r') as f:
    raw_data = f.read()
    print("Data length:", len(raw_data))

Data length: 1115394


In [3]:
vocab = set(raw_data)
vocab_size = len(vocab)
idx_to_vocab = dict(enumerate(vocab))
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))

data = [vocab_to_idx[c] for c in raw_data]
print('The Vocab Size is: ', vocab_size)
print('The vocab_to_idx  is: ', vocab_to_idx)
print ('The data lenght is: ', len(data))

The Vocab Size is:  65
The vocab_to_idx  is:  {'C': 0, 'E': 20, 'b': 1, 'p': 2, '$': 4, 'a': 5, 'O': 7, 'R': 8, '.': 9, 'e': 10, 'N': 49, 'A': 12, 'm': 13, 'z': 14, '&': 15, 'i': 17, 'Z': 46, 'q': 18, 'G': 36, 'J': 21, 'S': 22, 'U': 23, 'B': 16, 'D': 58, 'r': 24, '-': 52, 'L': 25, 'w': 26, '!': 28, 'V': 30, 's': 61, 'T': 31, 'c': 32, 'h': 33, 'g': 34, ' ': 35, 'F': 37, 'd': 38, ';': 39, 'M': 3, '?': 40, 'Y': 41, 'y': 43, '3': 53, 't': 44, ':': 45, 'Q': 6, 'K': 47, 'v': 48, 'W': 27, "'": 62, 'X': 50, 'u': 19, 'l': 63, 'P': 42, 'k': 54, ',': 55, 'H': 56, 'x': 29, '\n': 59, 'j': 60, 'o': 11, 'f': 51, 'I': 57, 'n': 64}
The data lenght is:  1115394


In [4]:
# Useful Tools:
def gen_epochs(n, num_steps, batch_size):
    for i in range(n):
        yield reader.ptb_iterator(data, batch_size, num_steps)
        
def reset_graph():  # Reset the graph
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

for idx, epoch in enumerate(gen_epochs(5, 2, 3)):
    print (idx, epoch)
    
def train_network(g, num_epochs, num_steps = 200, batch_size = 32, verbose = True, save=False):
    tf.set_random_seed(2345)  # We set the random seed to track the same random chosen datapoints
    with tf.Session() as sess: # Open the session
        sess.run(tf.initialize_all_variables())  # Initialize all the variables
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
            training_loss = 0
            steps = 0
            training_state = None
            for X, Y in epoch:
                steps += 1

                feed_dict={g['x']: X, g['y']: Y}
                if training_state is not None:
                    feed_dict[g['init_state']] = training_state
                training_loss_, training_state, _ = sess.run([g['total_loss'],
                                                      g['final_state'],
                                                      g['train_step']],
                                                             feed_dict)
                training_loss += training_loss_
            if verbose:
                print("Average training loss for Epoch", idx, ":", training_loss/steps)
            training_losses.append(training_loss/steps)

        if isinstance(save, str):
            g['saver'].save(sess, save)

    return training_losses

0 <generator object ptb_iterator at 0x1052440f8>
1 <generator object ptb_iterator at 0x1052440a0>
2 <generator object ptb_iterator at 0x1052440f8>
3 <generator object ptb_iterator at 0x1052440a0>
4 <generator object ptb_iterator at 0x1052440f8>


In [5]:
def build_multilayer_lstm_graph_with_dynamic_rnn(
    state_size = 100,
    num_classes = vocab_size,
    batch_size = 32,
    num_steps = 200,
    num_layers = 3,
    learning_rate = 1e-4):

    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])

    # Note that our inputs are no longer a list, but a tensor of dims batch_size x num_steps x state_size
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

    #reshape rnn_outputs and y so we can get the logits in a single matmul
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])

    logits = tf.matmul(rnn_outputs, W) + b

    total_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step
    )

In [None]:
t = time.time()
build_multilayer_lstm_graph_with_dynamic_rnn()
print("It took", time.time() - t, "seconds to build the graph.")

In [None]:
g = build_multilayer_lstm_graph_with_dynamic_rnn()
t = time.time()
train_network(g, 3)
print("It took", time.time() - t, "seconds to train for 3 epochs.")