In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import urllib.request

import tensorflow as tf
from tensorflow.models.rnn.ptb import reader
%matplotlib inline

In [2]:
# Load Data
file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
file_name = 'tinyshakespeare.txt'
if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)
    
with open(file_name,'r') as f:
    raw_data = f.read()
    print("Data length:", len(raw_data))

Data length: 1115394


In [3]:
vocab = set(raw_data)
vocab_size = len(vocab)
idx_to_vocab = dict(enumerate(vocab))
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))

data = [vocab_to_idx[c] for c in raw_data]
print('The Vocab Size is: ', vocab_size)
print('The vocab_to_idx  is: ', vocab_to_idx)
print ('The data lenght is: ', len(data))

The Vocab Size is:  65
The vocab_to_idx  is:  {',': 0, 'D': 1, 's': 20, 'X': 2, 'I': 4, 'u': 5, 'r': 6, 'h': 51, 'l': 9, 'W': 10, '!': 11, 'w': 12, ';': 40, '3': 15, 'q': 19, '\n': 18, 'p': 61, 'K': 44, 'G': 21, 'x': 22, 'J': 23, '?': 24, 'P': 25, '.': 26, '-': 27, '$': 32, 'f': 29, ' ': 31, 'L': 33, 'b': 34, 'V': 35, 'o': 30, ':': 36, 'n': 37, 'k': 38, 'e': 39, 'g': 48, 'R': 41, 'Y': 42, 'H': 43, 'A': 45, 'j': 60, 'E': 46, 'c': 47, 'F': 16, 'O': 49, 'N': 50, 't': 17, '&': 28, 'B': 3, 'd': 52, 'z': 13, 'Z': 53, "'": 54, 'U': 14, 'y': 56, 'm': 57, 'C': 58, 'S': 59, 'T': 55, 'Q': 8, 'v': 62, 'M': 7, 'a': 63, 'i': 64}
The data lenght is:  1115394


In [4]:
# Useful Tools:
def gen_epochs(n, num_steps, batch_size):
    for i in range(n):
        yield reader.ptb_iterator(data, batch_size, num_steps)
        
def reset_graph():  # Reset the graph
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

for idx, epoch in enumerate(gen_epochs(5, 2, 3)):
    print (idx, epoch)
    

0 <generator object ptb_iterator at 0x11ef040f8>
1 <generator object ptb_iterator at 0x11ef04150>
2 <generator object ptb_iterator at 0x11ef040f8>
3 <generator object ptb_iterator at 0x11ef04150>
4 <generator object ptb_iterator at 0x11ef040f8>


In [88]:
def build_dynamic_rnn_graph_with_list(
    state_size = 3, #state_size = 100,   # State size is the number of hidden layer in the hidden unit.
    num_classes = 6, #num_classes = vocab_size,
    batch_size = 2, #batch_size = 32,
    num_steps = 4, #num_steps = 200,  # number of steps is actually no of sequence
    learning_rate = 1e-4):

    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

#     x_one_hot = tf.one_hot(x, num_classes)
#     print ('The One Hot vector is: ', x_one_hot)
#     rnn_inputs = [tf.squeez e(i,squeeze_dims=[1]) for i in tf.split(1, num_steps, x_one_hot)]

#     cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
#     init_state = cell.zero_state(batch_size, tf.float32)
#     rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
    
    # For a Dynamic_RNN we need the input in the form of a matrix
    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)
    
    # The process from hidden layer to the output layer.
    cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
    
#     rnn_outputs: [no_of_batch x no_of_sequences x no_of_hidden_units]
    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes]) # no_of_hidden_layers x no_of_classes
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
        
    
#     W = tf.get_variable('W', [state_size, num_classes]) # no_of_hidden_layers x no_of_classes
#     b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
        
        
        
    rnn_outputs_new = tf.reshape(rnn_outputs, [-1, state_size]) 
    # [no_of_batch x no_of_sequences x no_of_hidden_units]
    y_reshaped = tf.reshape(y, [-1])

    logits = tf.matmul(rnn_outputs_new, W) + b
    
    # logits : [no_of_sequences x no_of_hidden_units] x [no_of_hidden_layers x no_of_classes] + 

#     y_as_list = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(1, num_steps, y)]

#     loss_weights = [tf.ones([batch_size]) for i in range(num_steps)]
#     losses = tf.nn.seq2seq.sequence_loss_by_example(logits, y_as_list, loss_weights)
#     total_loss = tf.reduce_mean(losses)
#     train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

#     return dict(
#         x = x,
#         y = y,
#         init_state = init_state,
#         final_state = final_state,
#         total_loss = total_loss,
#         train_step = train_step
#     )

    return dict(
        x=x,
        y=y,
        embeddings = embeddings,
        rnn_inputs = rnn_inputs,
        init_state = init_state,
        rnn_outputs = rnn_outputs,
        final_state = final_state,
        rnn_outputs_new = rnn_outputs_new,
        logits = logits,
        W = W
    )

g = build_dynamic_rnn_graph_with_list()


In [89]:
def train_network(g, num_epochs, num_steps = 200, batch_size = 32, verbose = True, save=False):
    tf.set_random_seed(2345)  # We set the random seed to track the same random chosen datapoints
    with tf.Session() as sess: # Open the session
        sess.run(tf.initialize_all_variables())  # Initialize all the variables
#         training_losses = []
#         for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
#             training_loss = 0
#             steps = 0
#             training_state = None
#             print (epoch)
#             for X, Y in epoch:
#                 steps += 1
#                 # X is the ids with the sequence of the sentence and Y is the prediction sequnence
#                 print (len([idx_to_vocab[ch] for ch in X[0]]))
#                 print (len([idx_to_vocab[ch] for ch in X[1]]))
#                 print ('')
#                 print (len([idx_to_vocab[ch] for ch in Y[0]]))
#                 print (len([idx_to_vocab[ch] for ch in Y[1]]))
        x = np.array([[1,2,3,4],[1,3,4,0]])
        y = np.array([[2,3,4,5],[3,4,0,5]])
        feed_dict={g['x']: x, g['y']: y}
        a , b, c, d, e, f, g, w = sess.run([g['embeddings'], 
                                   g['rnn_inputs'], 
                                   g['init_state'], 
                                   g['rnn_outputs'], 
                                   g['final_state'],
                                   g['rnn_outputs_new'],
                                   g['logits'],
                                   g['W']],  feed_dict)
        print ('embeddings', a) 
        print ('')
        print ('rnn_inputs ', b)
        print ('')
        print ('init_state ', c)
        print ('')
        print ('rnn_outputs ', d)
        print ('')
        print ('final_state ', e)
        print ('')
        print ('rnn_outputs_new ', f)
        print ('')
        print ('logits ', g)
        print ('')
        print ('weights ', w)

        
        print ('')
        print ('')
        print ('popopoopopopopooopoopopopopoopooopo')
        print ('')
        print ('')
        
        x_new = np.array([[1,4,2,2],[1,4,3,0]])
        y_new = np.array([[4,2,2,5],[4,3,0,5]])
        feed_dict={g['x']: x_new, g['y']: y_new}
#         a , b, c, d, e, f, g, w = sess.run([g['embeddings'], 
#                                    g['rnn_inputs'], 
#                                    g['init_state'], 
#                                    g['rnn_outputs'], 
#                                    g['final_state'],
#                                    g['rnn_outputs_new'],
#                                    g['logits'],
#                                    g['W']],  feed_dict)
#         print ('embeddings', a) 
#         print ('')
#         print ('rnn_inputs ', b)
#         print ('')
#         print ('init_state ', c)
#         print ('')
#         print ('rnn_outputs ', d)
#         print ('')
#         print ('final_state ', e)
#         print ('')
#         print ('rnn_outputs_new ', f)
#         print ('')
#         print ('logits ', g)
#         print ('')
#         print ('weights ', w)

        
        """
        embeddings: Is basically the weights from each words from the vocabulay list to all the hidden units.
        rnn_inputs: This takes the weights from embeddings for the corresponding input sequence.
                    Is the unput from each sequence to the hidden layer 
                    shape = [batch_size x no_of_sequence x no_of hidden_inits]
        
        """
#                 print (sess.run(x_one_hot))
# #                 print (g['x_one_hot'].eval())
#                 dictionary = sess.run(feed_dict)
#                 a = g['x_one_hot'].eval()
# #                 print ([w for no, w in enumerate(g['x_one_hot'])])
# #                 print (g['rnn_inputs'])

#                 break
#                 if training_state is not None:
#                     feed_dict[g['init_state']] = training_state
#                 training_loss_, training_state, _ = sess.run([g['total_loss'],
#                                                       g['final_state'],
#                                                       g['train_step']],
#                                                              feed_dict)
#                 training_loss += training_loss_
#             if verbose:
#                 print("Average training loss for Epoch", idx, ":", training_loss/steps)
#             training_losses.append(training_loss/steps)

#         if isinstance(save, str):
#             g['saver'].save(sess, save)

#     return training_losses

# t = time.time()
# build_dynamic_rnn_graph_with_list()
# print("It took", time.time() - t, "seconds to build the graph.")

t = time.time()
train_network(g, 3)
print("It took", time.time() - t, "seconds to train for 3 epochs.")

embeddings [[ -2.23231614e-01   3.97504985e-01  -4.65226859e-01]
 [  9.35203433e-02   1.53714418e-01   2.30824292e-01]
 [ -8.26793313e-02  -4.87440944e-01  -4.58240509e-04]
 [ -5.99353313e-02   1.21155381e-02   4.58998263e-01]
 [  5.91485500e-02  -2.89413124e-01  -6.78060651e-01]
 [ -6.63731456e-01  -6.24574482e-01   4.48079288e-01]]

rnn_inputs  [[[  9.35203433e-02   1.53714418e-01   2.30824292e-01]
  [ -8.26793313e-02  -4.87440944e-01  -4.58240509e-04]
  [ -5.99353313e-02   1.21155381e-02   4.58998263e-01]
  [  5.91485500e-02  -2.89413124e-01  -6.78060651e-01]]

 [[  9.35203433e-02   1.53714418e-01   2.30824292e-01]
  [ -5.99353313e-02   1.21155381e-02   4.58998263e-01]
  [  5.91485500e-02  -2.89413124e-01  -6.78060651e-01]
  [ -2.23231614e-01   3.97504985e-01  -4.65226859e-01]]]

init_state  LSTMStateTuple(c=array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]], dtype=float32), h=array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]], dtype=float32))

rnn_outputs  [[[-0.01344888  0.03842906 -0.0



IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [None]:
weights  [[ 0.43188667 -0.63292527  0.77183008 -0.97397757  0.216676   -0.11577225]
 [ 0.80125594  0.91688633  0.74394011  0.86566377 -0.27232838  0.01420283]
 [-0.91036487 -0.96914315 -0.43888903  0.78052235  0.54020953 -0.86834955]]
It took 0.07764196395874023 seconds to train for 3 epochs.

In [5]:
def build_multilayer_lstm_graph_with_dynamic_rnn(
    state_size = 100,
    num_classes = vocab_size,
    batch_size = 32,
    num_steps = 200,
    num_layers = 3,
    learning_rate = 1e-4):

    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])

    # Note that our inputs are no longer a list, but a tensor of dims batch_size x num_steps x state_size
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

    #reshape rnn_outputs and y so we can get the logits in a single matmul
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])

    logits = tf.matmul(rnn_outputs, W) + b

    total_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step
    )

In [6]:
t = time.time()
build_multilayer_lstm_graph_with_dynamic_rnn()
print("It took", time.time() - t, "seconds to build the graph.")

It took 0.6965160369873047 seconds to build the graph.


In [7]:
g = build_multilayer_lstm_graph_with_dynamic_rnn()
t = time.time()
train_network(g, 3)
print("It took", time.time() - t, "seconds to train for 3 epochs.")

Average training loss for Epoch 0 : 3.50457037043
Average training loss for Epoch 1 : 3.3163155057
Average training loss for Epoch 2 : 3.25275716288
It took 478.65483713150024 seconds to train for 3 epochs.
