In [4]:
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import urllib.request

import tensorflow as tf
from tensorflow.models.rnn.ptb import reader

In [17]:
# Load the Tiny Shakespeare corpus, downloading it the first time the cell runs,
# then encode every character as an integer id.
file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
# NOTE(review): absolute, machine-specific location; the download below fails
# if the parent directory does not exist on the current machine.
file_name = '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Word-Search-NNets/Practise/tinyshakespeare.txt'
if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)

with open(file_name,'r') as f:
    raw_data = f.read()
print("Data length:", len(raw_data))

vocab = set(raw_data)
vocab_size = len(vocab)
# Enumerate the characters in sorted order so a given character always gets
# the same id. Iterating the bare set follows string-hash order, which can
# differ from one Python process to the next, and that would make ids (and any
# checkpoint trained on them) meaningless after a kernel restart.
idx_to_vocab = dict(enumerate(sorted(vocab)))
vocab_to_idx = {char: idx for idx, char in idx_to_vocab.items()}

# The corpus as a flat list of integer ids; the raw text is no longer needed.
data = [vocab_to_idx[c] for c in raw_data]
del raw_data

Data length: 1115394


In [6]:
def build_multilayer_lstm_graph_with_dynamic_rnn(
    state_size = 100,
    num_classes = vocab_size,
    batch_size = 32,
    num_steps = 200,
    num_layers = 3,
    learning_rate = 1e-4):
    """Build a stacked-LSTM next-token model in a fresh default graph.

    The graph embeds integer inputs, runs them through `num_layers` LSTM
    layers with `tf.nn.dynamic_rnn`, projects every time step to
    `num_classes` logits, and minimises the mean sparse softmax
    cross-entropy with Adam.

    Args:
        state_size: LSTM state size; also used as the embedding width.
        num_classes: number of distinct token ids (defaults to `vocab_size`
            as it was when this function was defined).
        batch_size: number of sequences per batch (fixed in the placeholders).
        num_steps: number of time steps per sequence (fixed in the placeholders).
        num_layers: number of stacked LSTM layers.
        learning_rate: learning rate passed to `tf.train.AdamOptimizer`.

    Returns:
        dict with the graph handles: `x`, `y` (int32 placeholders of shape
        [batch_size, num_steps]), `init_state`, `final_state`, `total_loss`,
        `train_step`, `preds` (softmax over the flattened time steps) and
        `saver`.
    """

    # Discards whatever graph was built before, so variable names can be reused.
    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])

    # Note that our inputs are no longer a list, but a tensor of dims batch_size x num_steps x state_size
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

    # Flatten rnn_outputs and y so the logits for every time step come out of
    # a single matmul.
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])

    logits = tf.matmul(rnn_outputs, W) + b
    predictions = tf.nn.softmax(logits)

    # Keyword arguments make it impossible to swap logits and labels by position.
    total_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step,
        preds = predictions,
        # train_network looks up g['saver'] when asked to save a checkpoint,
        # so this builder provides one just like build_graph does.
        saver = tf.train.Saver()
    )

In [9]:
def gen_epochs(n, num_steps, batch_size):
    """Lazily yield `n` epochs, each a fresh `reader.ptb_iterator` over the global `data`."""
    yield from (
        reader.ptb_iterator(data, batch_size, num_steps)
        for _ in range(n)
    )

def reset_graph():
    """Close a truthy module-level `sess`, if one exists, then reset the default graph."""
    leftover_session = globals().get('sess')
    if leftover_session:
        leftover_session.close()
    tf.reset_default_graph()

def accuracy(predictions, labels, labels_one_hot = None):
    """Return the percentage of rows whose arg-max prediction matches the label.

    Args:
        predictions: array of per-class scores; the predicted class is the
            arg-max along axis 1.
        labels: class ids (flattened before comparison), or one-hot rows when
            `labels_one_hot` is truthy.
        labels_one_hot: truthy when `labels` is one-hot encoded along axis 1.

    Returns:
        100 * (number of matches) / (number of rows in `predictions`).
    """
    predicted_classes = np.argmax(predictions, 1)
    if labels_one_hot:
        # One-hot labels: recover the class id from the position of the maximum.
        expected_classes = np.argmax(labels, 1)
    else:
        # Integer labels: flatten to line up with the flattened predictions.
        expected_classes = np.reshape(labels, [-1])
    num_correct = np.sum(predicted_classes == expected_classes)
    return 100.0 * num_correct / predictions.shape[0]

    
def train_network(g, num_epochs, num_steps = 200, batch_size = 32, verbose = True, save=False):
    """Train the graph described by `g` and return the mean loss of every epoch.

    Args:
        g: dict of graph handles as returned by the graph builders; must hold
            `x`, `y`, `init_state`, `final_state`, `total_loss`, `train_step`
            and `preds`. A `saver` entry is used for checkpointing if present.
        num_epochs: number of passes over the data produced by `gen_epochs`.
        num_steps: sequence length of each batch; must match the graph.
        batch_size: sequences per batch; must match the graph.
        verbose: when true, print the accuracy of every batch and the average
            loss of every epoch.
        save: checkpoint path (a `str`) to save the trained variables to; any
            non-string value disables saving.

    Returns:
        list with one average training loss per epoch.
    """
    # NOTE(review): the graph in `g` is already built when this seed is set,
    # so it presumably does not influence the variable initialisers - confirm.
    tf.set_random_seed(2345)
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
            training_loss = 0
            steps = 0
            # The first batch of an epoch starts from the graph's own zero
            # state; later batches continue from the previous final state.
            training_state = None
            for X, Y in epoch:
                steps += 1
                feed_dict={g['x']: X, g['y']: Y}
                if training_state is not None:
                    feed_dict[g['init_state']] = training_state
                training_loss_, training_state, _, preds_ = sess.run([g['total_loss'],
                                                      g['final_state'],
                                                      g['train_step'],
                                                      g['preds']],
                                                      feed_dict)
                training_loss += training_loss_
                # Per-batch reporting obeys `verbose`, like the epoch summary.
                if verbose:
                    acc = accuracy(preds_, Y)
                    print ('accuracy of the training batch %d is: '%steps, acc)

            if verbose:
                print("Average training loss for Epoch", idx, ":", training_loss/steps)
            training_losses.append(training_loss/steps)

        if isinstance(save, str):
            # Not every builder puts a saver in the dict; create one on demand
            # instead of failing with a KeyError after training has finished.
            saver = g['saver'] if 'saver' in g else tf.train.Saver()
            saver.save(sess, save)

    # Callers index into this list to report the final epoch's loss.
    return training_losses

In [10]:
# Measure how long one construction of the default multi-layer LSTM graph takes.
t = time.time()
build_multilayer_lstm_graph_with_dynamic_rnn()
build_seconds = time.time() - t
print("It took", build_seconds, "seconds to build the graph.")

It took 0.7677631378173828 seconds to build the graph.


In [11]:
# Build the default graph first so that only the training itself is timed.
g = build_multilayer_lstm_graph_with_dynamic_rnn()
t = time.time()
train_network(g, 3)
train_seconds = time.time() - t
print("It took", train_seconds, "seconds to train for 3 epochs.")

accuracy of the training batch 1 is:  1.109375
accuracy of the training batch 2 is:  1.484375
accuracy of the training batch 3 is:  4.375
accuracy of the training batch 4 is:  8.125
accuracy of the training batch 5 is:  11.59375
accuracy of the training batch 6 is:  13.53125
accuracy of the training batch 7 is:  14.609375
accuracy of the training batch 8 is:  14.734375
accuracy of the training batch 9 is:  15.390625
accuracy of the training batch 10 is:  15.515625
accuracy of the training batch 11 is:  14.9375
accuracy of the training batch 12 is:  15.765625
accuracy of the training batch 13 is:  14.78125
accuracy of the training batch 14 is:  15.515625
accuracy of the training batch 15 is:  15.515625
accuracy of the training batch 16 is:  15.140625
accuracy of the training batch 17 is:  15.3125
accuracy of the training batch 18 is:  15.625
accuracy of the training batch 19 is:  15.75
accuracy of the training batch 20 is:  15.4375
accuracy of the training batch 21 is:  15.4375
accuracy

In [21]:
# Sanity check of reader.ptb_iterator on a tiny hand-made sequence.
# The demo keeps its own names (`toy_data`, `gen_toy_epochs`) so that the
# encoded corpus in `data` and the `gen_epochs` used by train_network are not
# overwritten when the notebook is run from top to bottom.
toy_data = [1,2,3,4,2,3,4,5,6,7,8,9, 9,8,7,6,6,5,4,3,4,3,2,1]
def gen_toy_epochs(n, num_steps, batch_size):
    """Yield `n` epochs, each a fresh `reader.ptb_iterator` over `toy_data`."""
    for i in range(n):
        yield reader.ptb_iterator(toy_data, batch_size, num_steps)

num_epochs = 3
for idx, epoch in enumerate(gen_toy_epochs(num_epochs, num_steps=4, batch_size=2)):
    print (idx)
    # Each epoch yields (inputs, targets) pairs.
    for x,y in epoch:
        print ('popppoppopopopopoppop')
        print (x)
        print (y)


0
popppoppopopopopoppop
[[1 2 3 4]
 [9 8 7 6]]
[[2 3 4 2]
 [8 7 6 6]]
popppoppopopopopoppop
[[2 3 4 5]
 [6 5 4 3]]
[[3 4 5 6]
 [5 4 3 4]]
1
popppoppopopopopoppop
[[1 2 3 4]
 [9 8 7 6]]
[[2 3 4 2]
 [8 7 6 6]]
popppoppopopopopoppop
[[2 3 4 5]
 [6 5 4 3]]
[[3 4 5 6]
 [5 4 3 4]]
2
popppoppopopopopoppop
[[1 2 3 4]
 [9 8 7 6]]
[[2 3 4 2]
 [8 7 6 6]]
popppoppopopopopoppop
[[2 3 4 5]
 [6 5 4 3]]
[[3 4 5 6]
 [5 4 3 4]]


In [13]:
def build_graph(
    cell_type = None,
    num_weights_for_custom_cell = 5,
    state_size = 100,
    num_classes = vocab_size,
    batch_size = 32,
    num_steps = 200,
    num_layers = 3,
    build_with_dropout=False,
    learning_rate = 1e-4):
    """Build a multi-layer RNN next-token model in a fresh default graph.

    Args:
        cell_type: 'Custom', 'GRU', 'LSTM' or 'LN_LSTM'; any other value
            (including the default None) selects `BasicRNNCell`.
        num_weights_for_custom_cell: second argument passed to `CustomCell`
            when `cell_type == 'Custom'`.
        state_size: cell state size; also used as the embedding width.
        num_classes: number of distinct token ids (defaults to `vocab_size`
            as it was when this function was defined).
        batch_size: sequences per batch (fixed in the placeholders).
        num_steps: time steps per sequence (fixed in the placeholders).
        num_layers: number of stacked layers.
        build_with_dropout: when true, wrap each layer's input and the
            stack's output in `DropoutWrapper` driven by the `dropout` tensor.
        learning_rate: learning rate passed to `tf.train.AdamOptimizer`.

    Returns:
        dict with the graph handles: `x`, `y`, `init_state`, `final_state`,
        `total_loss`, `train_step`, `preds`, `saver` and `dropout` (the
        keep-probability tensor used by the dropout wrappers).
    """

    # Discards whatever graph was built before, so variable names can be reused.
    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    # Keep probability for the dropout wrappers; 1.0 keeps everything.
    # NOTE(review): a different value would have to be supplied for this
    # tensor through feed_dict at training time - confirm that a tf.constant
    # can be fed in the TensorFlow version in use.
    dropout = tf.constant(1.0)

    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])

    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    # NOTE(review): CustomCell and LayerNormalizedLSTMCell are not defined in
    # the part of the notebook visible here; they must exist before those
    # cell types are requested.
    if cell_type == 'Custom':
        cell = CustomCell(state_size, num_weights_for_custom_cell)
    elif cell_type == 'GRU':
        cell = tf.nn.rnn_cell.GRUCell(state_size)
    elif cell_type == 'LSTM':
        cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    elif cell_type == 'LN_LSTM':
        cell = LayerNormalizedLSTMCell(state_size)
    else:
        cell = tf.nn.rnn_cell.BasicRNNCell(state_size)

    if build_with_dropout:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=dropout)

    # The non-LSTM branch deliberately leaves state_is_tuple at the library
    # default rather than passing an explicit value.
    if cell_type == 'LSTM' or cell_type == 'LN_LSTM':
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    else:
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)

    if build_with_dropout:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)

    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

    # Flatten rnn_outputs and y so every time step is one row of a single matmul.
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])

    logits = tf.matmul(rnn_outputs, W) + b

    predictions = tf.nn.softmax(logits)

    # Keyword arguments make it impossible to swap logits and labels by position.
    total_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step,
        preds = predictions,
        saver = tf.train.Saver(),
        # Exposed so callers can reach the keep-probability tensor; without
        # this handle build_with_dropout=True could never apply any dropout.
        dropout = dropout
    )

In [None]:
# Train a layer-normalised LSTM for 20 epochs and checkpoint it to model_path.
# NOTE(review): absolute, machine-specific checkpoint location.
model_path = '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Word-Search-NNets/Tiny-Shakespeare'

g = build_graph(cell_type='LN_LSTM', num_steps=80)
t = time.time()
losses = train_network(g, 20, num_steps=80, save=model_path)
print("It took", time.time() - t, "seconds to train for 20 epochs.")
# Only index into the losses when a non-empty list actually came back, so a
# missing result cannot turn a finished training run into a traceback.
if losses:
    print("The average loss on the final epoch was:", losses[-1])
else:
    print("train_network returned no per-epoch losses to report.")