In [1]:
"""
    About: this notebook checks whether the code given at "http://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html"
    performs well on the news data (training_with_sentences).
    
    The problem we encounter here is that the loss reported after every batch is "nan".
    
    BEFORE RUNNING THIS CODE YOU SHOULD FIRST RUN "DATA_BUILDER.PY" TO EXTRACT, CLEAN AND LOAD THE DATA
    INTO PICKLE FILES; ONLY THEN WILL THIS CODE COME IN HANDY.
"""


import numpy as np
import time
import os
import urllib.request
from six.moves import cPickle as pickle


import tensorflow as tf
from tensorflow.models.rnn.ptb import reader
from gensim import corpora

import matplotlib.pyplot as plt



In [2]:
def reset_graph():
    """Close the notebook-level ``sess`` (if any) and clear the default TF graph.

    A global named ``sess`` is closed only when it exists and is truthy;
    the default graph is reset unconditionally afterwards.
    """
    stale_session = globals().get('sess')
    if stale_session:
        stale_session.close()
    tf.reset_default_graph()

In [10]:
def build_multilayer_lstm_graph_with_dynamic_rnn(
    state_size = 100,
    num_classes = 10,
    batch_size = 128,
    learning_rate = 1e-4):
    """Build an LSTM language-model graph driven by ``tf.nn.dynamic_rnn``.

    Despite the function name, a single ``LSTMCell`` layer is created; the
    ``MultiRNNCell`` stacking (and its ``num_layers`` parameter) is not
    enabled in this version.

    The default graph is reset via ``reset_graph()`` before anything is
    built, so any previously constructed graph is discarded.

    Args:
        state_size: Number of LSTM hidden units; also the embedding width.
        num_classes: Vocabulary size, used for the number of embedding rows
            and for the width of the softmax layer.
        batch_size: Fixed first dimension of both placeholders and of the
            zero initial state.
        learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.

    Returns:
        dict with the graph handles ``x``, ``y``, ``init_state``,
        ``final_state``, ``total_loss``, ``train_step`` and
        ``training_prediction`` (the softmax over the logits).
    """
    reset_graph()

    print ('The num of hidden unit is: ', state_size)
    print ('The Vocab size is: ', num_classes)
    print ('The batch_size is: ', batch_size)
    print ('The learning_rate is: ', learning_rate)

    # Token ids in, token ids out; the second dimension (time steps) is left dynamic.
    x = tf.placeholder(tf.int32, [batch_size, None], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, None], name='labels_placeholder')

    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])

    # Note that our inputs are no longer a list, but a tensor of dims batch_size x num_steps x state_size
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

    # Reshape rnn_outputs and y so we can get the logits in a single matmul.
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])

    logits = tf.matmul(rnn_outputs, W) + b
    output_state = tf.nn.softmax(logits)

    # Keyword arguments: the positional (logits, labels) call is rejected by
    # TensorFlow >= 1.0, while the named form is accepted by old and new releases.
    # NOTE(review): the TF docs require every label to lie in [0, num_classes);
    # out-of-range ids in the batches are a plausible cause of the "nan" loss
    # this notebook reports -- TODO confirm against the pickled data.
    per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=y_reshaped)
    total_loss = tf.reduce_mean(per_token_loss)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        train_step = train_step,
        training_prediction = output_state
    )

In [24]:
# class Train():
#     def __init__(self):
# Root folder of the prepared data. Override it per machine with the
# WORD_NETS_DATA_DIR environment variable; the default is the original location.
data_root = os.environ.get(
    'WORD_NETS_DATA_DIR',
    '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Word-Search-NNets/Word-Nets'
).rstrip('/')
# Trailing slash is kept because train_network builds file names by string concatenation.
train_batch_dir = data_root + '/training_batch/'
dictionary_dir = data_root + '/dictionary.txt'

def accuracy(predictions, labels, labels_one_hot = None):
    """Return the percentage of rows whose arg-max prediction matches the label.

    Args:
        predictions: 2-D array of per-class scores, one row per example.
        labels: Targets. When ``labels_one_hot`` is truthy they are reduced
            with an arg-max over axis 1; otherwise they are flattened and
            used directly as class ids.
        labels_one_hot: Truthy if ``labels`` are one-hot encoded.

    Returns:
        100 * (number of matching rows) / (number of prediction rows).
    """
    predicted_ids = np.argmax(predictions, 1)

    if labels_one_hot:
        # One-hot targets: recover the class id of every row.
        target_ids = np.argmax(labels, 1)
    else:
        # Integer targets of any shape: flatten to line up with the prediction rows.
        target_ids = np.reshape(labels, [-1])

    num_correct = np.sum(predicted_ids == target_ids)
    return 100.0 * num_correct / predictions.shape[0]
        

def train_network(g, num_batches, epochs=1, verbose=None ):
    """Train the graph in ``g`` on pickled batches and report loss/accuracy.

    For every epoch, the files ``batch<no>.pickle`` (``no`` in
    ``0 .. num_batches-1``) are read from ``train_batch_dir``. Each file must
    unpickle to a mapping with the keys ``'batch_train_dataset'`` and
    ``'batch_train_labels'``. Within an epoch the final LSTM state of one
    batch is fed as the initial state of the next; it is reset at the start
    of each epoch.

    Args:
        g: Dict of graph handles as returned by
            ``build_multilayer_lstm_graph_with_dynamic_rnn``.
        num_batches: Number of batch files to process per epoch.
        epochs: Number of passes over those batch files.
        verbose: If truthy, print the average loss after every epoch.

    Returns:
        List with one entry per epoch: the mean of the per-batch losses
        (NaN when ``num_batches`` is 0).
    """
    epoch_losses = []

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        for epoch in np.arange(epochs):
            training_state = None
            training_loss = 0

            for no in np.arange(num_batches):
                # NOTE(security): pickle.load can execute arbitrary code; only
                # load batch files produced by your own data builder.
                # The file is closed as soon as it is loaded instead of being
                # held open for the whole training step.
                with open(train_batch_dir+'batch'+str(no)+'.pickle', 'rb') as f:
                    dataset = pickle.load(f)

                batch_train_dataset = dataset['batch_train_dataset']
                batch_train_labels = dataset['batch_train_labels']

                feed_dict={g['x']: batch_train_dataset, g['y']: batch_train_labels}
                if training_state is not None:
                    # Carry the LSTM state over from the previous batch.
                    feed_dict[g['init_state']] = training_state
                training_loss_, training_state, _, tp = sess.run([g['total_loss'],
                                                                  g['final_state'],
                                                                  g['train_step'],
                                                                  g['training_prediction']],
                                                                 feed_dict)
                training_loss += training_loss_
                acc = accuracy(tp, batch_train_labels)

                print ('accuracy of the batch %d is: '%no, acc)
                print ('')
                print ('Average Loss for the batch %d is: '%no, training_loss_)
                print ('')

            print ('All %d Batches Done..'%num_batches)
            print ('')

            # Guard against num_batches == 0, which used to raise ZeroDivisionError.
            epoch_loss = training_loss/num_batches if num_batches else float('nan')
            epoch_losses.append(epoch_loss)

            if verbose:
                print("Average training loss for Epoch", epoch, ":", epoch_loss)

    return epoch_losses




In [25]:
# Smoke test: build the graph once with the default hyper-parameters and time it.
# The returned dict of graph handles is intentionally discarded here.
t = time.time()
build_multilayer_lstm_graph_with_dynamic_rnn()
print("It took", time.time() - t, "seconds to build the graph.")

The num of hidden unit is:  100
The Vocab size is:  10
The batch_size is:  128
The learning_rate is:  0.0001
It took 0.3268280029296875 seconds to build the graph.


In [26]:
# The vocabulary size is the number of entries in the saved gensim dictionary.
vocab_size = len(corpora.Dictionary.load_from_text(dictionary_dir))
graph_dict =  build_multilayer_lstm_graph_with_dynamic_rnn(state_size = 100, 
                                                           num_classes = vocab_size,
                                                           batch_size = 128)
# Single source of truth for the epoch count, so the timing message below
# cannot drift from the value actually passed to train_network.
num_epochs = 1
t = time.time()
train_network(graph_dict, num_batches = 5, epochs=num_epochs, verbose=True)
print("It took", time.time() - t, "seconds to train for", num_epochs, "epoch(s).")




The num of hidden unit is:  100
The Vocab size is:  17155
The batch_size is:  128
The learning_rate is:  0.0001
accuracy of the batch 0 is:  0.0

Average Loss for the batch 0 is:  nan

accuracy of the batch 1 is:  55.5921052632

Average Loss for the batch 1 is:  nan

accuracy of the batch 2 is:  61.8489583333

Average Loss for the batch 2 is:  nan

accuracy of the batch 3 is:  59.6514423077

Average Loss for the batch 3 is:  nan

accuracy of the batch 4 is:  54.5915570175

Average Loss for the batch 4 is:  nan

All 5 Batches Done..

Average training loss for Epoch 0 : nan
It took 36.00739288330078 seconds to train for 3 epochs.
