In [8]:

"""
    BEFORE RUNNING THIS CODE YOU SHOULD FIRST RUN THE "DATA_BUILDER.PY" TO FIRST EXTRACT, CLEAN AND LOAD THE DATA
    INTO PICKLE FILES AND THEN THIS CODE WILL PART COME IN HANDY.
"""

"""
Notes:
    1. ENBEDDING(INPUT) LAYER OPERATION
       --> embed_to_hid_wghts = tf.get_variable('embedding_matrix', [vocab_size, num_hid_units]):
           embed_to_hid_layer = tf.nn.embedding_lookup(embed_to_hid_wghts, x)
           # Normally we convert the input vector into a one hot matrix and then multiply it to the embedded weights, 
           When we do so, we get the same embed weight corresponding to 1's in the one-hot vector but in a different 
           shape. The above operation does all that in a single shot. Basically, embed_to_hid_wghts defines a matrix 
           with weights going form all vacab to hiddenunits,and embed_to_hid_layer pulls the vectors from embedding_matrix
           (embed_to_hid_wghts) corresponding to the idices entries of x for all the batch. 
           So the matrix embed_to_hid_layer = [batch_size x num_sequences x num_hid_units]

    2. HIDDEN LAYER OPERATION
       --> The output from dynamic_rnn "rnn_output" is a Tensor of shape of [Batch_size x num_sequence x num_hid_units] and,
           The hid_to_output_wght is in the shape of [num_hid_units x num_classes]
           And We want an output with shape [Batch_size x num_sequence x num_classes]
           We horizontlly stack all the batches to form a matrix of [(Batch_size x num_sequence]) x num_classes]
       --> In the dynamic_run we provide the "sequence_length", this would say the RNN that the batch are padded after
           after the given size. Therefore the RNN doesnt consider the padded sequences while calculating the RNN output.
           When the actual sequence length is given then the RNN would simply consider the rnn_output as 0 for the padded
           sequence

    3. OUTPUT LAYER OPERATION
       --> sparse_softmax_cross_entropy_with_logits automatically converts the y's into on hot vectors and perform 
           the softmax operation When using softmax_cross_entropy_with_logits, we have to first convert the y's 
           into one-hot vector

    4. MASK THE LOSES:
       --> We can calculate the loss directly as we do for every batch. Normally to calculate the loss we do:
           loss_CE = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(hid_to_ouptut_layer, tf.reshape(y, [-1])))
           But here we dont do the complete stuff at once because we have zero (0) padded all the sequences in a batch for 
           equal size . Naively calculating the loss at each time step doesn’t work because that would take into account
           the padded positions. So the solution is to create a weight matrix that “masks out” the losses at padded positions.

    5. VERY IMPORTANT NOTE:
       --> After we receive the dictionary from the gensim dictionary we need to add 1. Because the gensim dictionary 
           is build from index 0 to n. But our training or testing batch consist of index from 1 to n+1 (achieved in data_builder.py)
           . The addition of 1 is imperative because the embedding_matrix internally builds a corpus of (n*num_hidden_unit)
           and it builds from 0 so if we dont add 1 then the last word in the dictionarry will have a nan corresponding to its
           output and our loss will be nan too. Just to recap, we add 1 because we do zero (0) padding of the sequence length for a batch. 
        
    
"""

'\nNotes:\n    1. ENBEDDING(INPUT) LAYER OPERATION\n       --> embed_to_hid_wghts = tf.get_variable(\'embedding_matrix\', [vocab_size, num_hid_units]):\n           embed_to_hid_layer = tf.nn.embedding_lookup(embed_to_hid_wghts, x)\n           # Normally we convert the input vector into a one hot matrix and then multiply it to the embedded weights, \n           When we do so, we get the same embed weight corresponding to 1\'s in the one-hot vector but in a different \n           shape. The above operation does all that in a single shot. Basically, embed_to_hid_wghts defines a matrix \n           with weights going form all vacab to hiddenunits,and embed_to_hid_layer pulls the vectors from embedding_matrix\n           (embed_to_hid_wghts) corresponding to the idices entries of x for all the batch. \n           So the matrix embed_to_hid_layer = [batch_size x num_sequences x num_hid_units]\n\n    2. HIDDEN LAYER OPERATION\n       --> The output from dynamic_rnn "rnn_output" is a Tensor of

In [9]:
import numpy as np
import time
import os
import urllib.request
from six.moves import cPickle as pickle


import tensorflow as tf
from tensorflow.models.rnn.ptb import reader
from gensim import corpora

import matplotlib.pyplot as plt



In [10]:
def reset_graph():  # Reset the graph
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

In [14]:
def dynamic_RNN_model(
    num_hid_units = 3,
    vocab_size = 7,
    momentum = 0.9,
    learning_rate = 0.01,
    output_activation_init = 'RELU' 
    ):
    print ('The num of hidden unit is: ', num_hid_units)
    print ('The Vocab size is: ', vocab_size)
    print ('The momentum is: ', momentum)
    print ('The learning_rate is: ', learning_rate)
    
    
    num_classes = vocab_size

    reset_graph()
    
    x = tf.placeholder(tf.int32, shape = [None, None], name='input_placeholder')
    y = tf.placeholder(tf.int32, shape = [None, None], name='output_placeholdr')
    x_lenarr = tf.placeholder(tf.float32, shape = [None], name='output_placeholdr')
    batch_size = tf.shape(x)[0]
    

    # ENBEDDING(INPUT) LAYER OPERATION
    # Creating an Embedding matrix with a random weight for all vacab to hidden_matrix
    embed_to_hid_wghts = tf.get_variable('embedding_matrix', [vocab_size, num_hid_units])
    embed_to_hid_layer = tf.nn.embedding_lookup(embed_to_hid_wghts, x)
    print ('The shape of embed_to_hid_wghts is: ', embed_to_hid_wghts.get_shape())
    print ('The shape of embed_to_hid_layer is: ', embed_to_hid_layer.get_shape())


    # HIDDEN LAYER OPERATION
    rnn_cell = tf.nn.rnn_cell.LSTMCell(num_hid_units, state_is_tuple=True)
    init_state = rnn_cell.zero_state(batch_size, tf.float32)  # Each sequence will hava a state that it passes to its next sequence
    rnn_outputs, new_state = tf.nn.dynamic_rnn(cell=rnn_cell,
                                               sequence_length=x_lenarr,
                                               initial_state=init_state,
                                               inputs=embed_to_hid_layer,
                                               dtype=tf.float32)
    

    # OUTPUT LAYER OPERATION
    # Initialize the weight and biases for the output layer. We use variable scope because we would like to share the weights 
    with tf.variable_scope('output_layer'):
        hid_to_output_wght = tf.get_variable('hid_to_output_wght', [num_hid_units, num_classes],
                                            initializer = tf.random_normal_initializer())
        output_bias = tf.get_variable('output_bias', [num_classes],
                                      initializer=tf.random_normal_initializer())
    
    rnn_outputs = tf.reshape(rnn_outputs, [-1, num_hid_units])
    hid_to_ouptut_layer = tf.matmul(rnn_outputs, hid_to_output_wght) +  output_bias  
    # Also use tf.batch_matmul(rnn_outputs, hid_to_output_wght) +  output_bias  
    output_state = tf.nn.softmax(hid_to_ouptut_layer, name=None)
 
    
    # CALCULATING LOSS
    y_reshaped = tf.reshape(y, [-1])
    softmax_opt = tf.nn.sparse_softmax_cross_entropy_with_logits(hid_to_ouptut_layer, y_reshaped)
    
    # MASK THE LOSES
    mask = tf.sign(tf.to_float(y_reshaped))
    masked_loss = mask * softmax_opt
 
    # Bring back to [B, T] shape
    masked_loss = tf.reshape(masked_loss,  tf.shape(y))

    # Calculate mean loss
    mean_loss_by_example = tf.reduce_sum(masked_loss, reduction_indices=1) / x_lenarr
    mean_loss = tf.reduce_mean(mean_loss_by_example)

    # The sparse_softmax uses dtype as int32 or int64
    
    # OPTIMIZING THE LOSS FUNCTION
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mean_loss)
#     optimizer = tf.train.MomentumOptimizer(learning_rate, 
#                                             momentum, 
#                                             use_locking=False, 
#                                             name='Momentum', 
#                                             use_nesterov=True).minimize(loss_CE)

    # Returns graph objects
    return dict(
        x=x,
        y=y,
        x_lenarr=x_lenarr,
        batch_size = batch_size,
        init_state = init_state,
        new_state = new_state,
        loss = mean_loss,
        optimizer = optimizer,
        training_prediction = output_state
    )



In [15]:
class Train():
    def __init__(self):
        self.train_batch_dir = '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Word-Search-NNets/Word-Nets/training_batch/'
        dictionary_dir = '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Word-Search-NNets/Word-Nets/dictionary.txt'
        self.vocab_size = len(corpora.Dictionary.load_from_text(dictionary_dir))

    def accuracy(self, predictions, labels, labels_one_hot = None):
        # The input labels are a One-Hot Vector
        if labels_one_hot:
            return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
                  / predictions.shape[0])
        else:
            return (100.0 * np.sum(np.argmax(predictions, 1) == np.reshape(labels, [-1]))
                  / predictions.shape[0])
        

    def train_network(self, graph_dict, num_batches, epochs=1, verbose=None ):
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
#             cdoc = np.random.choice(np.arange(20), 5) # Randomly select group of 5 batches from the cross valid dataset to test after every 20 batches
            for epoch in np.arange(epochs):
                new_state_ = None
                training_loss = 0
                
                for no in np.arange(num_batches):
                    with open(self.train_batch_dir+'batch'+str(no)+'.pickle', 'rb') as f:
                        dataset = pickle.load(f)
                        
                        batch_train_dataset = dataset['batch_train_dataset']
                        batch_train_labels = dataset['batch_train_labels']
                        batch_train_lenarr = dataset['batch_train_lenarr']
#                         print (batch_train_lenarr)
#                         print ('')
#                         print ([len(i[np.where(i!=0)[0]]) for i in batch_train_dataset])
                        
                        feed_dict= {graph_dict['x']: batch_train_dataset, 
                                    graph_dict['y']: batch_train_labels,
                                    graph_dict['x_lenarr']: batch_train_lenarr}
            
                        if new_state_ is not None:
                            print ('Using the new RNN State')
                            feed_dict[graph_dict['init_state']] = new_state_

                        bs, new_state_, loss_, opt, tp = sess.run([graph_dict['batch_size'],
                                                        graph_dict['new_state'],
                                                        graph_dict['loss'],
                                                        graph_dict['optimizer'],
                                                        graph_dict['training_prediction']], 
                                                        feed_dict=feed_dict)
                        
                        training_loss += loss_
#                         print (len(tp[0]))
#                         print ([ii for ii in tp[0]])
                        
                        acc = self.accuracy(tp, batch_train_labels)

                        print ('accuracy of the batch %d is: '%no, acc)
                        print ('')
                        print ('Average Loss for the batch %d is: '%no, loss_)
                        print ('')
                
                print ('All %d Batches Done..'%num_batches)
                print ('')
                
                if verbose:
                    print("Average training loss for Epoch", epoch, ":", training_loss/num_batches)
                        
#                     if (num_batches%20 ==0 and num_batches!=0):
#                         print ('Evaluating cross validation dataset ')
#                         for cdoc_no in cdoc:
#                             with open(self.valid_batch_dir+'batch'+str(cdoc_no)+'.pickle', 'rb') as f1:
#                                 dataset = pickle.load(f)
                            
#                                 batch_valid_dataset = dataset['batch_valid_dataset']
#                                 batch_valid_labels = dataset['batch_valid_labels']




In [16]:
obj_Train = Train()
graph_dict =  dynamic_RNN_model(vocab_size = obj_Train.vocab_size+1,)
obj_Train.train_network(graph_dict, num_batches = 20, epochs=1, verbose=True)

The num of hidden unit is:  3
The Vocab size is:  17156
The momentum is:  0.9
The learning_rate is:  0.01
The shape of embed_to_hid_wghts is:  (17156, 3)
The shape of embed_to_hid_layer is:  (?, ?, 3)
accuracy of the batch 0 is:  0.0

Average Loss for the batch 0 is:  10.1119

Using the new RNN State
accuracy of the batch 1 is:  0.0137061403509

Average Loss for the batch 1 is:  10.0953

Using the new RNN State
accuracy of the batch 2 is:  0.0130208333333

Average Loss for the batch 2 is:  9.97455

Using the new RNN State
accuracy of the batch 3 is:  0.0120192307692

Average Loss for the batch 3 is:  10.0192

Using the new RNN State
accuracy of the batch 4 is:  0.0137061403509

Average Loss for the batch 4 is:  10.0228

Using the new RNN State
accuracy of the batch 5 is:  0.0

Average Loss for the batch 5 is:  9.95868

Using the new RNN State
accuracy of the batch 6 is:  0.0

Average Loss for the batch 6 is:  9.93488

Using the new RNN State
accuracy of the batch 7 is:  0.0

Average Lo

In [66]:
accuracy 
 0.0

popopopopopopopoop

Using the new RNN State
accuracy 
 55.5921052632

popopopopopopopoop

Using the new RNN State
accuracy 
 61.8489583333

popopopopopopopoop

Using the new RNN State
accuracy 
 59.6514423077

popopopopopopopoop

Using the new RNN State
accuracy 
 54.5915570175

popopopopopopopoop

Using the new RNN State
accuracy 
 64.4301470588

popopopopopopopoop

Using the new RNN State
accuracy 
 61.6153492647

popopopopopopopoop

Using the new RNN State
accuracy 
 81.9647606383

popopopopopopopoop

Using the new RNN State
accuracy 
 63.6126893939

popopopopopopopoop

Using the new RNN State
accuracy 
 78.9463141026

popopopopopopopoop

Using the new RNN State
accuracy 
 69.6716994382

popopopopopopopoop

Using the new RNN State
accuracy 
 62.4280427632

popopopopopopopoop

IndentationError: unexpected indent (<ipython-input-66-58db6a2f57dc>, line 2)

In [None]:
# Batch size
B = 4
# (Maximum) number of time steps in this batch
T = 8
RNN_DIM = 128
NUM_CLASSES = 10
 
# The *acutal* length of the examples
example_len = [1, 2, 3, 8]
 
# The classes of the examples at each step (between 1 and 9, 0 means padding)
y = np.random.randint(1, 10, [B, T])
for i, length in enumerate(example_len):
    y[i, length:] = 0  
     
# The RNN outputs
rnn_outputs = tf.convert_to_tensor(np.random.randn(B, T, RNN_DIM), dtype=tf.float32)
 
# Output layer weights
W = tf.get_variable(
    name="W",
    initializer=tf.random_normal_initializer(),
    shape=[RNN_DIM, NUM_CLASSES])
 
# Calculate logits and probs
# Reshape so we can calculate them all at once
rnn_outputs_flat = tf.reshape(rnn_outputs, [-1, RNN_DIM])
logits_flat = tf.batch_matmul(rnn_outputs_flat, W)
probs_flat = tf.nn.softmax(logits_flat)
 
# Calculate the losses 
y_flat =  tf.reshape(y, [-1])
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits_flat, y_flat)
 
# Mask the losses
mask = tf.sign(tf.to_float(y_flat))
masked_losses = mask * losses
 
# Bring back to [B, T] shape
masked_losses = tf.reshape(masked_losses,  tf.shape(y))
 
# Calculate mean loss
mean_loss_by_example = tf.reduce_sum(masked_losses, reduction_indices=1) / example_len
mean_loss = tf.reduce_mean(mean_loss_by_example)