In [1]:
"""
    BEFORE RUNNING THIS CODE, FIRST RUN "DATA_BUILDER.PY" TO EXTRACT, CLEAN AND LOAD THE DATA
    INTO PICKLE FILES; ONLY THEN WILL THIS CODE COME IN HANDY.
"""


import numpy as np
import time
import os
import urllib.request
from six.moves import cPickle as pickle


import tensorflow as tf
from tensorflow.models.rnn.ptb import reader
from gensim import corpora

import matplotlib.pyplot as plt


In [2]:
def reset_graph():
    """Close a leftover global ``sess`` (if any) and reset TensorFlow's default graph."""
    # Only a truthy module-level object named `sess` is closed; a missing or
    # falsy `sess` is ignored.
    leftover_session = globals().get('sess')
    if leftover_session:
        leftover_session.close()
    tf.reset_default_graph()

In [9]:
def dynamic_RNN_model(
    num_hid_units = 3,
    vocab_size = 7,
    momentum = 0.9,
    learning_rate = 0.01
    ):
    """Build the TensorFlow graph of an embedding -> LSTM -> softmax sequence model.

    The default graph is reset first (through ``reset_graph``), so each call
    starts from an empty graph. The hyper-parameters are echoed to stdout.

    Args:
        num_hid_units: Size of each embedding vector and of the LSTM cell.
        vocab_size: Number of rows of the embedding matrix; it is also used as
            the number of output classes.
        momentum: Momentum value passed to ``tf.train.MomentumOptimizer``.
        learning_rate: Learning rate passed to ``tf.train.MomentumOptimizer``.

    Returns:
        dict with the graph handles needed for training:
            ``x`` / ``y``: int32 placeholders of shape ``[None, None]``.
            ``batch_size``: tensor holding the first dimension of ``x``.
            ``init_state``: zero state of the LSTM cell (can be overridden via feed_dict).
            ``new_state``: state returned by ``tf.nn.dynamic_rnn``.
            ``loss_CE``: mean sparse softmax cross-entropy over all flattened positions.
            ``optimizer``: the Nesterov-momentum minimize op for ``loss_CE``.
            ``training_prediction``: softmax over the flattened output logits.
    """
    print ('The num of hidden unit is: ', num_hid_units)
    print ('The Vocab size is: ', vocab_size)
    print ('The momentum is: ', momentum)
    print ('The learning_rate is: ', learning_rate)

    # One output class per vocabulary entry.
    num_classes = vocab_size

    reset_graph()

    x = tf.placeholder(tf.int32, shape = [None, None], name='input_placeholder')
    y = tf.placeholder(tf.int32, shape = [None, None], name='output_placeholdr')
    # Taken from the fed data at run time, so batches of any size can be used.
    batch_size = tf.shape(x)[0]

    # EMBEDDING (INPUT) LAYER
    # One trainable row of `num_hid_units` weights per vocabulary id; no
    # initializer is given, so TensorFlow's default for get_variable applies.
    embed_to_hid_wghts = tf.get_variable('embedding_matrix', [vocab_size, num_hid_units])
    embed_to_hid_layer = tf.nn.embedding_lookup(embed_to_hid_wghts, x)
    print ('The shape of embed_to_hid_wghts is: ', embed_to_hid_wghts.get_shape())
    print ('The shape of embed_to_hid_layer is: ', embed_to_hid_layer.get_shape())

    # HIDDEN LAYER
    rnn_cell = tf.nn.rnn_cell.LSTMCell(num_hid_units, state_is_tuple=True)
    # Zero state by default; the training loop may feed the previous batch's
    # final state in its place.
    init_state = rnn_cell.zero_state(batch_size, tf.float32)
    rnn_outputs, new_state = tf.nn.dynamic_rnn(cell=rnn_cell,
                                               # sequence_length=X_lengths,
                                               initial_state=init_state,
                                               inputs=embed_to_hid_layer)

    # OUTPUT LAYER
    # The projection weights live in their own variable scope.
    with tf.variable_scope('output_layer'):
        hid_to_output_wght = tf.get_variable('hid_to_output_wght', [num_hid_units, num_classes])
        output_bias = tf.get_variable('output_bias',
                                      [num_classes],
                                      initializer=tf.constant_initializer(0.0))

    # Flatten all RNN outputs into rows of `num_hid_units` so one matmul
    # projects every position at once.
    rnn_outputs = tf.reshape(rnn_outputs, [-1, num_hid_units])
    logits = tf.matmul(rnn_outputs, hid_to_output_wght) + output_bias
    output_state = tf.nn.softmax(logits, name=None)

    # LOSS AND OPTIMIZER
    # Labels are flattened the same way as the logits; the sparse loss expects
    # integer class ids (int32 or int64), not one-hot vectors.
    y_reshaped = tf.reshape(y, [-1])
    # Named arguments: TensorFlow >= 1.0 rejects positional arguments for this
    # op, and the keyword form is also valid with the older (logits, labels)
    # signature.
    loss_CE = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_reshaped, logits=logits))

    optimizer = tf.train.MomentumOptimizer(learning_rate,
                                           momentum,
                                           use_locking=False,
                                           name='Momentum',
                                           use_nesterov=True).minimize(loss_CE)
    # Accuracy is not part of the graph; it is computed in numpy from
    # `training_prediction` (see Train.accuracy).

    # Graph handles used by the training loop.
    return dict(
        x=x,
        y=y,
        batch_size = batch_size,
        init_state = init_state,
        new_state = new_state,
        loss_CE = loss_CE,
        optimizer = optimizer,
        training_prediction = output_state
    )



In [12]:
class Train():
    """Runs the training loop for a graph built by ``dynamic_RNN_model``.

    Mini-batches are read from pickle files named ``batch<N>.pickle`` inside
    ``train_batch_dir``; each file must hold a dict with the keys
    ``'batch_train_dataset'`` and ``'batch_train_labels'``.
    """

    # Original notebook locations, kept as defaults so ``Train()`` still works
    # unchanged. Pass explicit paths to the constructor on any other machine.
    DEFAULT_TRAIN_BATCH_DIR = '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Word-Search-NNets/Word-Nets/training_batch/'
    DEFAULT_DICTIONARY_PATH = '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/Word-Search-NNets/Word-Nets/dictionary.txt'

    def __init__(self, train_batch_dir=None, dictionary_dir=None):
        """Remember the batch directory and read the vocabulary size.

        Args:
            train_batch_dir: Directory holding the ``batch<N>.pickle`` files.
                Defaults to ``DEFAULT_TRAIN_BATCH_DIR``.
            dictionary_dir: Path of the gensim dictionary text file.
                Defaults to ``DEFAULT_DICTIONARY_PATH``.
        """
        if train_batch_dir is None:
            train_batch_dir = self.DEFAULT_TRAIN_BATCH_DIR
        if dictionary_dir is None:
            dictionary_dir = self.DEFAULT_DICTIONARY_PATH
        self.train_batch_dir = train_batch_dir
        self.vocab_size = len(corpora.Dictionary.load_from_text(dictionary_dir))

    def accuracy(self, predictions, labels, labels_one_hot = None):
        """Return the percentage of rows whose arg-max prediction equals the label.

        Args:
            predictions: 2-D array of per-class scores, one row per prediction.
            labels: Class ids (flattened before comparison), or one-hot rows
                when ``labels_one_hot`` is truthy.
            labels_one_hot: Truthy if ``labels`` is one-hot encoded.

        Returns:
            Accuracy in percent; ``0.0`` when ``predictions`` has no rows.
        """
        num_rows = predictions.shape[0]
        if num_rows == 0:
            # Nothing to score; avoid a 0/0 division.
            return 0.0

        predicted_ids = np.argmax(predictions, 1)
        if labels_one_hot:
            expected_ids = np.argmax(labels, 1)
        else:
            expected_ids = np.reshape(labels, [-1])
        return 100.0 * np.sum(predicted_ids == expected_ids) / num_rows

    def _load_batch(self, batch_no):
        """Load one pickled training batch and return ``(dataset, labels)``."""
        batch_path = os.path.join(self.train_batch_dir, 'batch' + str(batch_no) + '.pickle')
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # batch files that you produced yourself.
        with open(batch_path, 'rb') as f:
            dataset = pickle.load(f)
        return dataset['batch_train_dataset'], dataset['batch_train_labels']

    def train_network(self, graph_dict, num_batches, epochs=1, verbose=None ):
        """Train the graph on batches ``0 .. num_batches-1`` for ``epochs`` epochs.

        The first batch of every epoch starts from the graph's zero state;
        every later batch is fed the RNN state returned by the previous batch.
        NOTE(review): feeding the previous state presumably requires all
        batches to have the same number of sequences -- confirm against the
        batch builder.

        Args:
            graph_dict: Dict of graph handles as returned by ``dynamic_RNN_model``.
            num_batches: Number of batch files to read per epoch.
            epochs: Number of passes over the batch files.
            verbose: If truthy, print the average training loss per epoch.
        """
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())

            for epoch in range(epochs):
                new_hid_layer_state = None
                training_loss = 0

                for batch_no in range(num_batches):
                    batch_train_dataset, batch_train_labels = self._load_batch(batch_no)

                    feed_dict = {graph_dict['x']: batch_train_dataset,
                                 graph_dict['y']: batch_train_labels}
                    # Compare with None explicitly: truthiness of the state
                    # object is not a reliable "is there a state" test.
                    if new_hid_layer_state is None:
                        print ('Using the zero init RNN State')
                    else:
                        print ('Using the new RNN State')
                        feed_dict[graph_dict['init_state']] = new_hid_layer_state

                    # The optimizer op must be fetched for the weights to be
                    # updated; its value and the batch size are not used here.
                    _, nwst, loss, _, tp = sess.run([graph_dict['batch_size'],
                                                     graph_dict['new_state'],
                                                     graph_dict['loss_CE'],
                                                     graph_dict['optimizer'],
                                                     graph_dict['training_prediction']],
                                                    feed_dict=feed_dict)
                    new_hid_layer_state = nwst
                    training_loss += loss
                    acc = self.accuracy(tp, batch_train_labels)

                    print ('accuracy of the batch %d is: '%batch_no, acc)
                    print ('')
                    print ('Average Loss for the batch %d is: '%batch_no, loss)
                    print ('')

                print ('All %d Batches Done..'%num_batches)
                print ('')

                # Skip the average when no batch was run (would divide by zero).
                if verbose and num_batches:
                    print("Average training loss for Epoch", epoch, ":", training_loss/num_batches)

                # TODO: periodically (e.g. every 20 batches) evaluate on the
                # cross-validation batches ('batch_valid_dataset' /
                # 'batch_valid_labels' pickles in a validation directory).
                # Not implemented yet.




In [13]:
obj_Train = Train()
# dynamic_RNN_model has no `num_classes` parameter (passing it raises TypeError);
# the number of output classes is derived from `vocab_size` inside the function.
graph_dict = dynamic_RNN_model(vocab_size = obj_Train.vocab_size)
obj_Train.train_network(graph_dict, num_batches = 5, epochs=1, verbose=True)

The num of hidden unit is:  100
The Vocab size is:  17155
The batch_size is:  128
The learning_rate is:  0.0001
[[17153  2636 16270 ...,     0     0     0]
 [17153  1044 10072 ...,     0     0     0]
 [17153 14404  2686 ...,     0     0     0]
 ..., 
 [17153 11839  1932 ...,     0     0     0]
 [17153 13398 14528 ...,     0     0     0]
 [17153  2636 11642 ...,     0     0     0]]
All 5 Batches Done..

Average training loss for Epoch 0 : 0.0


In [None]:
accuracy 
 0.0

popopopopopopopoop

Using the new RNN State
accuracy 
 55.5921052632

popopopopopopopoop

Using the new RNN State
accuracy 
 61.8489583333

popopopopopopopoop

Using the new RNN State
accuracy 
 59.6514423077

popopopopopopopoop

Using the new RNN State
accuracy 
 54.5915570175

popopopopopopopoop

Using the new RNN State
accuracy 
 64.4301470588

popopopopopopopoop

Using the new RNN State
accuracy 
 61.6153492647

popopopopopopopoop

Using the new RNN State
accuracy 
 81.9647606383

popopopopopopopoop

Using the new RNN State
accuracy 
 63.6126893939

popopopopopopopoop

Using the new RNN State
accuracy 
 78.9463141026

popopopopopopopoop

Using the new RNN State
accuracy 
 69.6716994382

popopopopopopopoop

Using the new RNN State
accuracy 
 62.4280427632

popopopopopopopoop