In [4]:
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import urllib.request

import tensorflow as tf
from tensorflow.models.rnn.ptb import reader

In [5]:
file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
file_name = 'tinyshakespeare.txt'
if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)
    
with open(file_name,'r') as f:
    raw_data = f.read()
    print("Data length:", len(raw_data))

vocab = set(raw_data)
vocab_size = len(vocab)
idx_to_vocab = dict(enumerate(vocab))
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))

data = [vocab_to_idx[c] for c in raw_data]
print('The Vocab Size is: ', vocab_size)
print('The vocab_to_idx  is: ', vocab_to_idx)
print ('The data lenght is: ', len(data))

Data length: 1115394
The Vocab Size is:  65
The vocab_to_idx  is:  {'h': 0, 'A': 1, 'p': 3, 'j': 64, 'U': 6, 'y': 5, 'J': 49, 'N': 15, '3': 34, 'L': 8, 'T': 9, '&': 11, '\n': 12, 'B': 13, 'W': 14, 'Q': 19, 'c': 16, 'Y': 17, 'V': 18, 'X': 21, 'E': 59, '$': 7, 'H': 58, 'n': 24, 'f': 25, 'a': 4, 'K': 53, 's': 26, '?': 27, 'I': 29, 'k': 30, 'z': 46, 'o': 31, ':': 32, 'O': 20, 'g': 33, 'b': 35, 'Z': 36, 'x': 37, ' ': 50, 'M': 38, 'u': 40, '-': 41, 'e': 42, "'": 43, '!': 44, 'S': 51, ',': 45, 'r': 23, 'i': 48, 'l': 47, 'P': 52, 'm': 10, 'R': 54, 'D': 28, 't': 55, 'w': 2, 'C': 56, ';': 57, 'q': 61, 'F': 60, 'v': 62, '.': 63, 'G': 22, 'd': 39}
The data lenght is:  1115394


In [6]:
def reset_graph():  # Reset the graph
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

In [25]:
def dynamic_RNN_model(
    batch_size = 2,
    num_hid_units = 3,
    num_classes = 6,
    num_sequences = 4,
    momentum = 0.9,
    learning_rate = 0.5):

    vocab_size = num_classes
    
    reset_graph()
    
    x = tf.placeholder(tf.int32, shape = [batch_size, num_sequences], name='input_placeholder')
    y = tf.placeholder(tf.int32, shape = [batch_size, num_sequences], name='output_placeholder')

    
    # ENBEDDING(INPUT) LAYER OPERATION
    # Creating an Embedding matrix with a random weight for all vacab to hidden_matrix
    embed_to_hid_wghts = tf.get_variable('embedding_matrix', [vocab_size, num_hid_units])
    # Normally we convert the input index into a one hot matrix and then multiply it to the embedded weights, When we do so, we get the same embed weight corresponding to 1's in the one-hot vector but in a different shape. The below operation does all that in a single shot.
    embed_to_hid_layer = tf.nn.embedding_lookup(embed_to_hid_wghts, x)

    # HIDDEN LAYER OPERATION
    rnn_cell = tf.nn.rnn_cell.LSTMCell(num_hid_units, state_is_tuple=True)
    init_state = rnn_cell.zero_state(batch_size, tf.float32)  # Each sequence will hava a state that it passes to its next sequence
    rnn_outputs, new_state = tf.nn.dynamic_rnn(
                                        cell=rnn_cell,
                                        # sequence_length=X_lengths,
                                        initial_state=init_state,
                                        inputs=embed_to_hid_layer)
    

    # OUTPUT LAYER OPERATION
    # Initialize the weight and biases for the output layer. We use variable scope because we would like to share the weights 
    with tf.variable_scope('output_layer'):
        hid_to_output_wght = tf.get_variable('hid_to_output_wght', 
                                                 [num_hid_units, num_classes], 
                                                 initializer = tf.random_normal_initializer())
        output_bias = tf.get_variable('output_bias',
                                      [num_classes],
                                      initializer = tf.random_normal_initializer())
    # The variable rnn_output is a Tensor of shape of [Batch_size x num_sequence x num_hid_units] and,
    # The hid_to_output_wght is in the shape of [num_hid_units x num_classes]
    # And We want an output with shape [Batch_size x num_sequence x num_classes]
    # We horizontlly stack all the batches to form a matrix of [(Batch_size x num_sequence]) x num_classes]
    rnn_outputs = tf.reshape(rnn_outputs, [-1, num_hid_units])  
    hid_to_ouptut_layer = tf.matmul(rnn_outputs, hid_to_output_wght) +  output_bias
    output_state = tf.nn.softmax(hid_to_ouptut_layer, name=None)
 
    
    
    # CALCULATING LOSS, OPTIMIZING THE COST FUNCTION, MEASURING ACCURACY
    loss_CE = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(hid_to_ouptut_layer, tf.reshape(y, [-1])))
    # sparse_softmax_cross_entropy_with_logits automatically converts the y's into on hot vectors and perform the softmax operation
    # When using softmax_cross_entropy_with_logits, we have to first convert the y's into one-hot vector
    # The sparse_softmax uses dtype as int32 or int64
    optimizer = tf.train.MomentumOptimizer(learning_rate, 
                                            momentum, 
                                            use_locking=False, 
                                            name='Momentum', 
                                            use_nesterov=True).minimize(loss_CE)
    # y_ = tf.reshape(y, [-1])
    # correct_prediction = tf.equal(tf.arg_max(output_state,1), tf.arg_max(y_ ,1))
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    

    # Returns graph objects
    return dict(
        x=x,
        y=y,
        embed_to_hid_wghts = embed_to_hid_wghts,
        hid_to_output_wght = hid_to_output_wght,
        init_state = init_state,
        new_state = new_state,
        loss_CE = loss_CE,
        optimizer = optimizer,
        training_prediction = output_state
        # accuracy = tf.Variable([[1,2,3]])
    )

In [26]:
def accuracy(predictions, labels, labels_one_hot = None):
    # The input labels are a One-Hot Vector
    if labels_one_hot:
        return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
              / predictions.shape[0])
    else:
        return (100.0 * np.sum(np.argmax(predictions, 1) == np.reshape(labels, [-1]))
              / predictions.shape[0])
    

def train_network(graph_dict):
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        
#         x_new = np.array([[1,4,2,2],[1,4,3,0]])
#         y_new = np.array([[4,2,2,5],[4,3,0,5]])
        
        training_data = np.array([[1,2,3,4], [1,3,4,0], [1,4,2,2], [1,4,3,0]])
        training_labels = np.array([[2,3,4,5], [3,4,0,5], [4,2,2,5], [4,3,0,5]])
        epochs = 50
        for epoch in np.arange(epochs):
            new_hid_layer_state = None
            for i in [2,4]:
    #         print (training_data[0:2,:])
                batch_data = training_data[i-2:i,:]
                batch_labels = training_labels[i-2:i,:]

                batch_size=len(batch_data)
                num_hidden_layer = 3
                num_classes = 6, 
                num_sequences = 4

    #             print (batch_data)
    #             print (batch_labels)

    #             print (batch_size)
                if not new_hid_layer_state: 
                    feed_dict= {graph_dict['x']: batch_data, 
                                graph_dict['y']: batch_labels}
                else:
                    print ('Using the new RNN State')
                    feed_dict= {graph_dict['x']: batch_data, 
                                graph_dict['y']: batch_labels, 
                                graph_dict['init_state'] : new_hid_layer_state}

                a, b, c, e, j, k, prediction= sess.run([graph_dict['embed_to_hid_wghts'],
                                          graph_dict['hid_to_output_wght'],
                                          graph_dict['init_state'],
                                         graph_dict['new_state'],
                                         graph_dict['loss_CE'],
                                         graph_dict['optimizer'],
                                         graph_dict['training_prediction']], feed_dict=feed_dict)
                new_hid_layer_state = e

                acc = accuracy(prediction, batch_labels)

               
                print ('loss_CE \n', j)
                print ('')
                print ('optimizer \n', k)
                print ('')
                print ('training_prediction \n', prediction)
                print ('')
                print ('accuracy \n', acc)
                # print ('')
                print ('')
                print ('popopopopopopopoop')
                print ('')
                print ('')


                
                
graph_dict = dynamic_RNN_model()
train_network(graph_dict)


embed_to_hid_wghts 
 [[ 0.47581863  0.60052562  0.34422788]
 [ 0.37818438 -0.29039779  0.08285818]
 [-0.69653338  0.5867185   0.52296889]
 [ 0.26172379 -0.18788069  0.64254165]
 [-0.35197937  0.07622467  0.00926965]
 [ 0.31608397  0.1814127   0.3699438 ]]

hid_to_output_wght 
 [[ 0.2254542   0.76846129  0.76811731 -0.5891338  -0.48305503  0.63987815]
 [ 0.63931555  0.34062961 -0.02504358  1.30996978  0.27690715 -0.86971796]
 [ 0.09140234  1.47284496  0.66085136 -0.58494473 -0.25401527 -1.34911776]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.49446142  0.62728369  0.36087066]
 [ 0.37252903 -0.29981968  0.06920109]
 [-0.6563071   0.59415144  0.52797616]
 [ 0.26044306 -0.1831713   0.65539789]
 [-0.33462539  0.08461424  0.00100037]
 [ 0.31608397  0.1814127   0.3699438 ]]

hid_to_output_wght 
 [[ 0.23163611  0.7678833   0.75259668 -0.58759427 -0.46313974  0.62834007]
 [ 0.64654952  0.34110025 -0.04068306  1.31661916  0.30249584 -0.89402121]
 [ 0.06550196  1.48409

In [None]:
embed_to_hid_wghts 
 [[ 0.3380937   0.62644857 -0.50651783]
 [-0.29930362 -0.12048948 -0.28731066]
 [-0.14054778 -0.29635972  0.54727423]
 [-0.22367907 -0.08781286 -0.26717448]
 [ 0.18986416 -0.23878893  0.28529114]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 1.08064198  0.48962995 -0.20349774  0.93551141  1.10989237 -0.09024534]
 [-1.40799832 -0.54062182  0.55545753 -0.69321829  1.76262963  1.02554905]
 [ 0.08176671  0.8214168  -0.04816911 -0.08106466  1.14568615  0.47156352]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.32880539  0.60987324 -0.5208782 ]
 [-0.29196075 -0.07690518 -0.3145059 ]
 [-0.14273489 -0.29235008  0.4737488 ]
 [-0.205286   -0.07094114 -0.28186166]
 [ 0.2308912  -0.17843479  0.16600892]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 1.07022619  0.49749279 -0.20366152  0.94769871  1.11564815 -0.10547174]
 [-1.41005576 -0.54169184  0.57377404 -0.688968    1.74669635  1.02204299]
 [ 0.08341606  0.81439167 -0.01319226 -0.08877085  1.10342515  0.49192959]]

popopopopopopopoop


embed_to_hid_wghts 
 [[ 0.32196832  0.58833796 -0.53854603]
 [-0.26942423 -0.03559221 -0.35226679]
 [-0.13686906 -0.2935403   0.45169213]
 [-0.24464317 -0.11543481 -0.21490963]
 [ 0.26344836 -0.13511662  0.07119273]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 1.06100929  0.51048219 -0.18476437  0.96028054  1.11092985 -0.13600489]
 [-1.4140743  -0.53806293  0.59210831 -0.67119479  1.73742902  0.99559247]
 [ 0.08561818  0.81238657  0.0024346  -0.08402029  1.08158493  0.49319538]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.31925902  0.56254381 -0.56029242]
 [-0.28837174 -0.03665181 -0.33223784]
 [-0.16097878 -0.31539732  0.39288363]
 [-0.24515444 -0.11013826 -0.20326014]
 [ 0.29268661 -0.07874571 -0.03846072]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 1.05155766  0.53035623 -0.18145543  0.99751061  1.0944643  -0.17050079]
 [-1.42011654 -0.52965486  0.61078757 -0.64882976  1.71287549  0.97673589]
 [ 0.08085803  0.81318581  0.0329436  -0.08315013  1.05101943  0.49634269]]

popopopopopopopoop


embed_to_hid_wghts 
 [[ 0.31792304  0.5345884  -0.58306807]
 [-0.2827493  -0.02722285 -0.32887784]
 [-0.16503975 -0.32530734  0.37609786]
 [-0.30772239 -0.15948702 -0.11523932]
 [ 0.31742072 -0.03335676 -0.12862359]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 1.03845656  0.55313092 -0.14758834  1.02118254  1.06965697 -0.21290603]
 [-1.42841887 -0.52147108  0.6361047  -0.62576222  1.69780636  0.94353884]
 [ 0.0782906   0.81398445  0.05473784 -0.07832598  1.03014159  0.49237093]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.31960136  0.50417042 -0.60719377]
 [-0.3201943  -0.03736926 -0.28597298]
 [-0.19113201 -0.3568978   0.33190653]
 [-0.30679542 -0.15384972 -0.09281906]
 [ 0.3481847   0.02986821 -0.21913253]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 1.02857447  0.58189541 -0.14717355  1.06281495  1.03008962 -0.2342684 ]
 [-1.43512344 -0.51079863  0.65353405 -0.60568678  1.66687417  0.93299824]
 [ 0.07220328  0.81576508  0.08572704 -0.08138242  0.99988604  0.49900043]]

popopopopopopopoop


embed_to_hid_wghts 
 [[ 0.32174534  0.47334909 -0.63192356]
 [-0.31485721 -0.02994407 -0.26385167]
 [-0.19329502 -0.36830604  0.31833032]
 [-0.38267958 -0.18866307 -0.00360502]
 [ 0.37593722  0.08257946 -0.29538983]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 1.00989568  0.6102578  -0.11231213  1.08220088  0.98987323 -0.25798297]
 [-1.44607496 -0.50208491  0.67566669 -0.58338851  1.65401196  0.90366739]
 [ 0.06734012  0.81675518  0.10548614 -0.07927596  0.98389089  0.49700302]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.32628816  0.44121364 -0.65709609]
 [-0.36176699 -0.02711541 -0.2157145 ]
 [-0.21030997 -0.39944798  0.28146747]
 [-0.37147105 -0.17825809  0.02250667]
 [ 0.41373372  0.15471686 -0.36894125]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 0.99688077  0.6419329  -0.12391476  1.11102462  0.94169527 -0.24568626]
 [-1.45309234 -0.49257994  0.68887353 -0.57068813  1.63050675  0.89877784]
 [ 0.06067769  0.81747341  0.13727701 -0.08815817  0.96042281  0.5035066 ]]

popopopopopopopoop


embed_to_hid_wghts 
 [[ 0.3317118   0.40958706 -0.68320554]
 [-0.35548568 -0.00919423 -0.18735732]
 [-0.20836516 -0.41046408  0.26973081]
 [-0.44733778 -0.19698144  0.10468995]
 [ 0.4479053   0.21359268 -0.43113303]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 0.97366387  0.67198259 -0.09619598  1.12129414  0.90047419 -0.24928631]
 [-1.46522224 -0.48469913  0.70688891 -0.55085677  1.6256634   0.87002361]
 [ 0.05364103  0.81806087  0.15630445 -0.08849688  0.95178199  0.49990788]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.34013414  0.37720862 -0.71002203]
 [-0.40955186  0.00421353 -0.14206004]
 [-0.21619911 -0.44090521  0.23701996]
 [-0.42734966 -0.18334752  0.13171747]
 [ 0.48922092  0.28612548 -0.48882473]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 0.96325541  0.70350796 -0.11851542  1.14011347  0.86167443 -0.22810341]
 [-1.47194898 -0.47643614  0.7152074  -0.54148304  1.6140511   0.86240739]
 [ 0.04584155  0.81852543  0.18545079 -0.10090862  0.93623304  0.50605714]]

popopopopopopopoop


embed_to_hid_wghts 
 [[ 0.35015768  0.34584153 -0.73817921]
 [-0.40756091  0.02889854 -0.11470031]
 [-0.21225104 -0.45232645  0.2266593 ]
 [-0.50136173 -0.1928871   0.20541042]
 [ 0.52587134  0.34453276 -0.53787291]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 0.94008154  0.73304278 -0.09719191  1.14601266  0.83517647 -0.23518908]
 [-1.48449361 -0.46949965  0.73001909 -0.52386463  1.61879718  0.83083934]
 [ 0.03572953  0.81902909  0.20412439 -0.10308843  0.93570006  0.49970472]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.36417753  0.31403467 -0.76765066]
 [-0.47613597  0.04310007 -0.07589169]
 [-0.2147115  -0.48430932  0.19611135]
 [-0.4751859  -0.17796761  0.23164995]
 [ 0.5653525   0.41000047 -0.58125544]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 0.93852907  0.76292562 -0.12640965  1.1618042   0.81721872 -0.23213546]
 [-1.4909327  -0.46222264  0.73323154 -0.5144518   1.61929274  0.81688064]
 [ 0.02615431  0.81981009  0.22750756 -0.11737233  0.92871666  0.50638306]]

popopopopopopopoop


embed_to_hid_wghts 
 [[ 0.38045663  0.2835497  -0.79881382]
 [-0.48383501  0.06789564 -0.05320209]
 [-0.21045049 -0.49696657  0.18666737]
 [-0.55003315 -0.18372659  0.29807097]
 [ 0.60070264  0.4628484  -0.61905676]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 0.91809708  0.79073238 -0.10784958  1.16891217  0.81428283 -0.26224238]
 [-1.50382245 -0.4562107   0.74517113 -0.497026    1.63392508  0.77976072]
 [ 0.01256704  0.82045621  0.24430948 -0.12012511  0.93740392  0.49658784]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.40181434  0.25280243 -0.831936  ]
 [-0.57580554  0.07724339 -0.0207165 ]
 [-0.20913847 -0.53179491  0.15566584]
 [-0.51724607 -0.16824347  0.32310891]
 [ 0.63535583  0.51936972 -0.65067464]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[  9.26333904e-01   8.18231642e-01  -1.40824139e-01   1.18521786e+00
    8.21303606e-01  -2.88330346e-01]
 [ -1.51021516e+00  -4.49952006e-01   7.44483531e-01  -4.86209899e-01
    1.64640832e+00   7.57282972e-01]
 [  1.39049347e-03   8.21513593e-01   2.62050778e-01  -1.34847030e-01
    9.39324439e-01   5.01767099e-01]]

popopopopopopopoop


embed_to_hid_wghts 
 [[ 0.42609683  0.22349331 -0.86711591]
 [-0.59584355  0.09945464 -0.00321346]
 [-0.20435032 -0.54583448  0.14592993]
 [-0.59312719 -0.17237882  0.38549191]
 [ 0.66823727  0.56561756 -0.68035412]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 0.90744805  0.84373778 -0.12042141  1.19640601  0.84430766 -0.34954548]
 [-1.52402461 -0.44493151  0.75398344 -0.4661057   1.67086804  0.71200812]
 [-0.01568454  0.82224679  0.27534688 -0.13654925  0.95781565  0.48802385]]

popopopopopopopoop


Using the new RNN State
embed_to_hid_wghts 
 [[ 0.45648074  0.19397728 -0.90477067]
 [-0.71396017  0.10407421  0.0264143 ]
 [-0.19574505 -0.58289045  0.1107368 ]
 [-0.55007786 -0.15595247  0.41006908]
 [ 0.69675589  0.61551386 -0.70317632]
 [-0.52738571 -0.42646405 -0.41866285]]

hid_to_output_wght 
 [[ 0.92228705  0.86860585 -0.15410151  1.21240902  0.87749249 -0.40476024]
 [-1.53108013 -0.43982607  0.75114506 -0.45526844  1.69664621  0.68018109]
 [-0.02886727  0.82328796  0.28935641 -0.15273564  0.96807677  0.49208111]]

popopopopopopopoop


embed_to_hid_wghts 
 [[ 0.49075428  0.16590841 -0.94515282]