In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.python.framework import ops
import math

In [59]:
# Hyper-parameters
hparams = {
    'hidden_size': 512,      # hidden layer's size
    'seq_length': 30,        # number of steps to unroll
    'learning_rate': 1e-3,
    'num_epochs': 60,
    'dropout': 0.5,          # fed to the dropout placeholder (used as keep_prob) during training
}

In [3]:
# `vocab` is a project-local module (not defined in this notebook) that provides DataBuilder.
import vocab
# The builder is reused below to load the train/valid/test splits.
data_builder = vocab.DataBuilder()
# NOTE(review): presumably builds the word <-> index mapping from the corpus -- confirm in vocab.py.
# The returned object exposes `word2ind`, whose length is used as the vocabulary size.
data_vocab = data_builder.build_vocabulary()

In [4]:
# Data/model configuration; the vocabulary size is taken from the vocabulary built above.
conf = {
    'batch_size': 256,
    'vocab_size': len(data_vocab.word2ind),
    'embed_size': 50,
}

In [5]:
# Load the three dataset splits with the shared vocabulary, in train/valid/test order.
train, valid, test = (
    data_builder.get_data(word_vocab=data_vocab, dataset=split_name)
    for split_name in ('train', 'valid', 'test')
)
# Display the shape of each split as the cell output.
tuple(split.shape for split in (train, valid, test))

((929589,), (73760,), (82430,))

In [6]:
def create_placeholder(hparams,conf):
    """Create the input placeholders of the graph.

    Args:
        hparams: dict providing 'seq_length' (second dimension of X and Y).
        conf: accepted for signature symmetry with the other builders; not read here.

    Returns:
        (X, Y, dropout) where
          X       is an int32 placeholder of shape (None, seq_length),
          Y       is a float32 placeholder of shape (None, seq_length),
          dropout is a float32 placeholder with no declared shape.
    """
    batch_by_steps = (None, hparams['seq_length'])
    X = tf.placeholder(dtype=tf.int32, shape=batch_by_steps)
    Y = tf.placeholder(dtype=tf.float32, shape=batch_by_steps)
    dropout = tf.placeholder(dtype=tf.float32)
    return X, Y, dropout

In [35]:
def initialize_parameters(hparams,conf):
    """Create the output-projection (softmax) variables.

    Args:
        hparams: dict providing 'hidden_size'.
        conf: dict providing 'vocab_size'.

    Returns:
        dict with
          "W": float32 variable 'softmax_w' of shape [hidden_size, vocab_size],
               Xavier-initialised;
          "b": float32 variable 'softmax_b' of shape [1, vocab_size],
               zero-initialised.
    """
    hidden_size = hparams['hidden_size']
    vocab_size = conf['vocab_size']

    projection_w = tf.get_variable(
        name='softmax_w',
        shape=[hidden_size, vocab_size],
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer())
    projection_b = tf.get_variable(
        name='softmax_b',
        shape=[1, vocab_size],
        dtype=tf.float32,
        initializer=tf.initializers.zeros())

    return {"W": projection_w, "b": projection_b}

In [8]:
def add_embeddings(X,hparams,conf):
    """Look up a trainable embedding vector for every token id in X.

    Args:
        X: integer tensor of token ids.
        hparams: not read here.
        conf: dict providing 'vocab_size' and 'embed_size'.

    Returns:
        The looked-up embeddings; for ids of shape (batch, sequence_length)
        this is (batch, sequence_length, embedding_size).
    """
    table_shape = (conf['vocab_size'], conf['embed_size'])
    # The embedding table and the lookup op are pinned to the CPU.
    with tf.device('/cpu:0'):
        embedding_table = tf.get_variable(
            name="L",
            shape=table_shape,
            initializer=tf.contrib.layers.xavier_initializer())
        return tf.nn.embedding_lookup(
            params=embedding_table, ids=X, name='embeddings')

In [9]:
def forward_propagation(X,parameters,hparams,conf, keep = 1):
    """Build a two-layer LSTM and project every time step to vocabulary logits.

    Args:
        X: tensor that is unstacked along axis 1 into hparams['seq_length'] steps.
        parameters: dict with the projection weight "W" and bias "b".
        hparams: dict providing 'seq_length' and 'hidden_size'.
        conf: not read here.
        keep: keep probability for the dropout applied to the first layer's outputs.

    Returns:
        A list with one logits tensor per time step.
    """
    step_inputs = tf.unstack(X, hparams['seq_length'], 1)
    hidden_size = hparams['hidden_size']

    with tf.variable_scope("RNN1"):
        first_cell = rnn.BasicLSTMCell(hidden_size)
        first_outputs, _ = rnn.static_rnn(cell=first_cell, inputs=step_inputs, dtype=tf.float32)
        # Dropout sits between the two layers only: it is applied to layer-1 outputs.
        first_outputs = [tf.nn.dropout(step, keep_prob=keep) for step in first_outputs]

    with tf.variable_scope("RNN2"):
        second_cell = rnn.BasicLSTMCell(hidden_size)
        second_outputs, _ = rnn.static_rnn(cell=second_cell, inputs=first_outputs, dtype=tf.float32)

    # The same projection is applied to every time step of the top layer.
    weight, bias = parameters["W"], parameters["b"]
    return [tf.matmul(step, weight) + bias for step in second_outputs]

In [10]:
def compute_cost(output_sequence_logits, Y, hparams, conf):
    """Cross-entropy sequence loss with every target position weighted equally.

    Args:
        output_sequence_logits: logits tensor passed to
            tf.contrib.seq2seq.sequence_loss as `logits`.
        Y: target token ids; cast to int32 before being used as `targets`.
        hparams: kept for interface compatibility; no longer read.
        conf: kept for interface compatibility; no longer read.

    Returns:
        The scalar loss tensor produced by sequence_loss (averaged across the
        batch; time-step averaging is left at the library default).
    """
    targets = tf.cast(Y, tf.int32)
    # Derive the all-ones weights from the targets that are actually fed rather
    # than from a constant [conf['batch_size'], seq_length] tensor: the constant
    # made the graph fail for any batch whose size differs from
    # conf['batch_size'] (e.g. a final partial batch). For full-size batches the
    # resulting loss is unchanged.
    weights = tf.ones_like(targets, dtype=tf.float32)
    loss = tf.contrib.seq2seq.sequence_loss(
        logits=output_sequence_logits,
        targets=targets,
        weights=weights,
        average_across_batch=True)
    return loss

In [11]:
def get_X_Y(data,seq_len):
    """Cut a 1-D token stream into fixed-length inputs and next-token targets.

    The stream is zero-padded on the right so that it holds
    ``ceil(len(data) / seq_len) * seq_len + 1`` entries. Row ``i`` of X is
    ``padded[i*seq_len:(i+1)*seq_len]`` and row ``i`` of Y is the same window
    shifted one position to the right.

    Args:
        data: 1-D array-like of token ids.
        seq_len: positive integer window length.

    Returns:
        (X, Y): two arrays of shape (ceil(len(data)/seq_len), seq_len).

    Raises:
        ValueError: if seq_len is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be a positive integer")

    data = np.asarray(data)
    num_tokens = data.shape[0]
    # Integer ceiling division: unlike math.ceil(a / b) it does not depend on
    # true division, so it cannot floor (and yield a negative pad) under
    # Python 2 integer semantics.
    num_of_x = -(-num_tokens // seq_len)
    # One extra trailing element so the last target window is complete.
    required_zero_pad = seq_len * num_of_x - num_tokens + 1
    padded = np.pad(data, (0, required_zero_pad), mode='constant', constant_values=(0, 0))

    # The row count is deliberately NOT recomputed from the padded length: for
    # seq_len == 1 that produced one extra X row and an empty final Y row.
    X = padded[:-1].reshape(num_of_x, seq_len).copy()
    Y = padded[1:].reshape(num_of_x, seq_len).copy()
    return X, Y

In [12]:
def get_mini_batches(X,Y,batch_size):
    """Split X and Y into consecutive mini-batches of exactly `batch_size` rows.

    Only full batches are produced: trailing rows that do not fill a complete
    batch are dropped.

    Args:
        X: array of inputs, batched along axis 0.
        Y: array of targets, sliced with the same row ranges as X.
        batch_size: number of rows per batch.

    Returns:
        A list of (batch_x, batch_y) tuples in input order.
    """
    full_batches = int(X.shape[0] / batch_size)
    offsets = [k * batch_size for k in range(full_batches)]
    return [(X[lo:lo + batch_size], Y[lo:lo + batch_size]) for lo in offsets]

In [56]:
def model(X_train, Y_train, X_test, Y_test, hparams, conf, load=False,epoch_to_load=0,
          X_valid=None, Y_valid=None):
    """Build the LSTM language-model graph, train it, and report costs.

    The default graph is reset, the graph is rebuilt from the helper functions
    of this notebook, and the model is trained with Adam for
    hparams['num_epochs'] epochs. After every epoch the training and validation
    costs are printed; a checkpoint is written every 5th epoch (0, 5, 10, ...).
    After training, the cost on (X_test, Y_test) is printed.

    Args:
        X_train, Y_train: training inputs / targets.
        X_test, Y_test: data evaluated once after the last epoch.
        hparams: dict with 'seq_length', 'hidden_size', 'learning_rate',
            'num_epochs' and 'dropout' (fed to the dropout placeholder while
            training; evaluation feeds 1).
        conf: dict with 'batch_size', 'vocab_size' and 'embed_size'.
        load: when True, restore the checkpoint of epoch `epoch_to_load`
            before training.
        epoch_to_load: epoch number embedded in the checkpoint file name.
        X_valid, Y_valid: data evaluated after every epoch. When omitted,
            (X_test, Y_test) is used. Previously the function silently read
            notebook-level globals named X_valid / Y_valid here.

    Returns:
        (costs, parameters): the per-epoch training costs and the dict of
        projection variables returned by initialize_parameters.

    Raises:
        ValueError: if only one of X_valid / Y_valid is given.
    """
    if (X_valid is None) != (Y_valid is None):
        raise ValueError("X_valid and Y_valid must be provided together")
    if X_valid is None:
        X_valid, Y_valid = X_test, Y_test

    ops.reset_default_graph()
    costs = []

    X, Y, drop_out = create_placeholder(hparams,conf)
    placeholders = (X, Y, drop_out)

    parameters = initialize_parameters(hparams,conf)

    Z = add_embeddings(X, hparams, conf)
    logits = forward_propagation(Z,parameters,hparams, conf, keep=drop_out)

    # Per-step logits are concatenated along axis 1 and reshaped to
    # (batch, seq_length, vocab_size) before being handed to the loss.
    output = tf.reshape(tf.concat(values=logits,axis=1), shape=(-1,hparams['seq_length'],conf['vocab_size']))
    cost = compute_cost(output,Y,hparams,conf)

    optimizer = tf.train.AdamOptimizer(learning_rate=hparams['learning_rate']).minimize(cost)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    batch_size = conf['batch_size']
    num_epochs = hparams['num_epochs']

    with tf.Session() as sess:
        sess.run(init) #run init

        if load:
            saver.restore(sess, './LSTM_checkpoints/LSTM_2_epoch_' + str(epoch_to_load) + '.ckpt')

        for epoch in range(num_epochs):
            train_cost = _average_cost(sess, cost, placeholders, X_train, Y_train,
                                       batch_size, hparams['dropout'], train_op=optimizer)
            costs.append(train_cost)
            print("training cost after epoch:" + str(epoch) + " = " + str(train_cost))

            valid_cost = _average_cost(sess, cost, placeholders, X_valid, Y_valid,
                                       batch_size, 1)
            print("validation cost after epoch:" + str(epoch) + " = " + str(valid_cost))

            # NOTE(review): the ./LSTM_checkpoints directory is assumed to exist.
            if epoch % 5 ==0:
                saver.save(sess,'./LSTM_checkpoints/LSTM_2_epoch_' + str(epoch) + '.ckpt')

        test_cost = _average_cost(sess, cost, placeholders, X_test, Y_test, batch_size, 1)
        # Report the index of the last epoch without relying on the loop
        # variable, which is undefined when num_epochs is 0.
        print("Test cost after epoch:" + str(num_epochs - 1) + " = " + str(test_cost))

    return costs,parameters


def _average_cost(sess, cost, placeholders, inputs, targets, batch_size, keep_prob, train_op=None):
    """Average `cost` over the full mini-batches of (inputs, targets); also run `train_op` per batch when given."""
    X, Y, drop_out = placeholders
    num_minibatches = int(inputs.shape[0]/batch_size)
    average = 0
    for minibatch_X, minibatch_Y in get_mini_batches(inputs, targets, batch_size):
        feed = {X: minibatch_X, Y: minibatch_Y, drop_out: keep_prob}
        if train_op is None:
            temp_cost = sess.run(cost, feed_dict=feed)
        else:
            _, temp_cost = sess.run([train_op, cost], feed_dict=feed)
        average += temp_cost / num_minibatches
    return average

In [50]:
# Turn each split into (inputs, next-token targets) windows of hparams['seq_length'] tokens.
(X_train, Y_train), (X_valid, Y_valid), (X_test, Y_test) = (
    get_X_Y(split, hparams['seq_length']) for split in (train, valid, test)
)

In [60]:
# Train from scratch. Note that the validation split is what gets passed in the
# X_test / Y_test slots; the returned per-epoch costs are discarded.
_, parameters = model(
    X_train=X_train,
    Y_train=Y_train,
    X_test=X_valid,
    Y_test=Y_valid,
    hparams=hparams,
    conf=conf,
    load=False,
    epoch_to_load=0,
)

training cost after epoch:0 = 6.96976686903
validation cost after epoch:0 = 6.67788161172
training cost after epoch:1 = 6.68340827217
validation cost after epoch:1 = 6.64372316996
training cost after epoch:2 = 6.66476753724
validation cost after epoch:2 = 6.6393081877
training cost after epoch:3 = 6.66222205832
validation cost after epoch:3 = 6.63623581992
training cost after epoch:4 = 6.63096858253
validation cost after epoch:4 = 6.55859576331
training cost after epoch:5 = 6.52912417325
validation cost after epoch:5 = 6.47276968426
training cost after epoch:6 = 6.45873050059
validation cost after epoch:6 = 6.41424809562
training cost after epoch:7 = 6.40194779782
validation cost after epoch:7 = 6.3720937305
training cost after epoch:8 = 6.3531440191
validation cost after epoch:8 = 6.3299735917
training cost after epoch:9 = 6.31207664742
validation cost after epoch:9 = 6.29673888948
training cost after epoch:10 = 6.27186824468
validation cost after epoch:10 = 6.26197327508
training cos

In [61]:
# Presumably converts a cross-entropy cost into a perplexity (exp of the cost).
# NOTE(review): 5.1662279235 is a hard-coded literal, presumably copied from a
# training log -- confirm which split/epoch it came from.
np.exp(5.1662279235)

175.25252319498784

Things to do<br>
<ul>
<li>Hyperparameter Tuning</li>
<li>Add more LSTM layers with dropout in between</li>
</ul>