In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.python.framework import ops

In [2]:
hparams=dict()
# Hyper-parameters
hparams['hidden_size']   = 100  # hidden layer's size
hparams['seq_length']    = 20   # number of steps to unroll
hparams['learning_rate'] = 1e-3
hparams['num_epochs'] = 10
hparams['dropout'] = 0.9

In [3]:
import vocab
data_builder = vocab.DataBuilder()
data_vocab = data_builder.build_vocabulary()

In [4]:
conf = dict()
conf['batch_size'] = 256
conf['vocab_size'] = len(data_vocab.word2ind)
conf['embed_size'] = 50

In [5]:
train = data_builder.get_data(word_vocab=data_vocab,dataset='train')
valid = data_builder.get_data(word_vocab=data_vocab,dataset='valid')
test = data_builder.get_data(word_vocab=data_vocab,dataset='test')
train.shape,valid.shape,test.shape

((929589,), (73760,), (82430,))

In [6]:
def create_placeholder(hparams,conf):
    X = tf.placeholder(shape=(None,hparams['seq_length']),dtype=tf.int32)
    Y = tf.placeholder(shape=(None,hparams['seq_length']),dtype=tf.float32)
    return X,Y

In [7]:
def initialize_parameters(hparams,conf):
    softmax_w = tf.get_variable(name='softmax_w', shape=[hparams['hidden_size'],conf['vocab_size']],
                                dtype=tf.float32,initializer=tf.contrib.layers.xavier_initializer())
    softmax_b = tf.get_variable(name='softmax_b', shape=[1,conf['vocab_size']],
                                dtype=tf.float32,initializer=tf.initializers.zeros())
    parameters = {"W":softmax_w,"b":softmax_b}
    return parameters

In [8]:
def add_embeddings(X,hparams,conf):
    #(batch,sequence_length,embedding_size)
    with tf.device('/cpu:0'):
        L = tf.get_variable(
            name="L", shape=(conf['vocab_size'], conf['embed_size']),
            initializer=tf.contrib.layers.xavier_initializer())
        embeddings = tf.nn.embedding_lookup(
            params=L, ids=X, name='embeddings')
        return embeddings

In [9]:
def forward_propagation(X,parameters,hparams,conf):
    X = tf.unstack(X,hparams['seq_length'],1)
    
    rnn_cell = rnn.BasicLSTMCell(hparams['hidden_size'])
    outputs, states = rnn.static_rnn(cell=rnn_cell,inputs=X,dtype=tf.float32)
    output_sequence_logits = []
    for output in outputs:
        output_sequence_logits.append(tf.matmul(output,parameters["W"]) + parameters["b"])
    print(output_sequence_logits[0].shape)
    return output_sequence_logits

In [10]:
def compute_cost(output_sequence_logits, Y, hparams, conf):
    w = tf.ones([conf['batch_size'], hparams['seq_length']])
    loss = tf.contrib.seq2seq.sequence_loss(logits=output_sequence_logits,
            targets=tf.cast(Y,tf.int32),
            weights=w, average_across_batch= True)
    return loss

In [11]:
def get_X_Y(data,seq_len):
    X=[]
    Y=[]
    num_of_x = data.shape[0]//seq_len
    for i in range(num_of_x):
        X.append(data[i*seq_len:(i+1)*seq_len])
        Y.append(data[i*seq_len + 1:(i+1)*seq_len + 1])
    return np.array(X),np.array(Y)

In [12]:
def get_mini_batches(X,Y,batch_size):
    num_of_input = X.shape[0]
    num_of_batches = int(num_of_input/batch_size)
    mini_batches = []
    for i in range(num_of_batches):
        start_ind = i*batch_size
        batch_x = X[start_ind:start_ind+batch_size]
        batch_y = Y[start_ind:start_ind+batch_size]
        mini_batches.append((batch_x,batch_y))
    '''if num_of_input%batch_size != 0:
        start_ind = num_of_batches*batch_size
        batch_x = X[start_ind:]
        batch_y = Y[start_ind:]
        mini_batches.append((batch_x,batch_y))
    '''
    return mini_batches

In [13]:
def model(X_train, Y_train, X_test, Y_test, hparams, conf):
    ops.reset_default_graph()
    costs = []
    
    X, Y = create_placeholder(hparams,conf)
    parameters = initialize_parameters(hparams,conf)
    Z = add_embeddings(X, hparams, conf)
    logits = forward_propagation(Z,parameters,hparams, conf)
    
    output = tf.reshape(tf.concat(values=logits,axis=1), shape=(-1,hparams['seq_length'],conf['vocab_size']))
    cost = compute_cost(output,Y,hparams,conf)
    
    optimizer = tf.train.AdamOptimizer(learning_rate=hparams['learning_rate']).minimize(cost)
    
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init) #run init
        
        batch_size = conf['batch_size']
        num_minibatches = int(X_train.shape[0]/batch_size)
        
        for epoch in range(hparams['num_epochs']):
            minibatch_cost = 0
            minibatches = get_mini_batches(X_train,Y_train,batch_size)
            for ind,minibatch in enumerate(minibatches):
                (minibatch_X,minibatch_Y) = minibatch
                _ , temp_cost = sess.run([optimizer,cost],feed_dict={X:minibatch_X,Y:minibatch_Y})
                minibatch_cost += temp_cost / num_minibatches
            costs.append(minibatch_cost)
            print("training cost after epoch:" + str(epoch) + " = " + str(minibatch_cost))
            
            minibatch_cost = 0
            num_minibatches = int(X_valid.shape[0]/batch_size)
            for ind,minibatch in enumerate(get_mini_batches(X_valid,Y_valid,batch_size)):
                (minibatch_X,minibatch_Y) = minibatch
                temp_cost = sess.run(cost,feed_dict={X:minibatch_X,Y:minibatch_Y})
                minibatch_cost += temp_cost / num_minibatches
            print("validation cost after epoch:" + str(epoch) + " = " + str(minibatch_cost))
        
        minibatch_cost = 0
        num_minibatches = int(X_test.shape[0]/batch_size)
        for ind,minibatch in enumerate(get_mini_batches(X_test,Y_test,batch_size)):
            (minibatch_X,minibatch_Y) = minibatch
            temp_cost = sess.run(cost,feed_dict={X:minibatch_X,Y:minibatch_Y})
            minibatch_cost += temp_cost / num_minibatches
        print("Test cost after epoch:" + str(epoch) + " = " + str(minibatch_cost))
                
    return costs,parameters

In [14]:
X_train,Y_train=get_X_Y(train,hparams['seq_length'])
X_valid,Y_valid=get_X_Y(valid,hparams['seq_length'])
X_test,Y_test=get_X_Y(test,hparams['seq_length'])

In [15]:
_, parameters = model(X_train, Y_train, X_valid, Y_valid,hparams,conf)

(?, 10000)
training cost after epoch:0 = 7.05222058612
validation cost after epoch:0 = 6.61612427235
training cost after epoch:1 = 82.5286626617
validation cost after epoch:1 = 6.47134852409
training cost after epoch:2 = 80.9437227448
validation cost after epoch:2 = 6.37231449286
training cost after epoch:3 = 79.5073924065
validation cost after epoch:3 = 6.25257352988
training cost after epoch:4 = 77.5542296966
validation cost after epoch:4 = 6.10589118799
training cost after epoch:5 = 76.0143843293
validation cost after epoch:5 = 6.01487825314
training cost after epoch:6 = 74.863173008
validation cost after epoch:6 = 5.93931269646
training cost after epoch:7 = 73.8668599725
validation cost after epoch:7 = 5.87432465951
training cost after epoch:8 = 72.9658233523
validation cost after epoch:8 = 5.8117120266
training cost after epoch:9 = 72.0451683203
validation cost after epoch:9 = 5.74589812756
Test cost after epoch:9 = 5.74589812756


In [18]:
2**5.74589812756

53.664574275601254