In [6]:
import tensorflow as tf
import numpy as np
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import rnn

# Hyper-parameters shared by the cells below.
input_size = 2            # width of each per-timestep input placeholder
n_hidden = input_size     # size handed to BasicLSTMCell; kept equal to input_size
embedding_size = 300      # not referenced by any of the visible cells

# Batching and training schedule.
batch_size, sentence_length = 50, 55
num_epochs = 100

In [82]:
import data.load as load
from functools import reduce

#train_set, valid_set, test_set, dic = load.atisfold(3)
train_set, test_set, dic = load.atisfull()

# Reserve the index one past the largest existing word index for the padding token.
idx_pad = max(dic['words2idx'].values()) + 1
dic['words2idx']['<PAD>'] = idx_pad

# Reverse lookups (index -> label / word). idx2word is built after the insert
# above, so it also contains the '<PAD>' entry.
idx2label = {idx: label for label, idx in dic['labels2idx'].items()}
idx2word = {idx: word for word, idx in dic['words2idx'].items()}

train_lex, train_ne, train_y = train_set
#valid_lex, valid_ne, valid_y = valid_set
test_lex,  test_ne,  test_y  = test_set

# Count distinct ids in a single pass over every sentence. (Flattening with
# reduce(list + list) re-copies the growing list at every step.)
vocsize = len({word for sentence in train_lex + test_lex
               for word in sentence}) + 1 # +1 for padding

nclasses = len({label for labels in train_y + test_y
                for label in labels})
nsentences = len(train_lex)

In [68]:
# Length of the longest training sentence (the test split is not inspected here).
max_sentence = max(map(len, train_lex))
print(max_sentence)

46


In [83]:
# Show the vocabulary size and the padding index, one value per line.
print(vocsize, idx_pad, sep="\n")

573
572


In [69]:
# Total number of sentences across the train and test splits.
sum(map(len, (train_lex, test_lex)))

5871

In [62]:
def padding(sentence, pad=-1, max_length=50):
    """Right-pad `sentence` with `pad` until it holds `max_length` items.

    A sequence that already has `max_length` items or more is returned
    unchanged (the very same object; nothing is truncated). A shorter one
    comes back as a new array built by `np.append`.
    """
    missing = max_length - len(sentence)
    if missing <= 0:
        return sentence
    return np.append(sentence, [pad] * missing)

In [63]:
# The value 46 is bound to `pad`, so the sentence is filled with 46 up to the
# default max_length of 50.
# NOTE(review): if 46 was meant as the target length, pass it as max_length
# instead — confirm the intent.
print(padding(train_lex[0], pad=46))
print(train_y[0])

[232 542 502 196 208  77  62  10  35  40  58 234 137  62  11 234 481 321
  46  46  46  46  46  46  46  46  46  46  46  46  46  46  46  46  46  46
  46  46  46  46  46  46  46  46  46  46  46  46  46  46]
[126 126 126 126 126  48 126  35  99 126 126 126  78 126  14 126 126  12]


In [71]:
from scipy.sparse import csc_matrix


def _one_hot(seq, pad, max_length, width):
    """Pad `seq` to `max_length` and encode it as a sparse (max_length, width) one-hot matrix."""
    padded = padding(seq, pad, max_length=max_length)
    # Row j carries a single 1 in the column given by the j-th (padded) index.
    positions = np.arange(len(padded))
    ones = np.ones(len(padded), dtype=int)
    return csc_matrix((ones, (positions, np.asarray(padded))),
                      shape=(max_length, width))


def gen_data(source, s_pad, Y, y_pad, max_length, vocsize, nclasses, n_batch=5):
    """Yield consecutive batches of one-hot encoded sentences and label sequences.

    Each yielded item is a tuple ``(X, y)`` of two lists with one sparse matrix
    per sequence in the batch:

    * ``X[k]`` has shape ``(max_length, vocsize)``: one row per token position,
      one column per vocabulary index.
    * ``y[k]`` has shape ``(max_length, nclasses)``: one row per token position,
      one column per label index.

    Parameters
    ----------
    source : sequence of index sequences (the sentences).
    s_pad : index used to pad sentences up to `max_length`.
    Y : sequence of label-index sequences, sliced with the same bounds as `source`.
    y_pad : index used to pad label sequences up to `max_length`.
    max_length : number of rows of every matrix; sequences are padded to it.
    vocsize, nclasses : number of columns of the sentence / label matrices.
    n_batch : number of sequences per batch.

    Only full batches are produced; a trailing remainder shorter than
    `n_batch` is dropped. `padding` does not truncate, so a sequence longer
    than `max_length` makes the sparse-matrix constructor fail.

    Raises
    ------
    ValueError
        If `n_batch` is not positive.
    """
    if n_batch <= 0:
        raise ValueError("n_batch must be a positive integer")

    # Stepping by n_batch up to len(source) - n_batch keeps the last full batch,
    # including when len(source) is an exact multiple of n_batch.
    for start in range(0, len(source) - n_batch + 1, n_batch):
        stop = start + n_batch
        X = [_one_hot(sentence, s_pad, max_length, vocsize)
             for sentence in source[start:stop]]
        # Each label matrix is sized from its own padded sequence.
        y = [_one_hot(answer_seq, y_pad, max_length, nclasses)
             for answer_seq in Y[start:stop]]
        yield (X, y)

In [85]:
# Batch generator over the training split, 5 sentences per batch. The one-hot
# width comes from vocsize (printed as 573 above) rather than a literal, so it
# follows the vocabulary if the data changes.
# NOTE(review): 126 is the pad value for label sequences; it is the most common
# value in train_y[0] above, presumably the "outside" tag — confirm against
# dic['labels2idx'].
g = gen_data(train_lex, idx_pad, train_y, 126, max_sentence, vocsize, nclasses, 5)

In [95]:
# Pull the next batch from the generator: X and y are lists of sparse one-hot
# matrices (sentences and label sequences respectively).
X, y = next(g)

In [3]:
### Model Construction

cell = rnn_cell.BasicLSTMCell(n_hidden)    # basic LSTM cell provided in TensorFlow, built with n_hidden units

# Placeholders: one [batch_size, input_size] tensor per time step, plus the target tensor.
inputs = [tf.placeholder(tf.float32, shape=[batch_size, input_size]) for _ in range(sentence_length)]

# NOTE(review): `result` is declared as [batch_size, sentence_length, nclasses], while the
# prediction `outputs3` below is [batch_size, 1]. Those shapes do not look
# broadcast-compatible (50 vs 55 on the second-to-last axis), so the subtraction in `cost`
# likely fails at graph construction — confirm which target shape is intended.
result = tf.placeholder(tf.float32, shape=[batch_size, sentence_length, nclasses])

outputs, states = rnn.rnn(cell, inputs, dtype=tf.float32)   # outputs is a list with one tensor per time step,
                                                            # each of size [batch_size, num_units]

# Only the last time step's output is used.
outputs2 = outputs[-1]

# Linear layer mapping the last output to one value per example ([batch_size, 1]).
# Its input width is taken from n_hidden so it stays in sync with the LSTM cell size.
W_o = tf.Variable(tf.random_normal([n_hidden, 1], stddev=0.01))
b_o = tf.Variable(tf.random_normal([1], stddev=0.01))

outputs3 = tf.matmul(outputs2, W_o) + b_o

cost = tf.reduce_mean(tf.pow(outputs3 - result, 2))    # mean squared error for this batch of data

# Parameter updates that minimize the cost.
#train_op = tf.train.GradientDescentOptimizer(0.008).minimize(cost)
train_op = tf.train.RMSPropOptimizer(0.005, 0.2).minimize(cost)

In [4]:
### Generate Validation Data
# NOTE(review): this call does not match the gen_data defined earlier in this notebook
# (that one takes seven required arguments and is a generator), and seq_len is not
# assigned in any visible cell — presumably left over from an earlier version of the
# notebook; confirm before re-running.
tempX, y_val = gen_data(50, seq_len, batch_size)
# Slice along axis 1 to get one entry per time step.
X_val = [tempX[:, step, :] for step in range(seq_len)]

In [6]:
### Execute

with tf.Session() as sess:

    tf.initialize_all_variables().run()     # initialize all variables in the model

    # The validation feed is the same for every epoch, so it is built once here
    # instead of being rebuilt inside the loop.
    val_dict = {inputs[i]: X_val[i] for i in range(seq_len)}
    val_dict[result] = y_val

    for k in range(num_epochs):

        # Generate data for this epoch and split it into a list of seq_len slices,
        # one per time step, which is the form rnn.rnn is fed with.
        # NOTE(review): this gen_data call and seq_len do not match the gen_data
        # defined earlier in this notebook (seven required arguments, generator) —
        # presumably stale; confirm before re-running.
        tempX, y = gen_data(50, seq_len, batch_size)
        X = [tempX[:, i, :] for i in range(seq_len)]

        # Feed dictionary for the training step.
        temp_dict = {inputs[i]: X[i] for i in range(seq_len)}
        temp_dict[result] = y

        sess.run(train_op, feed_dict=temp_dict)   # perform an update on the parameters

        c_val = sess.run(cost, feed_dict=val_dict)   # cost on the validation set

        print("Validation cost: {}, on Epoch {}".format(c_val,k))

Validation cost: 1.1978884935379028, on Epoch 0
Validation cost: 1.1830931901931763, on Epoch 1
Validation cost: 1.1687648296356201, on Epoch 2
Validation cost: 1.153106451034546, on Epoch 3
Validation cost: 1.1375610828399658, on Epoch 4
Validation cost: 1.1218794584274292, on Epoch 5
Validation cost: 1.1052356958389282, on Epoch 6
Validation cost: 1.0882318019866943, on Epoch 7
Validation cost: 1.0709738731384277, on Epoch 8
Validation cost: 1.0545097589492798, on Epoch 9
Validation cost: 1.037086844444275, on Epoch 10
Validation cost: 1.0197356939315796, on Epoch 11
Validation cost: 1.0022794008255005, on Epoch 12
Validation cost: 0.9850460886955261, on Epoch 13
Validation cost: 0.9678436517715454, on Epoch 14
Validation cost: 0.9506497979164124, on Epoch 15
Validation cost: 0.9332269430160522, on Epoch 16
Validation cost: 0.9158437252044678, on Epoch 17
Validation cost: 0.8984217643737793, on Epoch 18
Validation cost: 0.8811301589012146, on Epoch 19
Validation cost: 0.8635833859443

KeyboardInterrupt: 