In [1]:
import sys, os, _pickle as pickle
import tensorflow as tf
import numpy as np
import nltk
from sklearn.metrics import f1_score

data_dir = 'data'
ckpt_dir = 'checkpoint'
word_embd_dir = 'checkpoint/word_embd'
model_dir = 'checkpoint/model2v2'

word_embd_dim = 100
pos_embd_dim = 25
dep_embd_dim = 25
word_vocab_size = 400001
pos_vocab_size = 10
dep_vocab_size = 21
relation_classes = 19
state_size = 100
batch_size = 10
channels = 3
lambda_l2 = 0.0001
max_len_path = 70
starter_learning_rate = 0.001
decay_steps = 2000
decay_rate = 0.96

In [2]:
with tf.name_scope("input"):
    fp_length = tf.placeholder(tf.int32, shape=[batch_size], name="fp_ength")
    fp = tf.placeholder(tf.int32, [2, batch_size, max_len_path], name="full_path")
    sp_length = tf.placeholder(tf.int32, shape=[batch_size, 2], name="sp_length")
    sp = tf.placeholder(tf.int32, [batch_size, 2, None], name="shortest_path")
    sp_pos = tf.placeholder(tf.int32, [batch_size, 2, None], name="sp_pos")
    sp_childs = tf.placeholder(tf.int32, [batch_size, 2, None, None], name="sp_childs")
    relation = tf.placeholder(tf.int32, [batch_size], name="relation")

with tf.name_scope("word_embedding"):
    W = tf.Variable(tf.constant(0.0, shape=[word_vocab_size, word_embd_dim]), name="W")
    embedding_placeholder = tf.placeholder(tf.float32,[word_vocab_size, word_embd_dim])
    embedding_init = W.assign(embedding_placeholder)
    embd_fp_word = tf.nn.embedding_lookup(W,fp[0])
    word_embedding_saver = tf.train.Saver({"word_embedding/W": W})

with tf.name_scope("pos_embedding"):
    W = tf.Variable(tf.random_uniform([pos_vocab_size, pos_embd_dim]), name="W")
    embd_fp_pos = tf.nn.embedding_lookup(W, fp[1])
    pos_embedding_saver = tf.train.Saver({"pos_embedding/W": W})

with tf.name_scope("dep_embedding"):
    W = tf.Variable(tf.random_uniform([dep_vocab_size, dep_embd_dim]), name="W")
    embd_sp = tf.nn.embedding_lookup(W, sp)
    dep_embedding_saver = tf.train.Saver({"dep_embedding/W": W})
    
embd_fp = tf.concat([embd_fp_word, embd_fp_pos], axis=2)
embd_fp_rev = tf.reverse(embd_fp, [1])
fp_length_rev = tf.reverse(fp_length, [0])

In [3]:
with tf.variable_scope("lstm_fw"):
    cell = tf.contrib.rnn.BasicLSTMCell(state_size)
    states_fw, _ = tf.nn.dynamic_rnn(cell, embd_fp, sequence_length=fp_length, dtype=tf.float32)

with tf.variable_scope("lstm_bw"):
    cell = tf.contrib.rnn.BasicLSTMCell(state_size)
    states, _ = tf.nn.dynamic_rnn(cell, embd_fp_rev, sequence_length=fp_length_rev, dtype=tf.float32)
    states_bw = tf.reverse(states, [1])

In [4]:
hidden_states_seq = tf.concat([states_fw, states_bw], axis=2)

In [5]:
cell_states_seq = tf.concat([states_fw, states_bw], axis=2)

In [6]:
tree_input_size = state_size * 2 + dep_embd_dim
init_const = tf.zeros([1, state_size])

# Tree LSTM bottom-up
with tf.variable_scope("lstm_tree_btup"):
    W_i = tf.get_variable("W_i", shape=[tree_input_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    U_i = tf.get_variable("U_i", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    b_i = tf.get_variable("b_i", initializer=init_const)
    
    W_f = tf.get_variable("W_f", shape=[tree_input_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    U_f = tf.get_variable("U_f", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    b_f = tf.get_variable("b_f", initializer=init_const)
    
    W_o = tf.get_variable("W_o", shape=[tree_input_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    U_o = tf.get_variable("U_o", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    b_o = tf.get_variable("b_o", initializer=init_const)
    
    U_ft = tf.get_variable("U_ft", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())

# Tree LSTM top-down
with tf.variable_scope("lstm_tree_tpdw"):
    W_i = tf.get_variable("W_i", shape=[tree_input_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    U_i = tf.get_variable("U_i", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    b_i = tf.get_variable("b_i", initializer=init_const)
    U_it = tf.get_variable("U_it", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())

    W_f = tf.get_variable("W_f", shape=[tree_input_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    U_f = tf.get_variable("U_f", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    b_f = tf.get_variable("b_f", initializer=init_const)
    U_ft = tf.get_variable("U_ft", shape=[max_len_path, state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())

    W_o = tf.get_variable("W_o", shape=[tree_input_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    U_o = tf.get_variable("U_o", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    b_o = tf.get_variable("b_o", initializer=init_const)
    U_ot = tf.get_variable("U_ot", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())

    W_u = tf.get_variable("W_u", shape=[tree_input_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    U_u = tf.get_variable("U_u", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())
    b_u = tf.get_variable("b_u", initializer=init_const)
    U_ut = tf.get_variable("U_ut", shape=[state_size, state_size], initializer=tf.contrib.layers.xavier_initializer())

In [61]:
def cond(i, steps, *agrs):
    return i< steps

In [62]:
hidden_states_btup = tf.zeros([1, 1, state_size])
cell_states_btup = tf.zeros([1, 1, state_size])

In [63]:
hidden_state_tree = tf.expand_dims(tf.zeros([1, state_size]), 0)
cell_state_tree = tf.expand_dims(tf.zeros([1, state_size]), 0)
num_child_sp = 1

In [None]:
def 

In [77]:
def body1(index, steps, hds, cds, hds_btup, cds_btup):
    i = tf.constant(0)
    inputs = tf.expand_dims(tf.concat([tf.gather(hidden_states_seq[0], sp_pos[0][0][index]), embd_sp[0][0][index]],0), 0)
    childs = sp_childs[0][0][index]
    num_child = tf.shape(childs)[0]

    ht = tf.expand_dims(states_fw[0], 1)
    ct = tf.expand_dims(states_fw[0], 1)
    
    it = tf.matmul(inputs, W_i) + b_i + tf.matmul(hds[0], U_i)
    ft = tf.matmul(inputs, W_f) + b_f + tf.matmul(hds[0], U_f)
    ot = tf.matmul(inputs, W_o) + b_o + tf.matmul(hds[0], U_o)
    ut = tf.matmul(inputs, W_u) + b_u + tf.matmul(hds[0], U_u)
    
    def body4(k, steps, it, ft, ot, ut):
        it += tf.matmul(hds[k], U_i)
        ft += tf.matmul(hds[k], U_f) 
        ot += tf.matmul(hds[k], U_o) 
        ut += tf.matmul(hds[k], U_u) 
        return k+1, steps, it, ft, ot, ut
    _, _, it, ft, ot, ut = tf.while_loop(cond, body4, [1, num_child_sp, it, ft, ot, ut])

    
    def body(k, steps, out, U):
        out += tf.matmul(tf.gather(ht, childs[k]), U)
        return k+1, steps, out, U

    _, _, ht_i, _ = tf.while_loop(cond, body, [i, num_child, it, U_it])
    _, _, ht_o, _ = tf.while_loop(cond, body, [i, num_child, ot, U_ot])
    _, _, ht_u, _ = tf.while_loop(cond, body, [i, num_child, ut, U_ut])

    input_gate = tf.sigmoid(ht_i)
    output_gate = tf.sigmoid(ht_o)
    u_input = tf.sigmoid(ht_u)
    
    cell_state = input_gate * u_input 
    
    def body5(k, steps, cell_state):
        _, _, f, _ = tf.while_loop(cond, body, [i, num_child, ft, U_ft[k]])
        cell_state += tf.sigmoid(f) * cds[k]
        return k+1, steps, cell_state
    _, _, cell_state = tf.while_loop(cond, body5, [i, num_child_sp, cell_state])
    def body2(k, steps, ctl):
        _, _, fj, _ = tf.while_loop(cond, body, [i, num_child, ft, U_ft[k+2]])
        ctl += tf.sigmoid(fj) * tf.gather(ct, childs[k])
        return k+1, steps, ctl

    _, _, cds = tf.while_loop(cond, body2, [i, num_child, cell_state])
    
    hds = tf.expand_dims(output_gate * tf.tanh(cds), 0)
    
    cds = tf.expand_dims(cds, 0)
    if(index==0):
        hds_btup = hds
        cds_btup = cds
    else:
        hds_btup = tf.concat([hds_btup, hds], 0)
        cds_btup = tf.concat([cds_btup, cds], 0)
    return index+1, steps, hds, cds, hds_btup, cds_btup


In [78]:
index, _, _, _, hidden_states_btup, cell_states_btup = tf.while_loop(
    cond, body1, 
    [i, sp_length[0][0], hidden_state_tree, cell_state_tree, 
     hidden_states_btup, cell_states_btup], 
    shape_invariants=[i.get_shape(),i.get_shape(), 
    hidden_state_tree.get_shape(), hidden_state_tree.get_shape(), 
    tf.TensorShape([None, 1, state_size]), 
    tf.TensorShape([None, 1, state_size])])

In [79]:
hidden_states_btup

<tf.Tensor 'while_14/Exit_4:0' shape=(?, 1, 100) dtype=float32>

In [6]:
with tf.name_scope("hidden_layer"):
    W = tf.Variable(tf.truncated_normal([600, 100], -0.1, 0.1), name="W")
    b = tf.Variable(tf.zeros([100]), name="b")
    y_hidden_layer = tf.matmul(state_series, W) + b

with tf.name_scope("dropout"):
    y_hidden_layer_drop = tf.nn.dropout(y_hidden_layer, 0.3)

with tf.name_scope("softmax_layer"):
    W = tf.Variable(tf.truncated_normal([100, relation_classes], -0.1, 0.1), name="W")
    b = tf.Variable(tf.zeros([relation_classes]), name="b")
    logits = tf.matmul(y_hidden_layer_drop, W) + b
    predictions = tf.argmax(logits, 1)

tv_all = tf.trainable_variables()
tv_regu = []
non_reg = ["word_embedding/W:0","pos_embedding/W:0",'dep_embedding/W:0',"global_step:0",'hidden_layer/b:0','softmax_layer/b:0']
for t in tv_all:
    if t.name not in non_reg:
        if(t.name.find('biases')==-1):
            tv_regu.append(t)

with tf.name_scope("loss"):
    l2_loss = lambda_l2 * tf.reduce_sum([ tf.nn.l2_loss(v) for v in tv_regu ])
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
    total_loss = loss + l2_loss

global_step = tf.Variable(0, trainable=False, name="global_step")

learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, decay_steps, decay_rate, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(total_loss, global_step=global_step)

In [7]:
f = open('data/vocab.pkl', 'rb')
vocab = pickle.load(f)
f.close()

word2id = dict((w, i) for i,w in enumerate(vocab))
id2word = dict((i, w) for i,w in enumerate(vocab))

unknown_token = "UNKNOWN_TOKEN"
word2id[unknown_token] = word_vocab_size -1
id2word[word_vocab_size-1] = unknown_token

pos_tags_vocab = []
for line in open('data/pos_tags.txt'):
        pos_tags_vocab.append(line.strip())

dep_vocab = []
for line in open('data/dependency_types.txt'):
    dep_vocab.append(line.strip())

relation_vocab = []
for line in open('data/relation_types.txt'):
    relation_vocab.append(line.strip())

rel2id = dict((w, i) for i,w in enumerate(relation_vocab))
id2rel = dict((i, w) for i,w in enumerate(relation_vocab))

pos_tag2id = dict((w, i) for i,w in enumerate(pos_tags_vocab))
id2pos_tag = dict((i, w) for i,w in enumerate(pos_tags_vocab))

dep2id = dict((w, i) for i,w in enumerate(dep_vocab))
id2dep = dict((i, w) for i,w in enumerate(dep_vocab))

pos_tag2id['OTH'] = 9
id2pos_tag[9] = 'OTH'

dep2id['OTH'] = 20
id2dep[20] = 'OTH'

JJ_pos_tags = ['JJ', 'JJR', 'JJS']
NN_pos_tags = ['NN', 'NNS', 'NNP', 'NNPS']
RB_pos_tags = ['RB', 'RBR', 'RBS']
PRP_pos_tags = ['PRP', 'PRP$']
VB_pos_tags = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
_pos_tags = ['CC', 'CD', 'DT', 'IN']

def pos_tag(x):
    if x in JJ_pos_tags:
        return pos_tag2id['JJ']
    if x in NN_pos_tags:
        return pos_tag2id['NN']
    if x in RB_pos_tags:
        return pos_tag2id['RB']
    if x in PRP_pos_tags:
        return pos_tag2id['PRP']
    if x in VB_pos_tags:
        return pos_tag2id['VB']
    if x in _pos_tags:
        return pos_tag2id[x]
    else:
        return 9

In [8]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

In [9]:
# f = open('data/word_embedding', 'rb')
# word_embedding = pickle.load(f)
# f.close()
# sess.run(embedding_init, feed_dict={embedding_placeholder:word_embedding})
# word_embedding_saver.save(sess, word_embd_dir + '/word_embd')

In [10]:
# model = tf.train.latest_checkpoint(model_dir)
# saver.restore(sess, model)

In [11]:
latest_embd = tf.train.latest_checkpoint(word_embd_dir)
word_embedding_saver.restore(sess, latest_embd)

INFO:tensorflow:Restoring parameters from checkpoint/word_embd/word_embd


In [12]:
f = open('data/train_lca_paths', 'rb')
word_p, dep_p, pos_p = pickle.load(f)
f.close()
relations = []
for line in open('data/train_relations.txt'):
    relations.append(line.strip().split()[1])

length = len(word_p)
num_batches = int(length/batch_size)

for i in range(length):
    for j, word in enumerate(word_p[i]):
        word = word.lower()
        word_p[i][j] = word if word in word2id else unknown_token 
    for l, d in enumerate(dep_p[i]):
        dep_p[i][l] = d if d in dep2id else 'OTH'
        
word_p_ids = np.ones([length, max_len_path],dtype=int)
pos_p_ids = np.ones([length, max_len_path],dtype=int)
dep_p_ids = np.ones([length, max_len_path],dtype=int)
rel_ids = np.array([rel2id[rel] for rel in relations])
path_len = np.array([len(w) for w in word_p], dtype=int)

for i in range(length):
    for j, w in enumerate(word_p[i]):
        word_p_ids[i][j] = word2id[w]
        
    for j, w in enumerate(pos_p[i]):
        pos_p_ids[i][j] = pos_tag(w)
        
    for j, w in enumerate(dep_p[i]):
        dep_p_ids[i][j] = dep2id[w]

In [13]:
num_epochs = 10
for i in range(num_epochs):
    loss_per_epoch = 0
    for j in range(num_batches):
        feed_dict = {
            path_length:path_len[j*batch_size:(j+1)*batch_size],
            word_ids:word_p_ids[j*batch_size:(j+1)*batch_size],
            pos_ids:pos_p_ids[j*batch_size:(j+1)*batch_size],
            dep_ids:dep_p_ids[j*batch_size:(j+1)*batch_size],
            y:rel_ids[j*batch_size:(j+1)*batch_size]}
        _, _loss, step = sess.run([optimizer, total_loss, global_step], feed_dict)
        loss_per_epoch +=_loss
        if (j+1)%num_batches==0:
            print("Epoch:", i+1,"Step:", step, "loss:",loss_per_epoch/num_batches)
    saver.save(sess, model_dir + '/model')
    print("Saved Model")

Epoch: 1 Step: 800 loss: 2.85300489247
Saved Model
Epoch: 2 Step: 1600 loss: 2.73827668965
Saved Model
Epoch: 3 Step: 2400 loss: 2.70001435518
Saved Model
Epoch: 4 Step: 3200 loss: 2.68624746531
Saved Model
Epoch: 5 Step: 4000 loss: 2.68042603165
Saved Model
Epoch: 6 Step: 4800 loss: 2.67750604913
Saved Model
Epoch: 7 Step: 5600 loss: 2.67583220631
Saved Model
Epoch: 8 Step: 6400 loss: 2.67482194766
Saved Model
Epoch: 9 Step: 7200 loss: 2.67411908716
Saved Model
Epoch: 10 Step: 8000 loss: 2.67369878128
Saved Model


In [None]:
# training accuracy
all_predictions = []
for j in range(num_batches):
     feed_dict = {
            path_length:path_len[j*batch_size:(j+1)*batch_size],
            word_ids:word_p_ids[j*batch_size:(j+1)*batch_size],
            pos_ids:pos_p_ids[j*batch_size:(j+1)*batch_size],
            dep_ids:dep_p_ids[j*batch_size:(j+1)*batch_size],
            y:rel_ids[j*batch_size:(j+1)*batch_size]}
    batch_predictions = sess.run(predictions, feed_dict)
    all_predictions.append(batch_predictions)

y_pred = []
for i in range(num_batches):
    for pred in all_predictions[i]:
        y_pred.append(pred)

count = 0
for i in range(batch_size*num_batches):
    count += y_pred[i]==rel_ids[i]
accuracy = count/(batch_size*num_batches) * 100

print("training accuracy", accuracy)

In [25]:
f = open('data/test_lca_paths', 'rb')
word_p, dep_p, pos_p = pickle.load(f)
f.close()

relations = []
for line in open('data/test_relations.txt'):
    relations.append(line.strip().split()[0])

length = len(word_p1)
num_batches = int(length/batch_size)

for i in range(length):
    for j, word in enumerate(word_p[i]):
        word = word.lower()
        word_p[i][j] = word if word in word2id else unknown_token 
    for l, d in enumerate(dep_p[i]):
        dep_p[i][l] = d if d in dep2id else 'OTH'
        
word_p_ids = np.ones([length, max_len_path],dtype=int)
pos_p_ids = np.ones([length, max_len_path],dtype=int)
dep_p_ids = np.ones([length, max_len_path],dtype=int)
rel_ids = np.array([rel2id[rel] for rel in relations])
path_len = np.array([len(w) for w in word_p], dtype=int)

for i in range(length):
    for j, w in enumerate(word_p[i]):
        word_p_ids[i][j] = word2id[w]
        
    for j, w in enumerate(pos_p[i]):
        pos_p_ids[i][j] = pos_tag(w)
        
    for j, w in enumerate(dep_p[i]):
        dep_p_ids[i][j] = dep2id[w]

# test predictions
all_predictions = []
for j in range(num_batches):
     feed_dict = {
            path_length:path_len[j*batch_size:(j+1)*batch_size],
            word_ids:word_p_ids[j*batch_size:(j+1)*batch_size],
            pos_ids:pos_p_ids[j*batch_size:(j+1)*batch_size],
            dep_ids:dep_p_ids[j*batch_size:(j+1)*batch_size],
            y:rel_ids[j*batch_size:(j+1)*batch_size]}
    batch_predictions = sess.run(predictions, feed_dict)
    all_predictions.append(batch_predictions)

y_pred = []
for i in range(num_batches):
    for pred in all_predictions[i]:
        y_pred.append(pred)

count = 0
for i in range(batch_size*num_batches):
    count += y_pred[i]==rel_ids[i]
accuracy = count/(batch_size*num_batches) * 100

print("test accuracy", accuracy)

ValueError: Cannot feed value of shape (2, 10) for Tensor 'input_1/path1_length:0', which has shape '(10,)'