In [1]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.WARN)
import pickle
import numpy as np
import os

In [2]:
# Load the pickled artifacts. Each file is opened in a `with` block so the
# handle is closed even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Glove/word_embedding_glove', 'rb') as f:
    word_embedding = pickle.load(f)
# Drop the last embedding row; the model graph later concatenates its own
# trainable row(s) after this matrix.
# NOTE(review): presumably the dropped row is the UNKNOWN_TOKEN vector — confirm.
word_embedding = word_embedding[:-1]

with open('../Glove/vocab_glove', 'rb') as f:
    vocab = pickle.load(f)

# Bidirectional lookup tables between words and their position in `vocab`.
word2id = {w: i for i, w in enumerate(vocab)}
id2word = {i: w for i, w in enumerate(vocab)}

# Word used in `word2id` for tokens that are not in the vocabulary.
unknown_token = "UNKNOWN_TOKEN"

# Training data, indexed by sense word.
with open("train.pickle", 'rb') as f:
    train_data = pickle.load(f)

In [None]:
# train_test_split
def train_test(target):
    """Split the examples of one sense word into train and test portions.

    Args:
        target: key into ``train_data`` selecting the sense word to split.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` as produced by
        ``train_test_split`` with an 80/20 shuffled split stratified on ``y``.

    NOTE(review): ``train_test_split`` is not imported anywhere in the visible
    notebook; it presumably comes from ``sklearn.model_selection`` and must be
    imported before this function is called.
    """
    # The original read `train['target']`: `train` is never defined and the
    # literal string ignored the `target` argument.
    x = train_data[target][0]
    y = train_data[target][2]
    x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, shuffle=True, stratify=y)
    return x_train, x_test, y_train, y_test

In [27]:
# Longest sentence (in tokens) among the 'serve' examples; 0 when there are none.
# NOTE(review): the key is hard-coded to 'serve' while later cells use
# `sense_word` — confirm which word this statistic is meant for.
max_len = max((len(sentence) for sentence in train_data['serve'][0]), default=0)

In [35]:
# Model Description
sense_word = 'hard'     # sense word whose examples are used for training
model_name = 'basic'    # sub-directory name for this model variant
model_dir = 'output/' + sense_word + '/' + model_name
save_dir = os.path.join(model_dir, "save")   # checkpoint directory
log_dir = os.path.join(model_dir, "log")     # summary/event directory

# os.makedirs creates the missing parent directories too (os.mkdir fails when
# `output/<word>/<model>` does not exist yet), and exist_ok makes re-running
# the cell harmless.
os.makedirs(save_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

In [7]:
# Parameters

# --- run configuration ---
mode = 'train'        # the embedding block checks for 'train' to pick an initializer
clipping = True       # enables per-gradient norm clipping
batch_size = 1        # leading dimension of the x / y / x_mask placeholders

# --- sizes derived from the loaded vocabulary and embeddings ---
vocab_size = len(vocab)
word_emb_size = len(word_embedding[0])
unk_vocab_size = 1    # number of trainable embedding rows appended to the fed matrix

# --- model shape ---
num_senses = 3        # width of the output logits
max_sent_size = 200   # sentences are padded / truncated to this many tokens
hidden_size = 100     # units per LSTM direction

# --- regularisation ---
keep_prob = 0.5       # dropout keep probability used while training
l2_lambda = 1e-3      # multiplier on the summed L2 penalty

# --- optimisation ---
init_lr = 1e-3        # initial learning rate for exponential decay
decay_steps, decay_rate = 5000, 0.96
clip_norm = 1         # norm bound passed to tf.clip_by_norm

In [11]:
## MODEL

# Graph inputs: token ids, one label per sentence, and a boolean mask that is
# True at real-token positions.
x = tf.placeholder('int32', [batch_size, max_sent_size], name="x")
y = tf.placeholder('int32', [batch_size], name="y")
x_mask  = tf.placeholder('bool', [batch_size, max_sent_size], name='x_mask') 
is_train = tf.placeholder('bool', [], name='is_train')
# Embedding matrix fed at run time; the row count is left open.
word_emb_mat = tf.placeholder('float', [None, word_emb_size], name='emb_mat')
# Dropout keep probability: `keep_prob` while training, 1.0 otherwise.
# The original lambda returned `input_keep_prob` itself, which does not exist
# yet when tf.cond invokes the lambda on a fresh kernel (NameError). The
# resulting tensor can still be overridden through feed_dict.
input_keep_prob = tf.cond(is_train,
                          lambda: tf.constant(keep_prob, dtype=tf.float32),
                          lambda: tf.constant(1.0))
# Number of real tokens per sentence (count of True entries in each mask row).
x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 1)

with tf.name_scope("word_embedding"):
    # Trainable row(s) appended after the fed embedding matrix.
    if mode == 'train':
        unk_word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[unk_vocab_size, word_emb_size], initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32))
    else:
        unk_word_emb_mat = tf.get_variable("word_emb_mat", shape=[unk_vocab_size, word_emb_size], dtype='float')
        
    final_word_emb_mat = tf.concat([word_emb_mat, unk_word_emb_mat], 0)
    # Look up an embedding vector for every token id in x.
    Wx = tf.nn.embedding_lookup(final_word_emb_mat, x)  

with tf.variable_scope("lstm"):
    cell_fw = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

    # Dropout is applied to the inputs of each direction's cell.
    d_cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=input_keep_prob)
    d_cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=input_keep_prob)
    
    (fw_h, bw_h), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw, d_cell_bw, Wx, sequence_length=x_len, dtype='float', scope='lstm')
    # Concatenate forward and backward outputs along the feature axis.
    h = tf.concat([fw_h, bw_h], 2)

def attention(input_x, input_mask, W):
    """Attention-pool the unmasked rows of ``input_x`` into a single vector.

    Args:
        input_x: 2-D tensor of per-token states for one sentence (the caller
            passes ``h[i]``; presumably ``[max_sent_size, 2*hidden_size]``).
        input_mask: boolean tensor selecting which rows of ``input_x`` to keep.
        W: projection matrix; the caller passes shape ``[2*hidden_size, 1]``.

    Returns:
        The attention-weighted sum over tokens of ``tanh`` of the kept rows.
    """
    h_masked = tf.boolean_mask(input_x, input_mask)
    h_tanh = tf.tanh(h_masked)
    u = tf.matmul(h_tanh, W)
    # The weights must be normalised across tokens (axis 0). tf.nn.softmax
    # works on the last axis, which for u of shape [tokens, 1] has size 1 and
    # would make every weight exactly 1.0. Transposing puts the token axis
    # last for the softmax; the second transpose restores the layout.
    a = tf.transpose(tf.nn.softmax(tf.transpose(u)))
    c = tf.reduce_sum(tf.multiply(h_tanh, a), 0)  
    return c

with tf.variable_scope("attention"):
    # NOTE(review): the positional arguments of tf.truncated_normal are
    # (shape, mean, stddev), so this draws with mean -0.1 and stddev 0.1;
    # confirm a [-0.1, 0.1] range was not what was intended.
    W = tf.Variable(tf.truncated_normal([2*hidden_size, 1], mean=-0.1, stddev=0.1), name="W")

    # Pool each sentence separately, then join the results along a new
    # leading batch axis.
    pooled_rows = [
        tf.expand_dims(attention(h[k], x_mask[k], W), 0)
        for k in range(batch_size)
    ]
    c = tf.concat(pooled_rows, 0)


with tf.variable_scope("softmax_layer"):
    # Same initialiser arguments as above (mean -0.1, stddev 0.1).
    W = tf.Variable(tf.truncated_normal([2*hidden_size, num_senses], mean=-0.1, stddev=0.1), name="W")
    b = tf.Variable(tf.zeros([num_senses]), name="b")
    # Dropout on the pooled vector before the linear output layer.
    drop_c = tf.nn.dropout(c, input_keep_prob)
    logits = tf.add(tf.matmul(drop_c, W), b)
    # Index of the highest-scoring sense for each sentence.
    predictions = tf.argmax(logits, 1)

# Mean cross-entropy between the logits and the integer sense labels.
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
global_step = tf.Variable(0, trainable=False, name="global_step")

# Learning rate decays in discrete steps (staircase) as global_step grows.
learning_rate = tf.train.exponential_decay(init_lr, global_step, decay_steps, decay_rate, staircase=True)

# Variables that receive the L2 penalty: every trainable variable whose name
# does not contain 'b:'.
# NOTE(review): this filter only skips variables literally named 'b'; LSTM
# bias variables (typically named 'bias'/'biases') are still regularised —
# confirm that is intended.
tv_all = tf.trainable_variables()
tv_regu = [t for t in tv_all if t.name.find('b:') == -1]

# l2 Loss
l2_loss = l2_lambda * tf.reduce_sum([ tf.nn.l2_loss(v) for v in tv_regu ])

total_loss = loss + l2_loss

# Optimizer for loss
optimizer = tf.train.AdamOptimizer(learning_rate)

# Gradients and Variables for Loss
grads_vars = optimizer.compute_gradients(total_loss)

# Clipping of Gradients: each gradient is clipped to `clip_norm` on its own.
# compute_gradients may return None for a variable, and tf.clip_by_norm cannot
# take None, so such pairs are passed through untouched (the summary code
# applies the same `is not None` guard).
clipped_grads = grads_vars
if clipping:
    clipped_grads = [
        (tf.clip_by_norm(grad, clip_norm), var) if grad is not None else (grad, var)
        for grad, var in grads_vars
    ]

# Training Optimizer for Total Loss
train_op = optimizer.apply_gradients(clipped_grads, global_step=global_step)

# Summaries
# One histogram per variable, plus one per gradient when a gradient exists.
grad_summaries = []
var_summaries = []
for grad, variable in grads_vars:
    if grad is not None:
        grad_summaries.append(tf.summary.histogram(variable.name + "/grad", grad))
    var_summaries.append(tf.summary.histogram(variable.name + "/var", variable))

grad_summaries_merged = tf.summary.merge(grad_summaries)
var_summaries_merged = tf.summary.merge(var_summaries)

# Scalar curves for the data loss and for the loss including the L2 term.
loss_summary = tf.summary.scalar("loss", loss)
total_loss_summary = tf.summary.scalar("total_loss", total_loss)

# Single op that evaluates every summary registered in the graph.
summary = tf.summary.merge_all()

In [118]:
# Number of training sentences stored for the selected sense word.
sense_sentences = train_data[sense_word][0]
num_train_data = len(sense_sentences)

In [119]:
num_train_data

4333

In [129]:
# Encode every training sentence as a fixed-width row of word ids plus a
# boolean mask that is True at the positions holding real tokens.
# Sentences longer than max_sent_size are truncated; shorter ones keep id 0
# (masked out) in the remaining positions.
# int32 matches the dtype of the `x` placeholder (the default float64 array
# relied on an implicit cast at feed time).
train_x = np.zeros([num_train_data, max_sent_size], dtype=np.int32)
train_x_mask = np.zeros([num_train_data, max_sent_size], dtype=bool)

# Id substituted for out-of-vocabulary words; reuses the `unknown_token`
# constant instead of repeating the string literal.
unk_id = word2id[unknown_token]

for j in range(num_train_data):
    tokens = train_data[sense_word][0][j][:max_sent_size]
    n_tokens = len(tokens)
    if n_tokens:
        train_x[j, :n_tokens] = [word2id.get(w, unk_id) for w in tokens]
        train_x_mask[j, :n_tokens] = True

# Labels for the same sense word, taken from index 2 of its entry.
train_y = train_data[sense_word][2]

In [131]:
train_x_mask[1]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [16]:
# Session with soft device placement enabled.
session_config = tf.ConfigProto(allow_soft_placement=True)
sess = tf.Session(config=session_config)

# Give every graph variable its initial value.
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Checkpoint writer/reader for the model variables.
saver = tf.train.Saver()

# Event-file writer for summaries; the graph is recorded alongside them.
summary_writer = tf.summary.FileWriter(log_dir, graph=sess.graph)

In [105]:
# Run one optimisation step on the first batch as a smoke test.
# The original passed an empty feed_dict, which cannot execute because the
# graph's placeholders must be fed.
first_batch = slice(0, batch_size)
_, pred, step = sess.run(
    [train_op, predictions, global_step],
    feed_dict={
        x: train_x[first_batch],
        # reshape(-1) yields a 1-D label vector for both a scalar and a list
        y: np.reshape(train_y, [-1])[first_batch],
        x_mask: train_x_mask[first_batch],
        is_train: True,
        input_keep_prob: keep_prob,
        word_emb_mat: word_embedding,
    })

In [108]:
# Number of full mini-batches per epoch. The original took len(train_data),
# which counts the top-level entries of the data dict rather than batches,
# and stored it under a name the training loop never reads (`num_batches`).
num_batches = num_train_data // batch_size
num_batch = num_batches  # original name kept as an alias

In [106]:
num_epochs = 10

# Full mini-batches per epoch. The placeholders have a fixed leading dimension
# of `batch_size`, so a trailing partial batch cannot be fed and is skipped.
num_batches = num_train_data // batch_size

# 1-D label vector so it can be sliced per batch like train_x.
train_labels = np.reshape(train_y, [-1])

for i in range(num_epochs):

    loss_per_epoch = 0

    for j in range(num_batches):
        # Slice out the j-th mini-batch; the original fed the entire dataset
        # into placeholders shaped [batch_size, ...].
        start = j * batch_size
        end = start + batch_size
        feed_dict = {
            x: train_x[start:end],
            y: train_labels[start:end],
            x_mask: train_x_mask[start:end],
            is_train: True,
            input_keep_prob: keep_prob,
            word_emb_mat: word_embedding}

        # One optimisation step. `global_step` is the step counter defined
        # with the optimizer; the original fetched an undefined
        # `global_step_dep`.
        _, _loss, step, _summary = sess.run([train_op, total_loss, global_step, summary], feed_dict)

        # Summing the loss for each epoch
        loss_per_epoch += _loss

        # Writing the summary
        summary_writer.add_summary(_summary, step)

        if step % 100 == 0:
            print("Steps:", step)

    # Report the mean loss once per epoch (skipped when there were no batches).
    if num_batches:
        print("Epoch:", i+1,"Step:", step, "loss:",loss_per_epoch/num_batches)

    # Saving the model
    saver.save(sess, model_dir + '/model')
    print("Saved Model")

array([0])

In [98]:
train_y

0