In [1]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.WARN)
import pickle
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import os
from tensorflow.python.client import device_lib
from collections import Counter
import time

In [2]:
f = open('../Glove/word_embedding_glove', 'rb')
word_embedding = pickle.load(f)
f.close()

word_embedding = word_embedding[: len(word_embedding)-1]

f = open('../Glove/vocab_glove', 'rb')
vocab = pickle.load(f)
f.close()

word2id = dict((w, i) for i,w in enumerate(vocab))
id2word = dict((i, w) for i,w in enumerate(vocab))

unknown_token = "UNKNOWN_TOKEN"

In [10]:
# Model Description
model_name = 'model-aw-1'
model_dir = 'output/' + model_name
save_dir = os.path.join(model_dir, "save/")
log_dir = os.path.join(model_dir, "log")

if not os.path.exists(model_dir):
    os.mkdir(model_dir)
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
if not os.path.exists(log_dir):
    os.mkdir(log_dir)

In [4]:
with open('/data/aviraj/dataset/train_data','rb') as f:
    train_data=pickle.load(f)
    
with open('/data/aviraj/dataset/val_data','rb') as f:
    val_data=pickle.load(f)
    

with open('/data/aviraj/dataset/fulldata_vocab_sense','rb') as f:
    vocab_sense=pickle.load(f)

    
sense2id = dict((s, i) for i,s in enumerate(vocab_sense))
id2sense = dict((i, s) for i,s in enumerate(vocab))

len(vocab_sense)

46

In [5]:
# Parameters
mode = 'train'
num_senses = len(vocab_sense)
batch_size = 64
vocab_size = len(vocab)
unk_vocab_size = 1
word_emb_size = len(word_embedding[0])
max_sent_size = 200
hidden_size = 200
keep_prob = 0.5
l2_lambda = 0.002
init_lr = 0.005
decay_steps = 500
decay_rate = 0.96
clip_norm = 1
clipping = True
mask_value = -10

In [6]:
# MODEL
x = tf.placeholder('int32', [batch_size, max_sent_size], name="x")
y = tf.placeholder('int32', [batch_size, max_sent_size], name="y")
x_mask  = tf.placeholder('bool', [batch_size, max_sent_size], name='x_mask')
sense_mask  = tf.placeholder('bool', [batch_size, max_sent_size], name='sense_mask')
is_train = tf.placeholder('bool', [], name='is_train')
word_emb_mat = tf.placeholder('float', [None, word_emb_size], name='emb_mat')
input_keep_prob = tf.cond(is_train,lambda:keep_prob, lambda:tf.constant(1.0))
x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 1)

with tf.name_scope("word_embedding"):
    if mode == 'train':
        unk_word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[unk_vocab_size, word_emb_size], initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=0, dtype=tf.float32))
    else:
        unk_word_emb_mat = tf.get_variable("word_emb_mat", shape=[unk_vocab_size, word_emb_size], dtype='float')
        
    final_word_emb_mat = tf.concat([word_emb_mat, unk_word_emb_mat], 0)
    Wx = tf.nn.embedding_lookup(final_word_emb_mat, x)  

with tf.variable_scope("lstm1"):
    cell_fw1 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
    cell_bw1 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

    d_cell_fw1 = tf.contrib.rnn.DropoutWrapper(cell_fw1, input_keep_prob=input_keep_prob)
    d_cell_bw1 = tf.contrib.rnn.DropoutWrapper(cell_bw1, input_keep_prob=input_keep_prob)
    
    (fw_h1, bw_h1), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw1, d_cell_bw1, Wx, sequence_length=x_len, dtype='float', scope='lstm1')
    h1 = tf.concat([fw_h1, bw_h1], 2)
    
with tf.variable_scope("lstm2"):
    cell_fw2 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
    cell_bw2 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

    d_cell_fw2 = tf.contrib.rnn.DropoutWrapper(cell_fw2, input_keep_prob=input_keep_prob)
    d_cell_bw2 = tf.contrib.rnn.DropoutWrapper(cell_bw2, input_keep_prob=input_keep_prob)
    
    (fw_h2, bw_h2), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw2, d_cell_bw2, h1, sequence_length=x_len, dtype='float', scope='lstm2')
    h = tf.concat([fw_h2, bw_h2], 2)

def attention(input_x, input_mask, W_att):
    h_masked = tf.boolean_mask(input_x, input_mask)
    h_tanh = tf.tanh(h_masked)
    u = tf.matmul(h_tanh, W_att)
    a = tf.nn.softmax(u)
    c = tf.reduce_sum(tf.multiply(h_tanh, a), 0)  
    return c

with tf.variable_scope("attention"):
    W_att = tf.Variable(tf.truncated_normal([2*hidden_size, 1], mean=0.0, stddev=0.1, seed=0), name="W_att")
    c = tf.expand_dims(attention(h[0], x_mask[0], W_att), 0)
    for i in range(1, batch_size):
        c = tf.concat([c, tf.expand_dims(attention(h[i], x_mask[i], W_att), 0)], 0)
        
    cc = tf.expand_dims(c, 1)
    c_final = tf.tile(cc, [1,max_sent_size, 1])
    h_final = tf.concat([c_final,h],2)
    flat_h_final = tf.reshape(h_final, [-1, 4*hidden_size])

float_sense_mask = tf.cast(sense_mask, 'float')

with tf.variable_scope("softmax_layer"):
    W = tf.Variable(tf.truncated_normal([4*hidden_size, num_senses], mean=0.0, stddev=0.1, seed=0), name="W")
    b = tf.Variable(tf.zeros([num_senses]), name="b")
    drop_flat_h_final = tf.nn.dropout(flat_h_final, input_keep_prob)
    flat_logits_sense = tf.matmul(drop_flat_h_final, W) + b
    logits = tf.reshape(flat_logits_sense, [batch_size, max_sent_size, num_senses])
    predictions = tf.arg_max(logits, 2)
    
loss = tf.contrib.seq2seq.sequence_loss(logits, y, float_sense_mask, name="loss")

global_step = tf.Variable(0, trainable=False, name="global_step")

learning_rate = tf.train.exponential_decay(init_lr, global_step, decay_steps, decay_rate, staircase=True)

tv_all = tf.trainable_variables()
tv_regu =[]
for t in tv_all:
    if t.name.find('b:')==-1:
        tv_regu.append(t)
        
# l2 Loss
l2_loss = l2_lambda * tf.reduce_sum([ tf.nn.l2_loss(v) for v in tv_regu ])

total_loss = loss + l2_loss

# Optimizer for loss
optimizer = tf.train.AdamOptimizer(learning_rate)

# Gradients and Variables for Loss
grads_vars = optimizer.compute_gradients(total_loss)

# Clipping of Gradients
clipped_grads = grads_vars
if(clipping == True):
    clipped_grads = [(tf.clip_by_norm(grad, clip_norm), var) for grad, var in clipped_grads]

# Training Optimizer for Total Loss
train_op = optimizer.apply_gradients(clipped_grads, global_step=global_step)

# Summaries
var_summaries = []
for v in tv_all:
    var_summary = tf.summary.histogram("{}/var".format(v.name), v)
    var_summaries.append(var_summary)

var_summaries_merged = tf.summary.merge(var_summaries)

loss_summary = tf.summary.scalar("loss", loss)
total_loss_summary = tf.summary.scalar("total_loss", total_loss)
summary = tf.summary.merge_all()

In [7]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())                          # For initializing all the variables
saver = tf.train.Saver()                                             # For Saving the model
summary_writer = tf.summary.FileWriter(log_dir, sess.graph)          # For writing Summaries

In [8]:
def model(xx, yy, mask, smask, train_cond=True, pretrain=False):
    num_batches = int(len(xx)/batch_size)
    losses = 0
    preds_sense = []
    true_sense = []
    
    for j in range(num_batches): 
        
        s = j * batch_size
        e = (j+1) * batch_size
        
        feed_dict = {x:xx[s:e], y:yy[s:e], x_mask:mask[s:e], sense_mask:smask[s:e], is_train:train_cond, input_keep_prob:keep_prob, word_emb_mat:word_embedding}
        
        
        if(train_cond==True):
            _, _loss, step, _summary = sess.run([train_op, total_loss, global_step, summary], feed_dict)
            
            summary_writer.add_summary(_summary, step)
            if((j+1)%100==0):
                print("Steps: {}".format(step), ", Loss: {0:.4f}".format(_loss))

        else:
            _loss, pred = sess.run([total_loss, predictions], feed_dict)
            preds_sense.append(pred[smask[s:e]])
            true_sense.append(yy[s:e][smask[s:e]])
        

        losses +=_loss

    if(train_cond==False): 
        sense_preds = []
        sense_true = []
        for i in range(num_batches):
            for ps in preds_sense[i]:      
                sense_preds.append(ps)
            for ts in true_sense[i]:
                sense_true.append(ts)
        return losses/num_batches, sense_preds, sense_true 

    return losses/num_batches, step

def eval_score(yy, pred):
    f1 = f1_score(yy, pred, average='macro')
    accu = accuracy_score(yy, pred)
    return f1*100, accu*100

In [9]:
x_id_train, mask_train, sense_mask_train, y_train = train_data
x_id_val, mask_val, sense_mask_val, y_val = val_data

In [None]:
num_epochs = 2
log_period = 1
log_num_batch = 100

for i in range(num_epochs):
    random = np.random.choice(len(y_train), size=(len(y_train)), replace=False)
    x_id_train = x_id_train[random]
    y_train = y_train[random]
    mask_train = mask_train[random]    
    sense_mask_train = sense_mask_train[random]
    
    start_time = time.time()
    losses, step = model(x_id_train, y_train, sense_mask_train, mask_train)
    time_taken = time.time() - start_time
    print("Epoch: {}".format(i+1),", Step: {}".format(step), ", Loss: {0:.4f}".format(losses), ", Time: {0:.4f}".format(time_taken))
    
    if((i+1)%log_period==0):
        saver.save(sess, save_path=save_dir)                         
        print("Model Saved")
        
#         rdm = np.random.choice(len(y_train), size=(log_num_batch*batch_size), replace=False)
#         x_id_train_rd = x_id_train[rdm]
#         y_train_rd = y_train[rdm]
#         mask_train_rd = mask_train[rdm]    
#         sense_mask_train_rd = sense_mask_train[rdm]
#         start_time = time.time()
#         train_loss, train_pred, train_true = model(x_id_train_rd, y_train_rd, sense_mask_train_rd, mask_train_rd, train_cond=False)        
#         f1_, accu_ = eval_score(train_true, train_pred)
#         time_taken = time.time() - start_time
#         print("Train: F1: {0:.4f}".format(f1_), ", Accu: {0:.4f}".format(accu_), ", Loss: {0:.4f}".format(train_loss), ", Time: {0:.4f}".format(time_taken))
        
        rdm = np.random.choice(len(y_val), size=(log_num_batch*batch_size), replace=False)
        x_id_val_rd = x_id_val[rdm]
        y_val_rd = y_val[rdm]
        mask_val_rd = mask_val[rdm]    
        sense_mask_val_rd = sense_mask_val[rdm]
        start_time = time.time()
        val_loss, val_pred, val_true = model(x_id_val_rd, y_val_rd, sense_mask_val_rd, mask_val_rd, train_cond=False)        
        f1_, accu_ = eval_score(val_true, val_pred)
        time_taken = time.time() - start_time
        print("Train: F1: {0:.4f}".format(f1_), ", Accu: {0:.4f}".format(accu_), ", Loss: {0:.4f}".format(val_loss), ", Time: {0:.4f}".format(time_taken))

Steps: 100 , Loss: 3.0103
Steps: 200 , Loss: 2.0773
Steps: 300 , Loss: 1.4073
Steps: 400 , Loss: 0.9721
Steps: 500 , Loss: 0.6662
Steps: 600 , Loss: 0.5096
Steps: 700 , Loss: 0.3883
Steps: 800 , Loss: 0.4833
Steps: 900 , Loss: 0.6254
Steps: 1000 , Loss: 0.7207
Steps: 1100 , Loss: 0.3572
Steps: 1200 , Loss: 0.2847
Steps: 1300 , Loss: 0.2688
Steps: 1400 , Loss: 0.2741
Steps: 1500 , Loss: 0.2483
Steps: 1600 , Loss: 0.2375
Steps: 1700 , Loss: 0.2458
Steps: 1800 , Loss: 0.3028
Steps: 1900 , Loss: 0.2380
Steps: 2000 , Loss: 0.2389
Steps: 2100 , Loss: 0.2640
Steps: 2200 , Loss: 0.2420
Steps: 2300 , Loss: 0.2787
Steps: 2400 , Loss: 0.2686
Steps: 2500 , Loss: 0.3146
Steps: 2600 , Loss: 0.4298
Steps: 2700 , Loss: 0.4066
Steps: 2800 , Loss: 0.3353
Steps: 2900 , Loss: 0.3375
Steps: 3000 , Loss: 0.6588


In [None]:
saver.restore(sess, save_dir)