### learning rate = 0.001, used convolution for extracting features using word embeddings


In [1]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.WARN)
import pickle
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import os
from tensorflow.python.client import device_lib
from collections import Counter
import time

In [2]:
f = open('../../Glove/word_embedding_glove', 'rb')
word_embedding = pickle.load(f)
f.close()

word_embedding = word_embedding[: len(word_embedding)-1]

f = open('../../Glove/vocab_glove', 'rb')
vocab = pickle.load(f)
f.close()

word2id = dict((w, i) for i,w in enumerate(vocab))
id2word = dict((i, w) for i,w in enumerate(vocab))

unknown_token = "UNKNOWN_TOKEN"

# Model Description
model_name = 'model-aw-lex-1-4'
model_dir = '../output/all-word/' + model_name
save_dir = os.path.join(model_dir, "save/")
log_dir = os.path.join(model_dir, "log")

if not os.path.exists(model_dir):
    os.mkdir(model_dir)
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
if not os.path.exists(log_dir):
    os.mkdir(log_dir)

with open('/data/aviraj/dataset/train_val_data_fine/all_word_lex','rb') as f:
    train_data, val_data = pickle.load(f)    
    
# Parameters
mode = 'train'
num_senses = 45
num_pos = 12
batch_size = 64
vocab_size = len(vocab)
unk_vocab_size = 1
word_emb_size = len(word_embedding[0])
max_sent_size = 200
hidden_size = 256
num_filter = 256
kernel_size = 5
keep_prob = 0.3
l2_lambda = 0.001
init_lr = 0.001
decay_steps = 500
decay_rate = 0.9
clip_norm = 1
clipping = True
moving_avg_deacy = 0.999
num_gpus = 6

In [3]:
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

In [4]:
# MODEL
device_num = 0
tower_grads = []
losses = []
predictions = []
predictions_pos = []

x = tf.placeholder('int32', [num_gpus, batch_size, max_sent_size], name="x")
y = tf.placeholder('int32', [num_gpus, batch_size, max_sent_size], name="y")
y_pos = tf.placeholder('int32', [num_gpus, batch_size, max_sent_size], name="y")
x_mask  = tf.placeholder('bool', [num_gpus, batch_size, max_sent_size], name='x_mask') 
sense_mask  = tf.placeholder('bool', [num_gpus, batch_size, max_sent_size], name='sense_mask')
is_train = tf.placeholder('bool', [], name='is_train')
word_emb_mat = tf.placeholder('float', [None, word_emb_size], name='emb_mat')
input_keep_prob = tf.cond(is_train,lambda:keep_prob, lambda:tf.constant(1.0))
pretrain = tf.placeholder('bool', [], name="pretrain")

global_step = tf.Variable(0, trainable=False, name="global_step")
learning_rate = tf.train.exponential_decay(init_lr, global_step, decay_steps, decay_rate, staircase=True)
summaries = []

def global_attention(input_x, input_mask, W_att):
    h_masked = tf.boolean_mask(input_x, input_mask)
    h_tanh = tf.tanh(h_masked)
    u = tf.matmul(h_tanh, W_att)
    a = tf.nn.softmax(u)
    c = tf.reduce_sum(tf.multiply(h_tanh, a), 0)  
    return c

with tf.variable_scope("word_embedding"):
    unk_word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[unk_vocab_size, word_emb_size], initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=0, dtype=tf.float32))
    final_word_emb_mat = tf.concat([word_emb_mat, unk_word_emb_mat], 0)

with tf.variable_scope(tf.get_variable_scope()):
    for gpu_idx in range(num_gpus):
        if gpu_idx>int(num_gpus/2)-1:
            device_num = 1
        with tf.name_scope("model_{}".format(gpu_idx)) as scope, tf.device('/gpu:%d' % device_num):

            if gpu_idx > 0:
                    tf.get_variable_scope().reuse_variables()

            with tf.name_scope("word"):
                Wx = tf.nn.embedding_lookup(final_word_emb_mat, x[gpu_idx])  

            float_x_mask = tf.cast(x_mask[gpu_idx], 'float')
            tile_x_mask = tf.tile(tf.expand_dims(float_x_mask, 2), [1, 1, word_emb_size])
            Wx_masked = tf.multiply(Wx, tile_x_mask)
            x_len = tf.reduce_sum(tf.cast(x_mask[gpu_idx], 'int32'), 1)
            
            with tf.variable_scope("convolution"):
                conv1 = tf.layers.conv1d(inputs=Wx_masked, filters=num_filter, kernel_size=[kernel_size], padding='same', activation=tf.nn.relu)
                conv2 = tf.layers.conv1d(inputs=conv1, filters=num_filter, kernel_size=[kernel_size], padding='same')
                
            with tf.variable_scope("lstm1"):
                cell_fw1 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
                cell_bw1 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

                d_cell_fw1 = tf.contrib.rnn.DropoutWrapper(cell_fw1, input_keep_prob=input_keep_prob)
                d_cell_bw1 = tf.contrib.rnn.DropoutWrapper(cell_bw1, input_keep_prob=input_keep_prob)

                (fw_h1, bw_h1), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw1, d_cell_bw1, conv2, sequence_length=x_len, dtype='float', scope='lstm1')
                h1 = tf.concat([fw_h1, bw_h1], 2)

            with tf.variable_scope("lstm2"):
                cell_fw2 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
                cell_bw2 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

                d_cell_fw2 = tf.contrib.rnn.DropoutWrapper(cell_fw2, input_keep_prob=input_keep_prob)
                d_cell_bw2 = tf.contrib.rnn.DropoutWrapper(cell_bw2, input_keep_prob=input_keep_prob)

                (fw_h2, bw_h2), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw2, d_cell_bw2, h1, sequence_length=x_len, dtype='float', scope='lstm2')
                h = tf.concat([fw_h2, bw_h2], 2)

            with tf.variable_scope("global_attention"):
                W_att = tf.get_variable("W_att", shape=[2*hidden_size, 1], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=gpu_idx*10))
                c = tf.expand_dims(global_attention(h[0], x_mask[gpu_idx][0], W_att), 0)
                for i in range(1, batch_size):
                    c = tf.concat([c, tf.expand_dims(global_attention(h[i], x_mask[gpu_idx][i], W_att), 0)], 0)
                cc = tf.expand_dims(c, 1)
                c_final = tf.tile(cc, [1, max_sent_size, 1])

            h_final = tf.concat([c_final, h], 2)
            flat_h_final = tf.reshape(h_final, [-1, tf.shape(h_final)[2]])

            with tf.variable_scope("hidden_layer"):
                W = tf.get_variable("W", shape=[4*hidden_size, 2*hidden_size], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=gpu_idx*20))
                b = tf.get_variable("b", shape=[2*hidden_size], initializer=tf.zeros_initializer())
                drop_flat_h_final = tf.nn.dropout(flat_h_final, input_keep_prob)
                flat_hl = tf.matmul(drop_flat_h_final, W) + b

            with tf.variable_scope("softmax_layer"):
                W = tf.get_variable("W", shape=[2*hidden_size, num_senses], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=gpu_idx*20))
                b = tf.get_variable("b", shape=[num_senses], initializer=tf.zeros_initializer())
                drop_flat_hl = tf.nn.dropout(flat_hl, input_keep_prob)
                flat_logits_sense = tf.matmul(drop_flat_hl, W) + b
                logits = tf.reshape(flat_logits_sense, [batch_size, max_sent_size, num_senses])
                predictions.append(tf.arg_max(logits, 2))

            with tf.variable_scope("softmax_layer_pos"):
                W = tf.get_variable("W", shape=[2*hidden_size, num_pos], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=gpu_idx*30))
                b = tf.get_variable("b", shape=[num_pos], initializer=tf.zeros_initializer())
                flat_h1 = tf.reshape(h1, [-1, tf.shape(h1)[2]])
                drop_flat_hl = tf.nn.dropout(flat_hl, input_keep_prob)
                flat_logits_pos = tf.matmul(drop_flat_hl, W) + b
                logits_pos = tf.reshape(flat_logits_pos, [batch_size, max_sent_size, num_pos])
                predictions_pos.append(tf.arg_max(logits_pos, 2))


            float_sense_mask = tf.cast(sense_mask[gpu_idx], 'float')

            loss = tf.contrib.seq2seq.sequence_loss(logits, y[gpu_idx], float_sense_mask, name="loss")
            loss_pos = tf.contrib.seq2seq.sequence_loss(logits_pos, y_pos[gpu_idx], float_x_mask, name="loss_")

            l2_loss = l2_lambda * tf.losses.get_regularization_loss()

            total_loss = tf.cond(pretrain, lambda:loss_pos, lambda:loss + loss_pos + l2_loss)

            summaries.append(tf.summary.scalar("loss_{}".format(gpu_idx), loss))
            summaries.append(tf.summary.scalar("loss_pos_{}".format(gpu_idx), loss_pos))
            summaries.append(tf.summary.scalar("total_loss_{}".format(gpu_idx), total_loss))


            optimizer = tf.train.AdamOptimizer(learning_rate)
            grads_vars = optimizer.compute_gradients(total_loss)

            clipped_grads = grads_vars
            if(clipping == True):
                clipped_grads = [(tf.clip_by_norm(grad, clip_norm), var) for grad, var in clipped_grads]

            tower_grads.append(clipped_grads)
            losses.append(total_loss)

tower_grads = average_gradients(tower_grads)
losses = tf.add_n(losses)/len(losses)
apply_grad_op = optimizer.apply_gradients(tower_grads, global_step=global_step)
summaries.append(tf.summary.scalar('total_loss', losses))
summaries.append(tf.summary.scalar('learning_rate', learning_rate))

for var in tf.trainable_variables():
    summaries.append(tf.summary.histogram(var.op.name, var))

variable_averages = tf.train.ExponentialMovingAverage(moving_avg_deacy, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())

train_op = tf.group(apply_grad_op, variables_averages_op)
saver = tf.train.Saver(tf.global_variables())
summary = tf.summary.merge(summaries)

In [5]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
# print (device_lib.list_local_devices())
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())                          # For initializing all the variables
summary_writer = tf.summary.FileWriter(log_dir, sess.graph)          # For writing Summaries

In [6]:
save_period = 100
log_period = 100

def model(xx, yy, yy_pos, mask, smask, train_cond=True, pretrain_cond=False):
    num_batches = int(len(xx)/(batch_size*num_gpus))
    _losses = 0
    temp_loss = 0
    preds_sense = []
    true_sense = []
    preds_pos = []
    true_pos = []
    
    for j in range(num_batches): 
        
        s = j * batch_size * num_gpus
        e = (j+1) * batch_size * num_gpus
        xx_re = xx[s:e].reshape([num_gpus, batch_size, -1])
        yy_re = yy[s:e].reshape([num_gpus, batch_size, -1])
        yy_pos_re = yy_pos[s:e].reshape([num_gpus, batch_size, -1])
        mask_re = mask[s:e].reshape([num_gpus, batch_size, -1])
        smask_re = smask[s:e].reshape([num_gpus, batch_size, -1])
 
        feed_dict = {x:xx_re, y:yy_re, y_pos:yy_pos_re, x_mask:mask_re, sense_mask:smask_re, pretrain:pretrain_cond, is_train:train_cond, input_keep_prob:keep_prob, word_emb_mat:word_embedding}
        
        if(train_cond==True):
            _, _loss, step, _summary = sess.run([train_op, losses, global_step, summary], feed_dict)
            summary_writer.add_summary(_summary, step)
            
            temp_loss += _loss
            if((j+1)%log_period==0):
                print("Steps: {}".format(step), "Loss:{0:.4f}".format(temp_loss/log_period), ", Current Loss: {0:.4f}".format(_loss))
                temp_loss = 0
            if((j+1)%save_period==0):
                saver.save(sess, save_path=save_dir)                         
                
        else:
            _loss, pred, pred_pos = sess.run([total_loss, predictions, predictions_pos], feed_dict)
            for i in range(num_gpus):
                preds_sense.append(pred[i][smask_re[i]])
                true_sense.append(yy_re[i][smask_re[i]])
                preds_pos.append(pred_pos[i][mask_re[i]])
                true_pos.append(yy_pos_re[i][mask_re[i]])

        _losses +=_loss

    if(train_cond==False): 
        sense_preds = []
        sense_true = []
        pos_preds = []
        pos_true = []
        
        for preds in preds_sense:
            for ps in preds:      
                sense_preds.append(ps)  
        for trues in true_sense:
            for ts in trues:
                sense_true.append(ts)
        
        for preds in preds_pos:
            for ps in preds:      
                pos_preds.append(ps)      
        for trues in true_pos:
            for ts in trues:
                pos_true.append(ts)
                
        return _losses/num_batches, sense_preds, sense_true, pos_preds, pos_true

    return _losses/num_batches, step

def eval_score(yy, pred, yy_pos, pred_pos):
    f1 = f1_score(yy, pred, average='macro')
    accu = accuracy_score(yy, pred)
    f1_pos = f1_score(yy_pos, pred_pos, average='macro')
    accu_pos = accuracy_score(yy_pos, pred_pos)
    return f1*100, accu*100, f1_pos*100, accu_pos*100

In [7]:
x_id_train = train_data['x']
mask_train = train_data['x_mask']
sense_mask_train = train_data['sense_mask']
y_train = train_data['y']
y_pos_train = train_data['pos']

x_id_val = val_data['x']
mask_val = val_data['x_mask']
sense_mask_val = val_data['sense_mask']
y_val = val_data['y']
y_pos_val = val_data['pos']

In [8]:
def testing():
    start_time = time.time()
    val_loss, val_pred, val_true, val_pred_pos, val_true_pos = model(x_id_val, y_val, y_pos_val, mask_val, sense_mask_val, train_cond=False)        
    f1_, accu_, f1_pos_, accu_pos_ = eval_score(val_true, val_pred, val_true_pos, val_pred_pos)
    time_taken = time.time() - start_time
    print("Val: F1 Score:{0:.2f}".format(f1_), "Accuracy:{0:.2f}".format(accu_), " POS: F1 Score:{0:.2f}".format(f1_pos_), "Accuracy:{0:.2f}".format(accu_pos_), "Loss:{0:.4f}".format(val_loss), ", Time: {0:.1f}".format(time_taken))
    return f1_, accu_, f1_pos_, accu_pos_

def training(current_epoch, pre_train_cond):
        random = np.random.choice(len(y_train), size=(len(y_train)), replace=False)
        x_id_train_tmp = x_id_train[random]
        y_train_tmp = y_train[random]
        mask_train_tmp = mask_train[random]    
        sense_mask_train_tmp = sense_mask_train[random]
        y_pos_train_tmp = y_pos_train[random]

        start_time = time.time()
        train_loss, step = model(x_id_train_tmp, y_train_tmp, y_pos_train_tmp, mask_train_tmp, sense_mask_train_tmp, pretrain_cond=pre_train_cond)
        time_taken = time.time() - start_time
        print("Epoch: {}".format(current_epoch+1),", Step: {}".format(step), ", loss: {0:.4f}".format(train_loss), ", Time: {0:.1f}".format(time_taken))
        saver.save(sess, save_path=save_dir)                         
        print("Model Saved")
        return [step, train_loss]

In [10]:
loss_collection = []
val_collection = []
num_epochs = 10
val_period = 2

# Pretraining POS Tags
loss_collection.append(training(i, True))
val_collection.append(testing())

Steps: 100 Loss:9.1989 , Current Loss: 2.1383
Steps: 200 Loss:2.0983 , Current Loss: 2.0462
Steps: 300 Loss:1.9160 , Current Loss: 1.7626
Steps: 400 Loss:1.3839 , Current Loss: 1.1323
Steps: 500 Loss:0.9359 , Current Loss: 0.7643
Steps: 600 Loss:0.6473 , Current Loss: 0.5642
Steps: 700 Loss:0.5082 , Current Loss: 0.4730
Steps: 800 Loss:0.4297 , Current Loss: 0.4146
Steps: 900 Loss:0.3753 , Current Loss: 0.3453
Steps: 1000 Loss:0.3316 , Current Loss: 0.3059
Steps: 1100 Loss:0.2968 , Current Loss: 0.2864
Steps: 1200 Loss:0.2672 , Current Loss: 0.2495
Steps: 1300 Loss:0.2478 , Current Loss: 0.2552
Steps: 1400 Loss:0.2293 , Current Loss: 0.2242
Steps: 1500 Loss:0.2067 , Current Loss: 0.1914
Steps: 1600 Loss:0.1906 , Current Loss: 0.1830
Steps: 1700 Loss:0.1763 , Current Loss: 0.1711
Epoch: 64 , Step: 1771 , loss: 1.1043 , Time: 11749.2
Model Saved


  'recall', 'true', average, warn_for)


Val: F1 Score:1.62 Accuracy:2.00  POS: F1 Score:88.83 Accuracy:95.04 Loss:8.6611 , Time: 2263.9
Steps: 1871 Loss:0.1562 , Current Loss: 0.1427


KeyboardInterrupt: 

In [None]:
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 2052 Loss:3.3164 , Current Loss: 2.9249
Steps: 2152 Loss:2.6982 , Current Loss: 2.6539
Steps: 2252 Loss:2.5134 , Current Loss: 2.3444
Steps: 2352 Loss:2.3963 , Current Loss: 2.4742
Steps: 2452 Loss:2.3296 , Current Loss: 2.2533
Steps: 2552 Loss:2.2754 , Current Loss: 2.3117
Steps: 2652 Loss:2.2334 , Current Loss: 2.2473
Steps: 2752 Loss:2.1891 , Current Loss: 2.0999
Steps: 2852 Loss:2.1574 , Current Loss: 2.1592
Steps: 2952 Loss:2.1154 , Current Loss: 2.0235
Steps: 3052 Loss:2.0940 , Current Loss: 2.0204
Steps: 3152 Loss:2.0549 , Current Loss: 2.0922
Steps: 3252 Loss:2.0362 , Current Loss: 2.1125
Steps: 3352 Loss:2.0035 , Current Loss: 1.9530
Steps: 3452 Loss:1.9732 , Current Loss: 1.8338
Steps: 3552 Loss:1.9511 , Current Loss: 1.8576
Steps: 3652 Loss:1.9250 , Current Loss: 2.1319
Epoch: 1 , Step: 3723 , loss: 2.2360 , Time: 16815.5
Model Saved
Steps: 3823 Loss:1.8577 , Current Loss: 1.6996
Steps: 3923 Loss:1.8623 , Current Loss: 1.8257
Steps: 4023 Loss:1.8193 , Current Loss: 1.

In [11]:
num_epochs = 10
val_period = 2
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 4923 Loss:1.6377 , Current Loss: 1.5147
Steps: 5023 Loss:1.6370 , Current Loss: 1.5468
Steps: 5123 Loss:1.6022 , Current Loss: 1.5690
Steps: 5223 Loss:1.5983 , Current Loss: 1.5429
Steps: 5323 Loss:1.5897 , Current Loss: 1.6596
Steps: 5423 Loss:1.5729 , Current Loss: 1.4462
Steps: 5523 Loss:1.5633 , Current Loss: 1.5260
Steps: 5623 Loss:1.5391 , Current Loss: 1.6116
Steps: 5723 Loss:1.5308 , Current Loss: 1.4481
Steps: 5823 Loss:1.5139 , Current Loss: 1.5606
Steps: 5923 Loss:1.5042 , Current Loss: 1.4563
Steps: 6023 Loss:1.4825 , Current Loss: 1.3770
Steps: 6123 Loss:1.4966 , Current Loss: 1.4541
Steps: 6223 Loss:1.4893 , Current Loss: 1.3478
Steps: 6323 Loss:1.4766 , Current Loss: 1.4357
Steps: 6423 Loss:1.4601 , Current Loss: 1.5925
Steps: 6523 Loss:1.4403 , Current Loss: 1.3285
Epoch: 1 , Step: 6594 , loss: 1.5332 , Time: 11766.0
Model Saved
Steps: 6694 Loss:1.4052 , Current Loss: 1.3481
Steps: 6794 Loss:1.4008 , Current Loss: 1.4364
Steps: 6894 Loss:1.3823 , Current Loss: 1.

KeyboardInterrupt: 

In [None]:
testing()

Val: F1 Score:55.67 Accuracy:67.24  POS: F1 Score:91.11 Accuracy:95.95 Loss:1.2112 , Time: 2268.4


(55.667867534724493, 67.24365329462168, 91.114662306819355, 95.950279386802848)

In [None]:
num_epochs = 10
val_period = 2
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 10341 Loss:1.1559 , Current Loss: 1.1919
Steps: 10441 Loss:1.1635 , Current Loss: 1.1936
Steps: 10541 Loss:1.1677 , Current Loss: 1.0781
Steps: 10641 Loss:1.1611 , Current Loss: 1.1736
Steps: 10741 Loss:1.1605 , Current Loss: 1.2458
Steps: 10841 Loss:1.1578 , Current Loss: 1.1275
Steps: 10941 Loss:1.1515 , Current Loss: 0.9577
Steps: 11041 Loss:1.1716 , Current Loss: 1.3048
Steps: 11141 Loss:1.1651 , Current Loss: 1.3566
Steps: 11241 Loss:1.1490 , Current Loss: 1.1201
Steps: 11341 Loss:1.1500 , Current Loss: 1.1885
Steps: 11441 Loss:1.1336 , Current Loss: 1.1917


In [None]:
num_epochs = 10
val_period = 2
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 11541 Loss:1.1244 , Current Loss: 1.0213
Steps: 11641 Loss:1.1196 , Current Loss: 1.0545
Steps: 11741 Loss:1.1205 , Current Loss: 1.0195
Steps: 11841 Loss:1.1306 , Current Loss: 1.1614
Steps: 11941 Loss:1.1325 , Current Loss: 1.1343
Steps: 12041 Loss:1.1127 , Current Loss: 1.1756
Steps: 12141 Loss:1.1217 , Current Loss: 1.1518
Steps: 12241 Loss:1.1279 , Current Loss: 1.1131
Steps: 12341 Loss:1.0970 , Current Loss: 1.0538
Steps: 12441 Loss:1.1231 , Current Loss: 1.1238
Steps: 12541 Loss:1.1076 , Current Loss: 1.0790
Steps: 12641 Loss:1.1010 , Current Loss: 1.0229
Steps: 12741 Loss:1.1092 , Current Loss: 1.1688
Steps: 12841 Loss:1.0961 , Current Loss: 1.2620
Steps: 12941 Loss:1.0944 , Current Loss: 1.0215
Steps: 13041 Loss:1.0984 , Current Loss: 1.1725
Steps: 13141 Loss:1.0841 , Current Loss: 1.0826
Epoch: 1 , Step: 13212 , loss: 1.1112 , Time: 11709.9
Model Saved
Steps: 13312 Loss:1.0673 , Current Loss: 0.9973
Steps: 13412 Loss:1.0582 , Current Loss: 1.0824
Steps: 13512 Loss:1.06

In [None]:
num_epochs = 10
val_period = 2
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 27409 Loss:0.8657 , Current Loss: 0.9147
Steps: 27509 Loss:0.8580 , Current Loss: 0.7973
Steps: 27609 Loss:0.8644 , Current Loss: 0.9745
Steps: 27709 Loss:0.8687 , Current Loss: 0.9120
Steps: 27809 Loss:0.8565 , Current Loss: 0.8624
Steps: 27909 Loss:0.8643 , Current Loss: 0.9333
Steps: 28009 Loss:0.8565 , Current Loss: 0.7526
Steps: 28109 Loss:0.8686 , Current Loss: 0.8453
Steps: 28209 Loss:0.8642 , Current Loss: 0.9026
Steps: 28309 Loss:0.8658 , Current Loss: 0.8748
Steps: 28409 Loss:0.8768 , Current Loss: 0.8207
Steps: 28509 Loss:0.8549 , Current Loss: 0.8667
Steps: 28609 Loss:0.8789 , Current Loss: 0.9216
Steps: 28709 Loss:0.8722 , Current Loss: 0.8870
Steps: 28809 Loss:0.8816 , Current Loss: 0.7889
Steps: 28909 Loss:0.8655 , Current Loss: 0.8497
Steps: 29009 Loss:0.8688 , Current Loss: 0.9067
Epoch: 1 , Step: 29080 , loss: 0.8658 , Time: 14157.9
Model Saved


In [None]:
num_epochs = 10
val_period = 2
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 29180 Loss:0.8276 , Current Loss: 0.8597
Steps: 29280 Loss:0.8390 , Current Loss: 0.8335
Steps: 29380 Loss:0.8426 , Current Loss: 0.8314
Steps: 29480 Loss:0.8485 , Current Loss: 0.9055
Steps: 29580 Loss:0.8572 , Current Loss: 0.8291
Steps: 29680 Loss:0.8434 , Current Loss: 0.8401
Steps: 29780 Loss:0.8576 , Current Loss: 0.9258
Steps: 29880 Loss:0.8597 , Current Loss: 0.8240
Steps: 29980 Loss:0.8574 , Current Loss: 0.8616
Steps: 30080 Loss:0.8562 , Current Loss: 0.9875
Steps: 30180 Loss:0.8577 , Current Loss: 0.9439
Steps: 30280 Loss:0.8566 , Current Loss: 0.9347
Steps: 30380 Loss:0.8541 , Current Loss: 0.9795
Steps: 30480 Loss:0.8482 , Current Loss: 0.9477
Steps: 30580 Loss:0.8625 , Current Loss: 0.8053
Steps: 30680 Loss:0.8471 , Current Loss: 0.9104
Steps: 30780 Loss:0.8610 , Current Loss: 0.7927
Epoch: 1 , Step: 30851 , loss: 0.8516 , Time: 16681.4
Model Saved
Steps: 30951 Loss:0.8389 , Current Loss: 0.9358
Steps: 31051 Loss:0.8285 , Current Loss: 0.8432
Steps: 31151 Loss:0.83

In [None]:
num_epochs = 10
val_period = 2
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 38535 Loss:0.7951 , Current Loss: 0.7292
Steps: 38635 Loss:0.7887 , Current Loss: 0.8071
Steps: 38735 Loss:0.7925 , Current Loss: 0.7715
Steps: 38835 Loss:0.7859 , Current Loss: 0.8336
Steps: 38935 Loss:0.7952 , Current Loss: 0.7808
Steps: 39035 Loss:0.7931 , Current Loss: 0.8661
Steps: 39135 Loss:0.7966 , Current Loss: 0.8356
Steps: 39235 Loss:0.7950 , Current Loss: 0.7296
Steps: 39335 Loss:0.7950 , Current Loss: 0.8771
Steps: 39435 Loss:0.7980 , Current Loss: 0.8438
Steps: 39535 Loss:0.7938 , Current Loss: 0.7535
Steps: 39635 Loss:0.7976 , Current Loss: 0.7921
Steps: 39735 Loss:0.7978 , Current Loss: 0.8017
Steps: 39835 Loss:0.7941 , Current Loss: 0.8090
Steps: 39935 Loss:0.7975 , Current Loss: 0.7531
Steps: 40035 Loss:0.7969 , Current Loss: 0.7633
Steps: 40135 Loss:0.7976 , Current Loss: 0.7486
Epoch: 1 , Step: 40206 , loss: 0.7948 , Time: 12631.6
Model Saved
Steps: 40306 Loss:0.7760 , Current Loss: 0.7280
Steps: 40406 Loss:0.7711 , Current Loss: 0.7700
Steps: 40506 Loss:0.79

Steps: 54303 Loss:0.7289 , Current Loss: 0.7333
Epoch: 9 , Step: 54374 , loss: 0.7356 , Time: 11870.8
Model Saved
Steps: 54474 Loss:0.7176 , Current Loss: 0.8152
Steps: 54574 Loss:0.7330 , Current Loss: 0.6808
Steps: 54674 Loss:0.7182 , Current Loss: 0.8317
Steps: 54774 Loss:0.7238 , Current Loss: 0.7688
Steps: 54874 Loss:0.7367 , Current Loss: 0.7859
Steps: 54974 Loss:0.7350 , Current Loss: 0.7578
Steps: 55074 Loss:0.7257 , Current Loss: 0.7791
Steps: 55174 Loss:0.7289 , Current Loss: 0.7795
Steps: 55274 Loss:0.7393 , Current Loss: 0.7647
Steps: 55374 Loss:0.7270 , Current Loss: 0.6881
Steps: 55474 Loss:0.7359 , Current Loss: 0.7492
Steps: 55574 Loss:0.7307 , Current Loss: 0.6852
Steps: 55674 Loss:0.7321 , Current Loss: 0.7210
Steps: 55774 Loss:0.7290 , Current Loss: 0.7873
Steps: 55874 Loss:0.7375 , Current Loss: 0.7574
Steps: 55974 Loss:0.7302 , Current Loss: 0.7369
Steps: 56074 Loss:0.7255 , Current Loss: 0.8171
Epoch: 10 , Step: 56145 , loss: 0.7295 , Time: 11999.6
Model Saved
Val

In [10]:
num_epochs = 10
val_period = 2
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 56545 Loss:0.7085 , Current Loss: 0.6290
Steps: 56645 Loss:0.7073 , Current Loss: 0.6460
Steps: 56745 Loss:0.7227 , Current Loss: 0.6688
Steps: 56845 Loss:0.7221 , Current Loss: 0.7113
Steps: 56945 Loss:0.7254 , Current Loss: 0.6511
Steps: 57045 Loss:0.7270 , Current Loss: 0.7832
Steps: 57145 Loss:0.7149 , Current Loss: 0.6987
Steps: 57245 Loss:0.7233 , Current Loss: 0.6848
Steps: 57345 Loss:0.7170 , Current Loss: 0.7284
Steps: 57445 Loss:0.7317 , Current Loss: 0.7737
Steps: 57545 Loss:0.7304 , Current Loss: 0.7958
Steps: 57645 Loss:0.7316 , Current Loss: 0.7285
Steps: 57745 Loss:0.7226 , Current Loss: 0.6010
Steps: 57845 Loss:0.7335 , Current Loss: 0.6583
Steps: 57945 Loss:0.7232 , Current Loss: 0.8144
Steps: 58045 Loss:0.7307 , Current Loss: 0.7244
Steps: 58145 Loss:0.7222 , Current Loss: 0.7913
Epoch: 1 , Step: 58216 , loss: 0.7233 , Time: 11850.3
Model Saved
Steps: 58316 Loss:0.7140 , Current Loss: 0.7330
Steps: 58416 Loss:0.7123 , Current Loss: 0.7229
Steps: 58516 Loss:0.70

Steps: 72313 Loss:0.7032 , Current Loss: 0.7758
Epoch: 9 , Step: 72384 , loss: 0.6873 , Time: 13535.2
Model Saved
Steps: 72484 Loss:0.6700 , Current Loss: 0.6157
Steps: 72584 Loss:0.6802 , Current Loss: 0.7602
Steps: 72684 Loss:0.6891 , Current Loss: 0.5675
Steps: 72784 Loss:0.6744 , Current Loss: 0.6223
Steps: 72884 Loss:0.6926 , Current Loss: 0.7445
Steps: 72984 Loss:0.6794 , Current Loss: 0.5977
Steps: 73084 Loss:0.6821 , Current Loss: 0.6480
Steps: 73184 Loss:0.6748 , Current Loss: 0.6674
Steps: 73284 Loss:0.6874 , Current Loss: 0.6323
Steps: 73384 Loss:0.6836 , Current Loss: 0.6866
Steps: 73484 Loss:0.6829 , Current Loss: 0.7178
Steps: 73584 Loss:0.6922 , Current Loss: 0.7921
Steps: 73684 Loss:0.6915 , Current Loss: 0.7594
Steps: 73784 Loss:0.6824 , Current Loss: 0.7409
Steps: 73884 Loss:0.6895 , Current Loss: 0.6244
Steps: 73984 Loss:0.6904 , Current Loss: 0.7668
Steps: 74084 Loss:0.6867 , Current Loss: 0.7151
Epoch: 10 , Step: 74155 , loss: 0.6843 , Time: 13776.1
Model Saved
Val

### deacy rate changed to 0.9

In [10]:
num_epochs = 10
val_period = 2
loss_collection = []
val_collection = []
for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 74255 Loss:0.6738 , Current Loss: 0.6780
Steps: 74355 Loss:0.6674 , Current Loss: 0.6000
Steps: 74455 Loss:0.6731 , Current Loss: 0.7061
Steps: 74555 Loss:0.6607 , Current Loss: 0.7031
Steps: 74655 Loss:0.6778 , Current Loss: 0.7995
Steps: 74755 Loss:0.6785 , Current Loss: 0.8578
Steps: 74855 Loss:0.6825 , Current Loss: 0.6331
Steps: 74955 Loss:0.6709 , Current Loss: 0.7106
Steps: 75055 Loss:0.6681 , Current Loss: 0.5998
Steps: 75155 Loss:0.6732 , Current Loss: 0.6321
Steps: 75255 Loss:0.6782 , Current Loss: 0.5446
Steps: 75355 Loss:0.6778 , Current Loss: 0.7222
Steps: 75455 Loss:0.6696 , Current Loss: 0.6757
Steps: 75555 Loss:0.6553 , Current Loss: 0.6280
Steps: 75655 Loss:0.6693 , Current Loss: 0.7712
Steps: 75755 Loss:0.6861 , Current Loss: 0.7783
Steps: 75855 Loss:0.6606 , Current Loss: 0.6652
Epoch: 1 , Step: 75926 , loss: 0.6720 , Time: 14196.2
Model Saved
Steps: 76026 Loss:0.6716 , Current Loss: 0.7091
Steps: 76126 Loss:0.6695 , Current Loss: 0.6860
Steps: 76226 Loss:0.67

KeyboardInterrupt: 

In [None]:
start_time = time.time()
train_loss, train_pred, train_true, train_pred_pos, train_true_pos = model(x_id_train, y_train, y_pos_train, mask_train, sense_mask_train, train_cond=False)        
f1_, accu_, f1_pos_, accu_pos_ = etrain_score(train_true, train_pred, train_true_pos, train_pred_pos)
time_taken = time.time() - start_time
print("train: F1 Score:{0:.2f}".format(f1_), "Accuracy:{0:.2f}".format(accu_), " POS: F1 Score:{0:.2f}".format(f1_pos_), "Accuracy:{0:.2f}".format(accu_pos_), "Loss:{0:.4f}".format(train_loss), ", Time: {0:.1f}".format(time_taken))

In [9]:
saver.restore(sess, save_dir)