### sequence soft hierarchical + convolution for extracting features using word embeddings
#### batch size = 128, learning rate = 0.001, kernel size =  5, num_gpus = 6

In [1]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.WARN)
import pickle
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import os
from tensorflow.python.client import device_lib
from collections import Counter
import time

VERY_BIG_NUMBER = 1e30

  from ._conv import register_converters as _register_converters


In [2]:
f = open('../../Glove/word_embedding_glove', 'rb')
word_embedding = pickle.load(f)
f.close()

word_embedding = word_embedding[: len(word_embedding)-1]

f = open('../../Glove/vocab_glove', 'rb')
vocab = pickle.load(f)
f.close()

word2id = dict((w, i) for i,w in enumerate(vocab))
id2word = dict((i, w) for i,w in enumerate(vocab))

unknown_token = "UNKNOWN_TOKEN"

# Model Description  
model_name = 'model-aw-lex-seq-hierarchical-1'
model_dir = '../output/all-word/' + model_name
save_dir = os.path.join(model_dir, "save/")
log_dir = os.path.join(model_dir, "log")

if not os.path.exists(model_dir):
    os.mkdir(model_dir)
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
if not os.path.exists(log_dir):
    os.mkdir(log_dir)

with open('../../../dataset/train_val_data_fine/seq_all_word_lex','rb') as f:
    train_data, val_data = pickle.load(f)    


with open('../../../dataset/test_data_fine/seq_all_word_lex','rb') as f:
    test_data = pickle.load(f)  
    
with open('../../../dataset/train_val_data_fine/seq_mask_mat_lex','rb') as f:
    mask_mat = pickle.load(f)    
      
    
# Parameters
mode = 'train'
num_senses = 47
num_pos = 12
num_sense_pos = 5
batch_size = 128
vocab_size = len(vocab)
unk_vocab_size = 1
word_emb_size = len(word_embedding[0])
max_sent_size = 200
hidden_size = 256
num_filter = 256
window_size = 5
kernel_size = 5
keep_prob = 0.3
l2_lambda = 0.001
init_lr = 0.001
decay_steps = 500
decay_rate = 0.99
clip_norm = 1
clipping = True
crf_lambda = 0.05
moving_avg_deacy = 0.999
num_gpus = 6
width = int(window_size/2)

In [3]:
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, axis=0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

In [4]:
# MODEL
device_num = 0
tower_grads = []
losses = []
predictions = []
predictions_pos = []
total_trans_params = []

x = tf.placeholder('int32', [num_gpus, batch_size, max_sent_size], name="x")
y = tf.placeholder('int32', [num_gpus, batch_size, max_sent_size], name="y")
y_sp = tf.placeholder('int32', [num_gpus, batch_size, max_sent_size], name="y_sp")
y_pos = tf.placeholder('int32', [num_gpus, batch_size, max_sent_size], name="y_pos")
x_mask  = tf.placeholder('bool', [num_gpus, batch_size, max_sent_size], name='x_mask') 
sense_mask  = tf.placeholder('bool', [num_gpus, batch_size, max_sent_size], name='sense_mask')
is_train = tf.placeholder('bool', [], name='is_train')
word_emb_mat = tf.placeholder('float', [None, word_emb_size], name='emb_mat')
input_keep_prob = tf.cond(is_train,lambda:keep_prob, lambda:tf.constant(1.0))
pretrain = tf.placeholder('bool', [], name="pretrain")
mask_matrix = tf.tile(tf.expand_dims(tf.constant(value=mask_mat, shape=list(np.array(mask_mat).shape), dtype='float32'), 0), [batch_size, 1, 1]) 

global_step = tf.Variable(0, trainable=False, name="global_step")
learning_rate = tf.train.exponential_decay(init_lr, global_step, decay_steps, decay_rate, staircase=True)
summaries = []

def local_attention(input_x, input_mask, W_att):
    flat_x = tf.reshape(input_x, [-1, tf.shape(input_x)[2]])
    h_tanh = tf.tanh(flat_x)
    u_flat = tf.matmul(h_tanh, W_att) 
    u = tf.reshape(u_flat, [batch_size, -1]) + input_mask
    a = tf.expand_dims(tf.nn.softmax(u, 1), 2)
    c = tf.reduce_sum(tf.multiply(input_x, a), axis=1)  
    return c

def global_attention(input_x, input_mask, W_att):
    flat_x = tf.reshape(input_x, [batch_size*max_sent_size, tf.shape(input_x)[2]])
    h_tanh = tf.tanh(flat_x)
    u_flat = tf.matmul(h_tanh, W_att) 
    u = tf.reshape(u_flat, [batch_size, max_sent_size]) + input_mask
    a = tf.expand_dims(tf.nn.softmax(u, 1), 2)
    c = tf.reduce_sum(tf.multiply(input_x, a), axis=1)  
    return c

with tf.variable_scope("word_embedding"):
    unk_word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[unk_vocab_size, word_emb_size], initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=0, dtype=tf.float32))
    final_word_emb_mat = tf.concat([word_emb_mat, unk_word_emb_mat], 0)

with tf.variable_scope(tf.get_variable_scope()):
    for gpu_idx in range(num_gpus):
        if gpu_idx>=3:
            device_num = 1
        with tf.name_scope("model_{}".format(gpu_idx)) as scope, tf.device('/gpu:%d' % device_num):

            if gpu_idx > 0:
                    tf.get_variable_scope().reuse_variables()

            with tf.name_scope("word"):
                Wx = tf.nn.embedding_lookup(final_word_emb_mat, x[gpu_idx])  

            float_x_mask = tf.cast(x_mask[gpu_idx], 'float')
            float_sense_mask = tf.cast(sense_mask[gpu_idx], 'float')
            x_len = tf.reduce_sum(tf.cast(x_mask[gpu_idx], 'int32'), axis=1)
            
            tile_x_mask = tf.tile(tf.expand_dims(float_x_mask, 2), [1, 1, word_emb_size])
            Wx_masked = tf.multiply(Wx, tile_x_mask)
            
            with tf.variable_scope("convolution"):
                conv1 = tf.layers.conv1d(inputs=Wx_masked, filters=num_filter, kernel_size=[kernel_size], padding='same', activation=tf.nn.relu)
                conv2 = tf.layers.conv1d(inputs=conv1, filters=num_filter, kernel_size=[kernel_size], padding='same')
                
            with tf.variable_scope("lstm1"):
                cell_fw1 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
                cell_bw1 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

                d_cell_fw1 = tf.contrib.rnn.DropoutWrapper(cell_fw1, input_keep_prob=input_keep_prob)
                d_cell_bw1 = tf.contrib.rnn.DropoutWrapper(cell_bw1, input_keep_prob=input_keep_prob)

                (fw_h1, bw_h1), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw1, d_cell_bw1, conv2, sequence_length=x_len, dtype='float', scope='lstm1')
                h1 = tf.concat([fw_h1, bw_h1], 2)

            with tf.variable_scope("lstm2"):
                cell_fw2 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
                cell_bw2 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

                d_cell_fw2 = tf.contrib.rnn.DropoutWrapper(cell_fw2, input_keep_prob=input_keep_prob)
                d_cell_bw2 = tf.contrib.rnn.DropoutWrapper(cell_bw2, input_keep_prob=input_keep_prob)

                (fw_h2, bw_h2), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw2, d_cell_bw2, h1, sequence_length=x_len, dtype='float', scope='lstm2')
                h = tf.concat([fw_h2, bw_h2], 2)
                
            attention_mask = (tf.cast(x_mask[gpu_idx], 'float') -1)*VERY_BIG_NUMBER 

            with tf.variable_scope("global_attention"):
                W_att_global = tf.get_variable("W_att_global", shape=[2*hidden_size, 1], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=gpu_idx*10))
                flat_h = tf.reshape(h, [batch_size*max_sent_size, tf.shape(h)[2]])
                h_tanh = tf.tanh(flat_h)
                u_flat = tf.matmul(h_tanh, W_att_global) 
                u = tf.reshape(u_flat, [batch_size, max_sent_size]) + attention_mask
                a = tf.expand_dims(tf.nn.softmax(u, 1), 2)
                c = tf.reduce_sum(tf.multiply(h, a), axis=1)
                c_final = tf.tile(tf.expand_dims(c, 1), [1, max_sent_size, 1])
                h_final = tf.concat([c_final, h], 2)
                flat_h_final = tf.reshape(h_final, [batch_size*max_sent_size, tf.shape(h_final)[2]])
                       
            with tf.variable_scope("hidden_layer"):
                W = tf.get_variable("W", shape=[4*hidden_size, 2*hidden_size], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=gpu_idx*20))
                b = tf.get_variable("b", shape=[2*hidden_size], initializer=tf.zeros_initializer())
                drop_flat_h_final = tf.nn.dropout(flat_h_final, input_keep_prob)
                flat_hl = tf.matmul(drop_flat_h_final, W) + b
            
            with tf.variable_scope("softmax_layer_pos"):
                W = tf.get_variable("W", shape=[2*hidden_size, num_pos], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=gpu_idx*30))
                b = tf.get_variable("b", shape=[num_pos], initializer=tf.zeros_initializer())
                flat_h1 = tf.reshape(h1, [-1, tf.shape(h1)[2]])
                drop_flat_hl = tf.nn.dropout(flat_hl, input_keep_prob)
                flat_logits_pos = tf.matmul(drop_flat_hl, W) + b
                logits_pos = tf.reshape(flat_logits_pos, [batch_size, max_sent_size, num_pos])
                log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(logits_pos, y_pos[gpu_idx], x_len)
                loss_pos = crf_lambda*tf.reduce_mean(-log_likelihood)
                predictions_pos.append(logits_pos)
                total_trans_params.append(trans_params)
                non_sense_hierarchy = tf.expand_dims(tf.reduce_max(logits_pos[:, :, :num_sense_pos-1], axis=2), 2)
                pos_hierarchy = tf.concat([logits_pos[:, :, :num_sense_pos-1], non_sense_hierarchy], 2)
                hierarchy = tf.matmul(pos_hierarchy, mask_matrix)
                
            with tf.variable_scope("softmax_layer"):
                W = tf.get_variable("W", shape=[2*hidden_size, num_senses], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=gpu_idx*20))
                b = tf.get_variable("b", shape=[num_senses], initializer=tf.zeros_initializer())
                drop_flat_hl = tf.nn.dropout(flat_hl, input_keep_prob)
                flat_logits_sense = tf.matmul(drop_flat_hl, W) + b
                logits_org = tf.reshape(flat_logits_sense, [batch_size, max_sent_size, num_senses])
                logits = tf.multiply(logits_org, hierarchy)
                predictions.append(tf.argmax(logits, 2))

            loss = tf.contrib.seq2seq.sequence_loss(logits, y[gpu_idx], 5*float_sense_mask + float_x_mask/10,name="loss")
            l2_loss = l2_lambda * tf.losses.get_regularization_loss()

            total_loss = tf.cond(pretrain, lambda:loss_pos, lambda:loss + loss_pos + l2_loss)

            summaries.append(tf.summary.scalar("loss_{}".format(gpu_idx), loss))
            summaries.append(tf.summary.scalar("loss_pos_{}".format(gpu_idx), loss_pos))
            summaries.append(tf.summary.scalar("total_loss_{}".format(gpu_idx), total_loss))

            optimizer = tf.train.AdamOptimizer(learning_rate)
            grads_vars = optimizer.compute_gradients(total_loss)

            clipped_grads = grads_vars
            if(clipping == True):
                clipped_grads = [(tf.clip_by_norm(grad, clip_norm), var) for grad, var in clipped_grads]

            tower_grads.append(clipped_grads)
            losses.append(total_loss)

with tf.device('/gpu:0'):
    tower_grads = average_gradients(tower_grads)
    losses = tf.add_n(losses)/len(losses)
    apply_grad_op = optimizer.apply_gradients(tower_grads, global_step=global_step)
    summaries.append(tf.summary.scalar('total_loss', losses))
    summaries.append(tf.summary.scalar('learning_rate', learning_rate))

    variable_averages = tf.train.ExponentialMovingAverage(moving_avg_deacy, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    train_op = tf.group(apply_grad_op, variables_averages_op)
    saver = tf.train.Saver(tf.global_variables())
    summary = tf.summary.merge(summaries)

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [5]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1,2"
# print (device_lib.list_local_devices())
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())                          # For initializing all the variables
summary_writer = tf.summary.FileWriter(log_dir, sess.graph)          # For writing Summaries

In [6]:
save_period = 100
log_period = 100

def model(xx, yy, yy_pos, mask, smask, train_cond=True, pretrain_cond=False):
    num_batches = int(len(xx)/(batch_size*num_gpus))
    _losses = 0
    temp_loss = 0
    preds_sense = []
    true_sense = []
    preds_pos = []
    true_pos = []
    
    for j in range(num_batches): 
        
        s = j * batch_size * num_gpus
        e = (j+1) * batch_size * num_gpus
        xx_re = xx[s:e].reshape([num_gpus, batch_size, -1])
        yy_re = yy[s:e].reshape([num_gpus, batch_size, -1])
        yy_pos_re = yy_pos[s:e].reshape([num_gpus, batch_size, -1])
        mask_re = mask[s:e].reshape([num_gpus, batch_size, -1])
        smask_re = smask[s:e].reshape([num_gpus, batch_size, -1])
 
        feed_dict = {x:xx_re, y:yy_re, y_pos:yy_pos_re, x_mask:mask_re, sense_mask:smask_re, pretrain:pretrain_cond, is_train:train_cond, input_keep_prob:keep_prob, word_emb_mat:word_embedding}
        
        if(train_cond==True):
            _, _loss, step, _summary = sess.run([train_op, losses, global_step, summary], feed_dict)
            summary_writer.add_summary(_summary, step)
            
            temp_loss += _loss
            if((j+1)%log_period==0):
                print("Steps: {}".format(step), "Loss:{0:.4f}".format(temp_loss/log_period), ", Current Loss: {0:.4f}".format(_loss))
                temp_loss = 0
            if((j+1)%save_period==0):
                saver.save(sess, save_path=save_dir)                         
                
        else:
            _loss, pred, crf_logits, trans_params_ = sess.run([total_loss, predictions, predictions_pos, total_trans_params], feed_dict)
            
            for i in range(num_gpus):
                preds_sense.append(pred[i][smask_re[i]])
                true_sense.append(yy_re[i][smask_re[i]])
                true_pos.append(yy_pos_re[i][mask_re[i]])
                temp = []
                for k in range(batch_size):
                    logit_ = crf_logits[i][k][:sum(mask_re[i][k])] # keep only the valid steps
                    viterbi_seq, viterbi_score = tf.contrib.crf.viterbi_decode(logit_, trans_params_[i])
                    temp += viterbi_seq
                preds_pos.append(temp)

        _losses +=_loss

    if(train_cond==False): 
        sense_preds = []
        sense_true = []
        pos_preds = []
        pos_true = []
        
        for preds in preds_sense:
            for ps in preds:      
                sense_preds.append(ps)  
        for trues in true_sense:
            for ts in trues:
                sense_true.append(ts)
        
        for preds in preds_pos:
            for ps in preds:      
                pos_preds.append(ps)      
        for trues in true_pos:
            for ts in trues:
                pos_true.append(ts)
                
        return _losses/num_batches, sense_preds, sense_true, pos_preds, pos_true

    return _losses/num_batches, step

def eval_score(yy, pred, yy_pos, pred_pos):
    f1 = f1_score(yy, pred, average='macro')
    accu = accuracy_score(yy, pred)
    f1_pos = f1_score(yy_pos, pred_pos, average='macro')
    accu_pos = accuracy_score(yy_pos, pred_pos)
    return f1*100, accu*100, f1_pos*100, accu_pos*100

In [12]:
x_id_train = train_data['x']
mask_train = train_data['x_mask']
sense_mask_train = train_data['sense_mask']
y_train = train_data['y']
y_pos_train = train_data['pos']

x_id_val = val_data['x']
mask_val = val_data['x_mask']
sense_mask_val = val_data['sense_mask']
y_val = val_data['y']
y_pos_val = val_data['pos']

x_id_test = test_data['x']
mask_test = test_data['x_mask']
sense_mask_test = test_data['sense_mask']
y_test = test_data['y']
y_pos_test = test_data['pos']

In [13]:
def testing():
    start_time = time.time()
    val_loss, val_pred, val_true, val_pred_pos, val_true_pos = model(x_id_val, y_val, y_pos_val, mask_val, sense_mask_val, train_cond=False)        
    f1_, accu_, f1_pos_, accu_pos_ = eval_score(val_true, val_pred, val_true_pos, val_pred_pos)
    time_taken = time.time() - start_time
    print("Val: F1 Score:{0:.2f}".format(f1_), "Accuracy:{0:.2f}".format(accu_), " POS: F1 Score:{0:.2f}".format(f1_pos_), "Accuracy:{0:.2f}".format(accu_pos_), "Loss:{0:.4f}".format(val_loss), ", Time: {0:.1f}".format(time_taken))
    return f1_, accu_, f1_pos_, accu_pos_

def training(current_epoch, pre_train_cond):
        random = np.random.choice(len(y_train), size=(len(y_train)), replace=False)
        x_id_train_tmp = x_id_train[random]
        y_train_tmp = y_train[random]
        mask_train_tmp = mask_train[random]    
        sense_mask_train_tmp = sense_mask_train[random]
        y_pos_train_tmp = y_pos_train[random]

        start_time = time.time()
        train_loss, step = model(x_id_train_tmp, y_train_tmp, y_pos_train_tmp, mask_train_tmp, sense_mask_train_tmp, pretrain_cond=pre_train_cond)
        time_taken = time.time() - start_time
        print("Epoch: {}".format(current_epoch+1),", Step: {}".format(step), ", loss: {0:.4f}".format(train_loss), ", Time: {0:.1f}".format(time_taken))
        saver.save(sess, save_path=save_dir)                         
        print("Model Saved")
        return [step, train_loss]
    
def evaluate():
    start_time = time.time()
    test_loss1, test_pred1, test_true1, test_pred_pos1, test_true_pos1 = model(x_id_test, y_test, y_pos_test, mask_test, sense_mask_test, train_cond=False)        
    test_loss2, test_pred2, test_true2, test_pred_pos2, test_true_pos2 = model(x_id_test[-num_gpus*batch_size:], y_test[-num_gpus*batch_size:], y_pos_test[-num_gpus*batch_size:], mask_test[-num_gpus*batch_size:], sense_mask_test[-num_gpus*batch_size:], train_cond=False)        
    test_loss = test_loss1 + test_loss2
    test_true = test_true1 + test_true2
    test_pred = test_pred1 + test_pred2
    test_pred_pos = test_pred_pos1 + test_pred_pos2
    test_true_pos = test_true_pos1 + test_true_pos2
    f1_, accu_, f1_pos_, accu_pos_ = eval_score(test_true, test_pred, test_true_pos, test_pred_pos)
    time_taken = time.time() - start_time
    print("Test: F1 Score:{0:.2f}".format(f1_), "Accuracy:{0:.2f}".format(accu_), " POS: F1 Score:{0:.2f}".format(f1_pos_), "Accuracy:{0:.2f}".format(accu_pos_), "Loss:{0:.4f}".format(test_loss), ", Time: {0:.1f}".format(time_taken))
    return f1_, accu_, f1_pos_, accu_pos_

In [None]:
loss_collection = []
val_collection = []
num_epochs = 20
val_period = 1

for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 100 Loss:6.9137 , Current Loss: 5.4475
Steps: 200 Loss:4.1604 , Current Loss: 3.2813
Steps: 300 Loss:2.9144 , Current Loss: 2.6757
Steps: 400 Loss:2.4122 , Current Loss: 2.3758
Steps: 500 Loss:2.1532 , Current Loss: 2.0537
Steps: 600 Loss:1.9806 , Current Loss: 1.9266
Steps: 700 Loss:1.8494 , Current Loss: 1.7897
Steps: 800 Loss:1.7666 , Current Loss: 1.7547
Epoch: 1 , Step: 885 , loss: 2.8912 , Time: 7279.1
Model Saved


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Val: F1 Score:29.89 Accuracy:42.48  POS: F1 Score:89.55 Accuracy:96.29 Loss:1.6599 , Time: 1738.0
Steps: 985 Loss:1.6218 , Current Loss: 1.5338
Steps: 1085 Loss:1.5744 , Current Loss: 1.5560
Steps: 1185 Loss:1.5221 , Current Loss: 1.5210
Steps: 1285 Loss:1.4670 , Current Loss: 1.4393
Steps: 1385 Loss:1.4315 , Current Loss: 1.3596
Steps: 1485 Loss:1.4018 , Current Loss: 1.3325
Steps: 1585 Loss:1.3728 , Current Loss: 1.4093
Steps: 1685 Loss:1.3372 , Current Loss: 1.3128
Epoch: 2 , Step: 1770 , loss: 1.4516 , Time: 7116.8
Model Saved
Val: F1 Score:44.49 Accuracy:56.97  POS: F1 Score:93.84 Accuracy:97.17 Loss:1.3086 , Time: 1655.3
Steps: 1870 Loss:1.2801 , Current Loss: 1.2591
Steps: 1970 Loss:1.2580 , Current Loss: 1.2412
Steps: 2070 Loss:1.2400 , Current Loss: 1.2506
Steps: 2170 Loss:1.2281 , Current Loss: 1.1944
Steps: 2270 Loss:1.2229 , Current Loss: 1.1865
Steps: 2370 Loss:1.1918 , Current Loss: 1.2074
Steps: 2470 Loss:1.1892 , Current Loss: 1.2016
Steps: 2570 Loss:1.1693 , Current Lo

In [None]:
loss_collection = []
val_collection = []
num_epochs = 20
val_period = 2

for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 3055 Loss:1.1019 , Current Loss: 1.1525
Steps: 3155 Loss:1.0957 , Current Loss: 1.0835
Steps: 3255 Loss:1.0907 , Current Loss: 1.0775
Steps: 3355 Loss:1.0870 , Current Loss: 1.0022
Steps: 3455 Loss:1.0792 , Current Loss: 1.1251
Steps: 3555 Loss:1.0763 , Current Loss: 1.1148
Steps: 3655 Loss:1.0667 , Current Loss: 1.0846
Steps: 3755 Loss:1.0610 , Current Loss: 1.0418
Epoch: 1 , Step: 3840 , loss: 1.0807 , Time: 7182.8
Model Saved
Steps: 3940 Loss:1.0355 , Current Loss: 1.0171
Steps: 4040 Loss:1.0304 , Current Loss: 1.0302
Steps: 4140 Loss:1.0250 , Current Loss: 0.9752
Steps: 4240 Loss:1.0321 , Current Loss: 0.9821
Steps: 4340 Loss:1.0251 , Current Loss: 1.0192
Steps: 4440 Loss:1.0177 , Current Loss: 1.0613
Steps: 4540 Loss:1.0254 , Current Loss: 0.9997
Steps: 4640 Loss:1.0263 , Current Loss: 1.0625
Epoch: 2 , Step: 4725 , loss: 1.0261 , Time: 7055.2
Model Saved


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Val: F1 Score:58.20 Accuracy:67.69  POS: F1 Score:95.56 Accuracy:97.79 Loss:1.0284 , Time: 1640.2
Steps: 4825 Loss:0.9968 , Current Loss: 1.0317
Steps: 4925 Loss:0.9851 , Current Loss: 1.0047
Steps: 5025 Loss:0.9896 , Current Loss: 1.0087
Steps: 5125 Loss:0.9883 , Current Loss: 0.9501
Steps: 5225 Loss:0.9885 , Current Loss: 0.9456
Steps: 5325 Loss:0.9904 , Current Loss: 1.0228
Steps: 5425 Loss:0.9860 , Current Loss: 1.0075
Steps: 5525 Loss:0.9789 , Current Loss: 0.9320
Epoch: 3 , Step: 5610 , loss: 0.9876 , Time: 6955.0
Model Saved
Steps: 5710 Loss:0.9576 , Current Loss: 1.0799
Steps: 5810 Loss:0.9548 , Current Loss: 0.9743
Steps: 5910 Loss:0.9598 , Current Loss: 1.0113
Steps: 6010 Loss:0.9622 , Current Loss: 0.9574
Steps: 6110 Loss:0.9664 , Current Loss: 0.9280
Steps: 6210 Loss:0.9663 , Current Loss: 0.9601
Steps: 6310 Loss:0.9578 , Current Loss: 0.9276
Steps: 6410 Loss:0.9549 , Current Loss: 0.9969
Epoch: 4 , Step: 6495 , loss: 0.9602 , Time: 7082.5
Model Saved
Val: F1 Score:60.84 Ac

In [10]:
loss_collection = []
val_collection = []
num_epochs = 20
val_period = 2

for i in range(num_epochs):
    loss_collection.append(training(i, False))
    if((i+1)%val_period==0):
        val_collection.append(testing())

Steps: 12890 Loss:0.8422 , Current Loss: 0.9194
Steps: 12990 Loss:0.8440 , Current Loss: 0.8576
Steps: 13090 Loss:0.8519 , Current Loss: 0.8713
Steps: 13190 Loss:0.8460 , Current Loss: 0.8570
Steps: 13290 Loss:0.8525 , Current Loss: 0.8493
Steps: 13390 Loss:0.8589 , Current Loss: 0.8161
Steps: 13490 Loss:0.8475 , Current Loss: 0.8547
Steps: 13590 Loss:0.8579 , Current Loss: 0.8852
Epoch: 1 , Step: 13675 , loss: 0.8501 , Time: 7098.5
Model Saved
Steps: 13775 Loss:0.8348 , Current Loss: 0.8920
Steps: 13875 Loss:0.8449 , Current Loss: 0.8669
Steps: 13975 Loss:0.8343 , Current Loss: 0.8226
Steps: 14075 Loss:0.8425 , Current Loss: 0.7605
Steps: 14175 Loss:0.8504 , Current Loss: 0.8729
Steps: 14275 Loss:0.8436 , Current Loss: 0.8368
Steps: 14375 Loss:0.8411 , Current Loss: 0.8741
Steps: 14475 Loss:0.8438 , Current Loss: 0.8519
Epoch: 2 , Step: 14560 , loss: 0.8426 , Time: 6746.4
Model Saved


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


Val: F1 Score:64.19 Accuracy:72.52  POS: F1 Score:96.19 Accuracy:98.12 Loss:0.8952 , Time: 1568.2
Steps: 14660 Loss:0.8253 , Current Loss: 0.8799
Steps: 14760 Loss:0.8376 , Current Loss: 0.8739


KeyboardInterrupt: 

In [14]:
evaluate()

Test: F1 Score:42.55 Accuracy:48.26  POS: F1 Score:88.28 Accuracy:95.46 Loss:5.7083 , Time: 13.9


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


(42.546408301477356, 48.259979529170934, 88.27699421388192, 95.45813586097947)

In [None]:
start_time = time.time()
train_loss, train_pred, train_true, train_pred_pos, train_true_pos = model(x_id_train, y_train, y_pos_train, mask_train, sense_mask_train, train_cond=False)        
f1_, accu_, f1_pos_, accu_pos_ = etrain_score(train_true, train_pred, train_true_pos, train_pred_pos)
time_taken = time.time() - start_time
print("train: F1 Score:{0:.2f}".format(f1_), "Accuracy:{0:.2f}".format(accu_), " POS: F1 Score:{0:.2f}".format(f1_pos_), "Accuracy:{0:.2f}".format(accu_pos_), "Loss:{0:.4f}".format(train_loss), ", Time: {0:.1f}".format(time_taken))

In [9]:
saver.restore(sess, save_dir)