In [1]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.WARN)
import pickle
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import os
from tensorflow.python.client import device_lib
from collections import Counter

In [2]:
f = open('../Glove/word_embedding_glove', 'rb')
word_embedding = pickle.load(f)
f.close()
word_embedding = word_embedding[: len(word_embedding)-1]

f = open('../Glove/vocab_glove', 'rb')
vocab = pickle.load(f)
f.close()

word2id = dict((w, i) for i,w in enumerate(vocab))
id2word = dict((i, w) for i,w in enumerate(vocab))

unknown_token = "UNKNOWN_TOKEN"

In [3]:
f = open("../../dataset/sense/dict_sense-keys", 'rb')
dict_sense_keys = pickle.load(f)
f.close()

f = open("../../dataset/sense/dict_word-sense", 'rb')
dict_word_sense = pickle.load(f)
f.close()

In [4]:
# Model Description
sense_word = 'force'
model_name = 'multigpu'
sense_word_dir = 'output/' + sense_word
model_dir = sense_word_dir + '/' + model_name
save_dir = os.path.join(model_dir, "save/")
log_dir = os.path.join(model_dir, "log")

if not os.path.exists(sense_word_dir):
    os.mkdir(sense_word_dir)
if not os.path.exists(model_dir):
    os.mkdir(model_dir)
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
if not os.path.exists(log_dir):
    os.mkdir(log_dir)

In [5]:
f = open("../../dataset/checkwords/"+ sense_word + "_data", 'rb')
data = pickle.load(f)
f.close()

data_y = []
for i in range(len(data)):
    data_y.append(dict_sense_keys[data[i][0]][2])

sense_count = Counter(data_y)
sense_count = sense_count.most_common()
vocab_sense = [k for k,v in sense_count[:5]]
sense_count

[('14', 1150),
 ('07', 969),
 ('41', 543),
 ('04', 495),
 ('18', 492),
 ('36', 31),
 ('19', 20),
 ('38', 10),
 ('32', 7),
 ('35', 6)]

In [6]:
vocab_sense

['14', '07', '41', '04', '18']

In [7]:
data_x = []
data_label = []
for i in range(len(data)):
    if dict_sense_keys[data[i][0]][2] in vocab_sense:
        data_x.append(data[i][1])
        data_label.append(dict_sense_keys[data[i][0]][2])

print(len(data_label), len(data_y))

# vocab_sense = dict_word_sense[sense_word]

sense2id = dict((s, i) for i,s in enumerate(vocab_sense))
id2sense = dict((i, s) for i,s in enumerate(vocab))

3649 3723


In [8]:
# Parameters
mode = 'train'
num_senses = len(vocab_sense)
batch_size = 64
vocab_size = len(vocab)
unk_vocab_size = 1
word_emb_size = len(word_embedding[0])
max_sent_size = 200
hidden_size = 100
keep_prob = 0.5
l2_lambda = 0.001
init_lr = 0.005
decay_steps = 500
decay_rate = 0.96
clip_norm = 1
clipping = True

In [9]:
index = []
for i in range(len(data_x)):
    index.append(i)

index_train, index_val, label_train, label_val = train_test_split(index, data_label, train_size=0.8, shuffle=True, stratify=data_label, random_state=0)

data_x = np.array(data_x)

x_train = data_x[index_train]
x_val = data_x[index_val]



In [10]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2"
num_gpus = 3

In [11]:
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

In [12]:
# MODEL

with tf.device('/cpu:0'):
    tower_grads = []
    
    x = tf.placeholder('int32', [num_gpus, batch_size, max_sent_size], name="x")
    y = tf.placeholder('int32', [num_gpus, batch_size], name="y")
    x_mask  = tf.placeholder('bool', [num_gpus, batch_size, max_sent_size], name='x_mask') 
    is_train = tf.placeholder('bool', [], name='is_train')
    word_emb_mat = tf.placeholder('float', [None, word_emb_size], name='emb_mat')
    input_keep_prob = tf.cond(is_train,lambda:keep_prob, lambda:tf.constant(1.0))

    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(init_lr, global_step, decay_steps, decay_rate, staircase=True)
    
    with tf.variable_scope(tf.get_variable_scope()):
        for gpu_idx in range(num_gpus):
            with tf.name_scope("model_{}".format(gpu_idx)) as scope, tf.device('/gpu:%d' % i):

                if gpu_idx > 0:
                        tf.get_variable_scope().reuse_variables()

                with tf.variable_scope("word_embedding"):
                    unk_word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[unk_vocab_size, word_emb_size], initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=0, dtype=tf.float32))
                    final_word_emb_mat = tf.concat([word_emb_mat, unk_word_emb_mat], 0)

                with tf.name_scope("word"):
                    Wx = tf.nn.embedding_lookup(final_word_emb_mat, x[gpu_idx])  

                x_len = tf.reduce_sum(tf.cast(x_mask[gpu_idx], 'int32'), 1)

                with tf.variable_scope("lstm1"):
                    cell_fw1 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
                    cell_bw1 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

                    d_cell_fw1 = tf.contrib.rnn.DropoutWrapper(cell_fw1, input_keep_prob=input_keep_prob)
                    d_cell_bw1 = tf.contrib.rnn.DropoutWrapper(cell_bw1, input_keep_prob=input_keep_prob)

                    (fw_h1, bw_h1), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw1, d_cell_bw1, Wx, sequence_length=x_len, dtype='float', scope='lstm1')
                    h1 = tf.concat([fw_h1, bw_h1], 2)

                with tf.variable_scope("lstm2"):
                    cell_fw2 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
                    cell_bw2 = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

                    d_cell_fw2 = tf.contrib.rnn.DropoutWrapper(cell_fw2, input_keep_prob=input_keep_prob)
                    d_cell_bw2 = tf.contrib.rnn.DropoutWrapper(cell_bw2, input_keep_prob=input_keep_prob)

                    (fw_h2, bw_h2), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw2, d_cell_bw2, h1, sequence_length=x_len, dtype='float', scope='lstm2')
                    h = tf.concat([fw_h2, bw_h2], 2)

                def attention(input_x, input_mask, W_att):
                    h_masked = tf.boolean_mask(input_x, input_mask)
                    h_tanh = tf.tanh(h_masked)
                    u = tf.matmul(h_tanh, W_att)
                    a = tf.nn.softmax(u)
                    c = tf.reduce_sum(tf.multiply(h_tanh, a), 0)  
                    return c
                
                with tf.variable_scope("attention"):
                    W_att = tf.Variable(tf.truncated_normal([2*hidden_size, 1], mean=0.0, stddev=0.1, seed=0), name="W_att")
                    c = tf.expand_dims(attention(h[0], x_mask[gpu_idx[0], W_att), 0)
                    for i in range(1, batch_size):
                        c = tf.concat([c, tf.expand_dims(attention(h[i], x_mask[gpu_idx][i], W_att), 0)], 0)

                with tf.variable_scope("softmax_layer"):
                    W = tf.Variable(tf.truncated_normal([2*hidden_size, num_senses], mean=0.0, stddev=0.1, seed=0), name="W")
                    b = tf.Variable(tf.zeros([num_senses]), name="b")
                    drop_c = tf.nn.dropout(c, input_keep_prob)
                    logits = tf.matmul(drop_c, W) + b
                    predictions = tf.argmax(logits, 1)
#                 with tf.variable_scope("attention"):
#                     W_att = tf.get_variable("W_att", shape=[2*hidden_size, 1], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=0))
#                     c = tf.expand_dims(attention(h[0], x_mask[gpu_idx][0], W_att), 0)
#                     for i in range(1, batch_size):
#                         c = tf.concat([c, tf.expand_dims(attention(h[i], x_mask[gpu_idx][i], W_att), 0)], 0)

#                 with tf.variable_scope("softmax_layer"):
#                     W = tf.get_variable("W", shape=[2*hidden_size, num_senses], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1, seed=0))
#                     b = tf.get_variable("b", shape=[num_senses], initializer=tf.zeros_initializer())
#                     drop_c = tf.nn.dropout(c, input_keep_prob)
#                     logits = tf.matmul(drop_c, W) + b
#                     predictions = tf.argmax(logits, 1)

                loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y[gpu_idx]))


    #             tv_all = tf.trainable_variables()
    #             tv_regu =[]
    #             for t in tv_all:
    #                 if t.name.find('b:')==-1:
    #                     tv_regu.append(t)
    #                     print(gpu_idx, t)
                # l2 Loss
    #             l2_loss = l2_lambda * tf.reduce_sum([ tf.nn.l2_loss(v) for v in tv_regu ])

                total_loss = loss 

                # Optimizer for loss
                optimizer = tf.train.AdamOptimizer(learning_rate)

                # Gradients and Variables for Loss
                grads_vars = optimizer.compute_gradients(total_loss)

                # Clipping of Gradients
                clipped_grads = grads_vars
    #             if(clipping == True):
    #                 clipped_grads = [(tf.clip_by_norm(grad, clip_norm), var) for grad, var in clipped_grads]

                # Training Optimizer for Total Loss
    #             train_op = optimizer.apply_gradients(clipped_grads, global_step=global_step)


                tower_grads.append(clipped_grads)

    tower_grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(tower_grads)

            
            
#             # Summaries
#             var_summaries = []
#             for v in tv_all:
#                 var_summary = tf.summary.histogram("{}/var".format(v.name), v)
#                 var_summaries.append(var_summary)

#             var_summaries_merged = tf.summary.merge(var_summaries)

#             loss_summary = tf.summary.scalar("loss", loss)
#             total_loss_summary = tf.summary.scalar("total_loss", total_loss)
#             summary = tf.summary.merge_all()

ValueError: Tried to convert 'input' to a tensor and failed. Error: None values not supported.

In [None]:
gpu_idx

In [15]:
tower_grads[1]

[(<tensorflow.python.framework.ops.IndexedSlices at 0x7fe09aa8dcc0>,
  <tf.Variable 'word_embedding/word_emb_mat:0' shape=(1, 100) dtype=float32_ref>),
 (<tf.Tensor 'model_1/gradients/model_1/lstm1/lstm1/fw/fw/while/fw/basic_lstm_cell/basic_lstm_cell/MatMul/Enter_grad/b_acc_3:0' shape=(200, 400) dtype=float32>,
  <tf.Variable 'lstm1/lstm1/fw/basic_lstm_cell/kernel:0' shape=(200, 400) dtype=float32_ref>),
 (<tf.Tensor 'model_1/gradients/model_1/lstm1/lstm1/fw/fw/while/fw/basic_lstm_cell/basic_lstm_cell/BiasAdd/Enter_grad/b_acc_3:0' shape=(400,) dtype=float32>,
  <tf.Variable 'lstm1/lstm1/fw/basic_lstm_cell/bias:0' shape=(400,) dtype=float32_ref>),
 (<tf.Tensor 'model_1/gradients/model_1/lstm1/lstm1/bw/bw/while/bw/basic_lstm_cell/basic_lstm_cell/MatMul/Enter_grad/b_acc_3:0' shape=(200, 400) dtype=float32>,
  <tf.Variable 'lstm1/lstm1/bw/basic_lstm_cell/kernel:0' shape=(200, 400) dtype=float32_ref>),
 (<tf.Tensor 'model_1/gradients/model_1/lstm1/lstm1/bw/bw/while/bw/basic_lstm_cell/basic_

In [12]:

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())                          # For initializing all the variables
saver = tf.train.Saver()                                             # For Saving the model
summary_writer = tf.summary.FileWriter(log_dir, sess.graph)          # For writing Summaries

In [18]:
graph = tf.Graph()

In [None]:
with graph.as_default():
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)

In [16]:
def data_prepare(x, y):
    num_examples = len(x)

    xx = np.zeros([num_examples, max_sent_size], dtype=int)
    xx_mask = np.zeros([num_examples, max_sent_size], dtype=bool)
    yy = np.zeros([num_examples], dtype=int)

    for j in range(num_examples):
        for i in range(max_sent_size):
            if(i>=len(x[j])):
                break
            w = x[j][i]
            xx[j][i] = word2id[w] if w in word2id else word2id['UNKNOWN_TOKEN']
            xx_mask[j][i] = True
        yy[j] = sense2id[y[j]]
    return xx, xx_mask, yy

def eval_score(yy, pred):
    num_batches = int(len(yy)/batch_size)
    f1 = f1_score(yy[:batch_size*num_batches], pred, average='macro')
    accu = accuracy_score(yy[:batch_size*num_batches], pred)
    return f1*100, accu*100

def model(xx, yy, mask, train_cond=True):
    num_batches = int(len(xx)/batch_size)
    losses = 0
    preds = []
    for j in range(num_batches): 
        
        s = j * batch_size
        e = (j+1) * batch_size
        
        feed_dict = {x:xx[s:e], y:yy[s:e], x_mask:mask[s:e], is_train:train_cond, input_keep_prob:keep_prob, word_emb_mat:word_embedding}
        
        
        if(train_cond==True):
            _, _loss, step, _summary = sess.run([train_op, total_loss, global_step, summary], feed_dict)
            summary_writer.add_summary(_summary, step)  
#             print("Steps:{}".format(step), ", Loss: {}".format(_loss))

        else:
            _loss, pred = sess.run([total_loss, predictions], feed_dict)
            preds.append(pred)
            
        losses +=_loss

    if(train_cond==False):
        y_pred = []
        for i in range(num_batches):
            for pred in preds[i]:
                y_pred.append(pred)
        return losses/num_batches, y_pred
    
    return losses/num_batches, step

In [17]:
x_id_train, mask_train, y_train = data_prepare(x_train, label_train)
x_id_val, mask_val, y_val = data_prepare(x_val, label_val)

In [15]:
num_epochs = 60
log_period = 5

for i in range(num_epochs):
    random = np.random.choice(len(y_train), size=(len(y_train)), replace=False)
    x_id_train = x_id_train[random]
    y_train = y_train[random]
    mask_train = mask_train[random]    
    
    losses, step = model(x_id_train, y_train, mask_train)
    print("Epoch:", i+1,"Step:", step, "loss:",losses)
    
    if((i+1)%log_period==0):
        saver.save(sess, save_path=save_dir)                         
        print("Model Saved")
        train_loss, train_pred = model(x_id_train, y_train, mask_train, train_cond=False)
        f1_, accu_ = eval_score(y_train, train_pred)
        print("Train: F1 Score: ",  f1_, "Accuracy: ", accu_, "Loss: ", train_loss)
        val_loss, val_pred = model(x_id_val, y_val, mask_val, train_cond=False)
        f1_, accu_ = eval_score(y_val, val_pred)
        print("Val: F1 Score: ",  f1_, "Accuracy: ", accu_, "Loss: ", val_loss)
        
# test_loss, test_pred, test_pred_pos, test_true_pos = model(x_id_test, y_test, mask_test, pos_id_test, train_cond=False)        
# f1_, accu_, f1_pos_, accu_pos_ = etest_score(y_test, test_pred, test_pred_pos, test_true_pos)
# print("test: F1 Score: ",  f1_, "Accuracy: ", accu_, "POS F1 Score: ",  f1_pos_, "POS Accuracy: ", accu_pos_, "Loss: ", test_loss)

Epoch: 1 Step: 45 loss: 7.25343734953
Epoch: 2 Step: 90 loss: 1.4831570175
Epoch: 3 Step: 135 loss: 1.0810598082
Epoch: 4 Step: 180 loss: 0.944087953038
Epoch: 5 Step: 225 loss: 0.863520430194
Model Saved
Train: F1 Score:  83.2738919103 Accuracy:  83.9236111111 Loss:  0.844421825144
Val: F1 Score:  78.3070117359 Accuracy:  78.9772727273 Loss:  0.979586254467
Epoch: 6 Step: 270 loss: 0.801635636224
Epoch: 7 Step: 315 loss: 0.796668424871
Epoch: 8 Step: 360 loss: 0.718497693539
Epoch: 9 Step: 405 loss: 0.690002215571
Epoch: 10 Step: 450 loss: 0.715332831277
Model Saved
Train: F1 Score:  88.9951902583 Accuracy:  89.8958333333 Loss:  0.618835814132
Val: F1 Score:  84.3478170199 Accuracy:  86.0795454545 Loss:  0.726294073192
Epoch: 11 Step: 495 loss: 0.652956566546
Epoch: 12 Step: 540 loss: 0.611100173659
Epoch: 13 Step: 585 loss: 0.607148494985
Epoch: 14 Step: 630 loss: 0.589992363585
Epoch: 15 Step: 675 loss: 0.604815690385
Model Saved
Train: F1 Score:  90.1125042132 Accuracy:  91.1111111

In [None]:
saver.restore(sess, save_dir)