In [1]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.WARN)
import pickle
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import os
from tensorflow.python.client import device_lib

In [2]:
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files that come from a trusted source.

# Pre-trained GloVe embedding matrix. Its last row is dropped here; the model
# graph concatenates a trainable unknown-token block after the remaining rows.
with open('../Glove/word_embedding_glove', 'rb') as f:
    word_embedding = pickle.load(f)
word_embedding = word_embedding[:-1]

# Vocabulary: position in this sequence is the integer id of the token.
with open('../Glove/vocab_glove', 'rb') as f:
    vocab = pickle.load(f)

# Lookup tables between tokens and their ids.
word2id = {w: i for i, w in enumerate(vocab)}
id2word = {i: w for i, w in enumerate(vocab)}

unknown_token = "UNKNOWN_TOKEN"

# Training data, indexed by sense word in the splitting cells.
with open("train.pickle", 'rb') as f:
    full_data = pickle.load(f)

In [3]:
# Model Description
sense_word = 'hard'
model_name = 'basic'
model_dir = 'output/' + sense_word + '/' + model_name
save_dir = os.path.join(model_dir, "save/")   # checkpoints
log_dir = os.path.join(model_dir, "log")      # summaries

# makedirs (not mkdir) so the intermediate output/<sense_word>/<model_name>
# directories are created too; exist_ok makes the cell safe to re-run.
os.makedirs(save_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

In [4]:
# Parameters

# -- run mode: chooses how the unknown-token embedding variable is declared
mode = 'train'

# -- sizes derived from the loaded vocabulary / embeddings
vocab_size = len(vocab)
word_emb_size = len(word_embedding[0])
unk_vocab_size = 1          # rows of the trainable unknown-token embedding

# -- fixed tensor dimensions of the graph
batch_size = 64
max_sent_size = 200
hidden_size = 100           # units per LSTM direction
num_senses = 3              # number of output classes

# -- regularisation
keep_prob = 0.5             # dropout keep probability used while training
l2_lambda = 0.001           # weight of the L2 penalty

# -- optimisation
init_lr = 0.001
decay_rate = 0.96
decay_steps = 5000
clipping = True             # clip each gradient by norm when True
clip_norm = 1

In [5]:
# MODEL
# Graph inputs. The batch dimension is fixed to batch_size, so every feed must
# contain exactly batch_size rows.
# x: token ids, one row per sentence; y: one integer label per sentence.
x = tf.placeholder('int32', [batch_size, max_sent_size], name="x")
y = tf.placeholder('int32', [batch_size], name="y")
# x_mask: True at positions that hold a real token.
x_mask  = tf.placeholder('bool', [batch_size, max_sent_size], name='x_mask') 
is_train = tf.placeholder('bool', [], name='is_train')
# Embedding matrix supplied at run time through the feed dict (not a variable).
word_emb_mat = tf.placeholder('float', [None, word_emb_size], name='emb_mat')
# Dropout keep probability: keep_prob when is_train is True, otherwise 1.0.
input_keep_prob = tf.cond(is_train,lambda:keep_prob, lambda:tf.constant(1.0))
# Number of True mask entries per row, used as the sequence length of the RNN.
x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 1)

with tf.name_scope("word_embedding"):
    # Trainable embedding rows for the unknown token(s). Both branches declare
    # the same variable name and shape; only the 'train' branch passes an
    # explicit initializer.
    if mode == 'train':
        unk_word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[unk_vocab_size, word_emb_size], initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32))
    else:
        unk_word_emb_mat = tf.get_variable("word_emb_mat", shape=[unk_vocab_size, word_emb_size], dtype='float')
        
    # Append the trainable rows after the fed matrix (axis 0), then look up the
    # embedding of every token id in x.
    final_word_emb_mat = tf.concat([word_emb_mat, unk_word_emb_mat], 0)
    Wx = tf.nn.embedding_lookup(final_word_emb_mat, x)  

with tf.variable_scope("lstm"):
    # Forward and backward LSTM cells, each wrapped with dropout on its inputs.
    cell_fw = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(hidden_size,state_is_tuple=True)

    d_cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=input_keep_prob)
    d_cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=input_keep_prob)
    
    # Run the bidirectional RNN over the embedded tokens; the final states are
    # discarded and the two output sequences are concatenated along axis 2.
    (fw_h, bw_h), _ = tf.nn.bidirectional_dynamic_rnn(d_cell_fw, d_cell_bw, Wx, sequence_length=x_len, dtype='float', scope='lstm')
    h = tf.concat([fw_h, bw_h], 2)

def attention(input_x, input_mask, W_att):
    """Pool the hidden states of one sentence into a single attention vector.

    Args:
        input_x: hidden states of a single sentence, one row per time step
            (the caller passes ``h[i]``).
        input_mask: boolean mask over the time steps; only rows where it is
            True take part in the pooling.
        W_att: projection with exactly one output column, used to score each
            time step.

    Returns:
        The attention-weighted sum, over the unmasked time steps, of
        ``tanh(input_x)``.
    """
    h_masked = tf.boolean_mask(input_x, input_mask)
    h_tanh = tf.tanh(h_masked)
    # One unnormalised score per remaining time step: shape [steps, 1].
    u = tf.matmul(h_tanh, W_att)
    # Normalise the scores ACROSS time steps. A plain softmax over the last
    # axis of a [steps, 1] tensor always yields 1.0 for every step, which turns
    # the attention into an unweighted sum and leaves W_att without any
    # gradient from the classification loss. Lay the scores out as one row so
    # the softmax runs over the steps, then restore the column shape.
    a = tf.reshape(tf.nn.softmax(tf.reshape(u, [1, -1])), [-1, 1])
    c = tf.reduce_sum(tf.multiply(h_tanh, a), 0)
    return c

with tf.variable_scope("attention"):
    # tf.truncated_normal takes (shape, mean, stddev): the former positional
    # arguments (-0.1, 0.1) produced weights centred on -0.1 rather than a
    # zero-mean initialisation with stddev 0.1.
    W_att = tf.Variable(tf.truncated_normal([2*hidden_size, 1], mean=0.0, stddev=0.1), name="W_att")
    # Pool every sentence of the batch separately (each has its own mask) and
    # join the resulting vectors with a single concat instead of growing the
    # tensor one row at a time.
    c = tf.concat(
        [tf.expand_dims(attention(h[i], x_mask[i], W_att), 0) for i in range(batch_size)],
        0,
    )
        
with tf.variable_scope("softmax_layer"):
    # tf.truncated_normal takes (shape, mean, stddev): the former positional
    # arguments (-0.1, 0.1) produced weights centred on -0.1 rather than a
    # zero-mean initialisation with stddev 0.1.
    W = tf.Variable(tf.truncated_normal([2*hidden_size, num_senses], mean=0.0, stddev=0.1), name="W")
    b = tf.Variable(tf.zeros([num_senses]), name="b")
    # Dropout on the pooled sentence vector, then a linear layer producing one
    # logit per sense; the prediction is the index of the largest logit.
    drop_c = tf.nn.dropout(c, input_keep_prob)
    logits = tf.matmul(drop_c, W) + b
    predictions = tf.argmax(logits, 1)

# Mean cross-entropy of the batch against the integer labels in y.
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
global_step = tf.Variable(0, trainable=False, name="global_step")

# Learning rate decays by decay_rate every decay_steps steps (staircase).
learning_rate = tf.train.exponential_decay(init_lr, global_step, decay_steps, decay_rate, staircase=True)

# Variables that receive the L2 penalty: every trainable variable whose name
# does not contain 'b:' (this excludes the softmax bias named "b").
# NOTE(review): bias variables created under other names are presumably still
# regularised by this filter - confirm that is intended.
tv_all = tf.trainable_variables()
tv_regu = [t for t in tv_all if 'b:' not in t.name]

# l2 Loss
l2_loss = l2_lambda * tf.reduce_sum([tf.nn.l2_loss(v) for v in tv_regu])

total_loss = loss + l2_loss

# Optimizer for loss
optimizer = tf.train.AdamOptimizer(learning_rate)

# Gradients and Variables for Loss
grads_vars = optimizer.compute_gradients(total_loss)

# Clipping of Gradients: each gradient is clipped to clip_norm on its own.
# compute_gradients yields None for a variable that total_loss does not depend
# on; such entries are passed through untouched because clip_by_norm cannot
# take None (apply_gradients skips them).
clipped_grads = grads_vars
if clipping:
    clipped_grads = [
        (tf.clip_by_norm(grad, clip_norm), var) if grad is not None else (grad, var)
        for grad, var in grads_vars
    ]

# Training Optimizer for Total Loss
train_op = optimizer.apply_gradients(clipped_grads, global_step=global_step)

# Summaries: one histogram per trainable variable plus the two scalar losses.
var_summaries = []
for v in tv_all:
    var_summary = tf.summary.histogram("{}/var".format(v.name), v)
    var_summaries.append(var_summary)

var_summaries_merged = tf.summary.merge(var_summaries)

loss_summary = tf.summary.scalar("loss", loss)
total_loss_summary = tf.summary.scalar("total_loss", total_loss)
summary = tf.summary.merge_all()

In [6]:
# Configure CUDA device visibility before the devices are first enumerated.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "1",
})
print(device_lib.list_local_devices())

# Session with soft placement enabled; all variables are initialised right away.
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(tf.global_variables_initializer())

# Saver writes/restores checkpoints; the FileWriter logs summaries and the
# graph into log_dir.
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

[name: "/cpu:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7597343218486361920
, name: "/gpu:0"
device_type: "GPU"
memory_limit: 7958282240
locality {
  bus_id: 1
}
incarnation: 3290425736900701779
physical_device_desc: "device: 0, name: GeForce GTX 1080, pci bus id: 0000:06:00.0"
]


In [66]:
# k-fold Splitting
# dtype=object keeps one Python list per sentence. Without it NumPy tries to
# build a rectangular array, which raises on recent NumPy versions when the
# sentences differ in length.
data_x = np.array(full_data[sense_word][0], dtype=object)
data_y = np.array(full_data[sense_word][2])
kf = KFold(n_splits=5,shuffle=True,random_state=0)

# Only the index arrays of each fold are shown; the fold data itself is not
# materialised (see the commented lines).
for train_index, test_index in kf.split(data_x):
    print("TRAIN:", train_index, "TEST:", test_index)
    #x_train, x_test = data_x[train_index], data_x[test_index]
    #y_train, y_test = data_y[train_index], data_y[test_index]

TRAIN: [   0    2    3 ..., 4330 4331 4332] TEST: [   1   17   22   23   30   31   33   34   36   39   42   45   49   57   61
   70   72   85   87   88  104  117  118  125  130  134  138  142  148  156
  159  162  170  173  182  185  189  192  195  196  202  203  214  217  218
  220  221  223  224  226  227  248  252  253  286  289  291  295  298  302
  304  308  311  322  326  333  345  347  352  378  380  381  383  398  402
  405  408  410  415  418  429  452  453  457  463  465  467  470  472  481
  482  483  485  491  496  499  501  502  514  517  520  521  526  527  528
  530  531  536  538  543  545  546  553  555  556  559  562  566  567  569
  575  576  578  581  582  587  598  599  607  615  620  621  629  634  636
  638  639  641  648  651  661  663  666  668  678  683  684  685  690  692
  704  706  708  712  716  720  735  748  768  773  775  776  788  794  799
  803  809  817  825  829  836  841  842  843  861  866  867  871  872  882
  883  888  891  895  896  898  900  9

In [64]:
# Splitting
data_x = full_data[sense_word][0]
data_y = full_data[sense_word][2]

# First cut: stratified on the labels with train_size=0.8; the held-out part
# is returned as x_test / y_test.
x_train, x_test, y_train, y_test = train_test_split(
    data_x,
    data_y,
    train_size=0.8,
    shuffle=True,
    stratify=data_y,
    random_state=0,
)

# Second cut: the training part is split again (train_size=0.9) to obtain a
# validation set, again stratified on the labels.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    train_size=0.9,
    shuffle=True,
    stratify=y_train,
    random_state=0,
)



In [None]:
def data_prepare(x, max_len=None, token_to_id=None, unk_token='UNKNOWN_TOKEN'):
    """Convert tokenised sentences into a padded id matrix and a boolean mask.

    Args:
        x: sequence of sentences, each a sequence of tokens.
        max_len: number of columns of the result; longer sentences are
            truncated. Defaults to the notebook-level ``max_sent_size``.
        token_to_id: mapping from token to integer id. Defaults to the
            notebook-level ``word2id``.
        unk_token: key of ``token_to_id`` used for tokens missing from it.

    Returns:
        ``(ids, mask)``: two arrays of shape ``(len(x), max_len)``. ``ids``
        holds the token ids (0 where there is no token) and ``mask`` is True
        exactly at the positions that hold a token.

    Raises:
        KeyError: if a token is missing from ``token_to_id`` and ``unk_token``
            is missing as well.
    """
    if max_len is None:
        max_len = max_sent_size
    if token_to_id is None:
        token_to_id = word2id

    num_examples = len(x)
    xx = np.zeros([num_examples, max_len], dtype=int)
    xx_mask = np.zeros([num_examples, max_len], dtype=bool)

    for j in range(num_examples):
        # The unknown-token id is only looked up when it is actually needed.
        ids = [
            token_to_id[w] if w in token_to_id else token_to_id[unk_token]
            for w in x[j][:max_len]
        ]
        n = len(ids)
        if n:
            xx[j, :n] = ids
            xx_mask[j, :n] = True

    return xx, xx_mask

def eval_f1(yy, pred):
    """Macro-averaged F1 score, as a percentage.

    Args:
        yy: true labels, in the same order as the evaluated examples.
        pred: predicted labels for the leading ``len(pred)`` examples. Batched
            evaluation only covers whole batches, so ``pred`` may be shorter
            than ``yy``.

    Returns:
        ``100 * f1_score`` computed on the first ``len(pred)`` labels of ``yy``.
    """
    # Align the labels with the predictions actually produced, instead of
    # re-deriving that count from the global batch size.
    f1 = f1_score(yy[:len(pred)], pred, average='macro')
    return f1*100

def model(xx, yy, mask, train_cond=True):
    """Run one pass over the data in batches of ``batch_size``.

    Only whole batches are processed: the trailing ``len(xx) % batch_size``
    examples are skipped because the graph placeholders have a fixed batch
    dimension.

    Args:
        xx: token-id matrix, one row per example.
        yy: labels aligned with ``xx``.
        mask: boolean mask aligned with ``xx``.
        train_cond: when True, every batch runs ``train_op`` and its summary is
            written; when False, only the loss and predictions are computed.

    Returns:
        ``(mean_total_loss, global_step)`` when training, otherwise
        ``(mean_total_loss, predictions)`` where ``predictions`` is a flat list
        covering the processed examples in order.

    Raises:
        ValueError: if there are fewer than ``batch_size`` examples.
    """
    num_batches = len(xx) // batch_size
    if num_batches == 0:
        raise ValueError(
            "need at least batch_size ({}) examples, got {}".format(batch_size, len(xx)))

    losses = 0
    preds = []
    step = None
    for j in range(num_batches):
        s = j * batch_size
        e = s + batch_size

        # input_keep_prob is intentionally NOT fed: it is derived from is_train
        # inside the graph. Feeding keep_prob here would override that and keep
        # dropout switched on during evaluation as well.
        feed_dict = {x:xx[s:e], y:yy[s:e], x_mask:mask[s:e], is_train:train_cond, word_emb_mat:word_embedding}

        if train_cond:
            _, _loss, step, _summary = sess.run([train_op, total_loss, global_step, summary], feed_dict)
            summary_writer.add_summary(_summary, step)
        else:
            _loss, pred = sess.run([total_loss, predictions], feed_dict)
            preds.append(pred)

        losses += _loss

    if not train_cond:
        # Flatten the per-batch prediction arrays into one list.
        y_pred = [p for batch_pred in preds for p in batch_pred]
        return losses/num_batches, y_pred

    return losses/num_batches, step

In [None]:
# Convert the train / validation sentences into zero-padded id matrices and
# the boolean masks marking real tokens.
x_id_train, mask_train = data_prepare(x_train)
x_id_val, mask_val = data_prepare(x_val)

In [None]:
num_epochs = 10

# Saver.save expects a filename prefix, not a directory: passing save_dir
# itself yields checkpoint files with an empty base name inside it.
checkpoint_prefix = os.path.join(save_dir, model_name)

for i in range(num_epochs):
    # One pass over the training batches (visited in the same order every
    # epoch), then a checkpoint and a validation pass.
    losses, step = model(x_id_train, y_train, mask_train)
    print("Epoch:", i+1,"Step:", step, "loss:",losses)
    saver.save(sess, save_path=checkpoint_prefix)
    print("Saved Model Complete")
    val_loss, val_pred = model(x_id_val, y_val, mask_val, train_cond=False)
    print("Validation F1 Score: ",  eval_f1(y_val, val_pred), "Loss: ", val_loss)

In [None]:
num_epochs = 10

# Saver.save expects a filename prefix, not a directory: passing save_dir
# itself yields checkpoint files with an empty base name inside it.
checkpoint_prefix = os.path.join(save_dir, model_name)

for i in range(num_epochs):
    # One pass over the training batches (visited in the same order every
    # epoch), then a checkpoint and a validation pass.
    losses, step = model(x_id_train, y_train, mask_train)
    print("Epoch:", i+1,"Step:", step, "loss:",losses)
    saver.save(sess, save_path=checkpoint_prefix)
    print("Saved Model Complete")
    val_loss, val_pred = model(x_id_val, y_val, mask_val, train_cond=False)
    print("Validation F1 Score: ",  eval_f1(y_val, val_pred), "Loss: ", val_loss)

In [None]:
num_epochs = 10

# Saver.save expects a filename prefix, not a directory: passing save_dir
# itself yields checkpoint files with an empty base name inside it.
checkpoint_prefix = os.path.join(save_dir, model_name)

for i in range(num_epochs):
    # One pass over the training batches (visited in the same order every
    # epoch), then a checkpoint and a validation pass.
    losses, step = model(x_id_train, y_train, mask_train)
    print("Epoch:", i+1,"Step:", step, "loss:",losses)
    saver.save(sess, save_path=checkpoint_prefix)
    print("Saved Model Complete")
    val_loss, val_pred = model(x_id_val, y_val, mask_val, train_cond=False)
    print("Validation F1 Score: ",  eval_f1(y_val, val_pred), "Loss: ", val_loss)

In [None]:
num_epochs = 20

# Saver.save expects a filename prefix, not a directory: passing save_dir
# itself yields checkpoint files with an empty base name inside it.
checkpoint_prefix = os.path.join(save_dir, model_name)

for i in range(num_epochs):
    # One pass over the training batches (visited in the same order every
    # epoch), then a checkpoint and a validation pass.
    losses, step = model(x_id_train, y_train, mask_train)
    print("Epoch:", i+1,"Step:", step, "loss:",losses)
    saver.save(sess, save_path=checkpoint_prefix)
    print("Saved Model Complete")
    val_loss, val_pred = model(x_id_val, y_val, mask_val, train_cond=False)
    print("Validation F1 Score: ",  eval_f1(y_val, val_pred), "Loss: ", val_loss)

In [None]:
num_epochs = 30

# Saver.save expects a filename prefix, not a directory: passing save_dir
# itself yields checkpoint files with an empty base name inside it.
checkpoint_prefix = os.path.join(save_dir, model_name)

for i in range(num_epochs):
    # One pass over the training batches (visited in the same order every
    # epoch), then a checkpoint and a validation pass.
    losses, step = model(x_id_train, y_train, mask_train)
    print("Epoch:", i+1,"Step:", step, "loss:",losses)
    saver.save(sess, save_path=checkpoint_prefix)
    print("Saved Model Complete")
    val_loss, val_pred = model(x_id_val, y_val, mask_val, train_cond=False)
    print("Validation F1 Score: ",  eval_f1(y_val, val_pred), "Loss: ", val_loss)

In [None]:

# Restore the most recent checkpoint recorded in save_dir.
latest_ckpt = tf.train.latest_checkpoint(save_dir)
if latest_ckpt is None:
    raise ValueError("No checkpoint found in {}".format(save_dir))
saver.restore(sess, latest_ckpt)

# The name `embedding` is not defined anywhere in this notebook, so the former
# sess.run(embedding) failed with a NameError on a fresh kernel. Fetch the
# embedding matrix used by the model instead: the fed pre-trained rows followed
# by the trained unknown-token rows.
# NOTE(review): assumes the full embedding matrix was the intended target -
# confirm (use unk_word_emb_mat if only the trained rows are wanted).
embed_mat = sess.run(final_word_emb_mat, feed_dict={word_emb_mat: word_embedding})