In [1]:
import tensorflow as tf
import os
import numpy as np
from math import ceil
import re
from pandas_ml import ConfusionMatrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import time
from util import *
import pickle

# Input

In [2]:
# Load, in this order, the embeddings matrix, the embedding dictionary and hidden_size
path_embeddings = '../babelfy_vectors_slim'
word_sense_embeddings, embedding_dict, hidden_size = load_embeddings(path_embeddings)

In [3]:
# Location of the CoNLL-2009 English training and development files
conll_dir = "../SRLData/EN"
path_train = conll_dir + "/CoNLL2009-ST-English-train.txt"
path_dev = conll_dir + "/CoNLL2009-ST-English-development.txt"

In [4]:
# Read training data
# read_conll comes from util (star import); its return value is kept as the raw
# training sentences and is consumed by the encoder / input-generation cells below.
tr_raw_sentences = read_conll(path_train)

In [5]:
# Adding semantic info
# The return value is discarded, so this presumably annotates tr_raw_sentences
# in place using the given disambiguation file -- TODO confirm in util.
add_semantic_info_conll(tr_raw_sentences,'../disambiguated_words_conll_train')

In [5]:
# Build the POS-tag and argument-role vocabularies, with their encoders,
# from the training sentences
pos_tags, pos_tag_encoder = list_pos_tags(tr_raw_sentences)
args, args_encoder = list_args(tr_raw_sentences)

# Number of classes of the two classifiers
pos_tags_classes = len(pos_tags)
args_classes = len(args)

# Encoded value of the '_' label (no classification)
null_code = args_encoder.transform(['_'])[0]

In [6]:
# Predicate-centered sentences / Windowing
# The window is described by a number of words on the left and on the right;
# the extra position in sentence_length is the "+ 1" term.
left_words, right_words = 20, 10
window_span = [left_words, right_words]
sentence_length = sum(window_span) + 1

In [8]:
# Generate inputs and labels for the network from the raw training sentences
tr_sentences, tr_pred_inds = generate_inputs(
    tr_raw_sentences, embedding_dict, pos_tag_encoder, window_span)
tr_labels, tr_missed = generate_labels_pos(
    tr_raw_sentences, args_encoder, pos_tag_encoder, window_span)

# Padding to the window size; the second value returned by pad is kept only
# for the sentences (tr_lens) and ignored for the labels
tr_sentences, tr_lens = pad(tr_sentences, max_length=sentence_length)
tr_labels, _ = pad(tr_labels, max_length=sentence_length)

In [25]:
# Development data: same pipeline as the training data
# (read, add semantic info, generate inputs/labels, pad)
dev_raw_sentences = read_conll(path_dev)
add_semantic_info_conll(dev_raw_sentences, '../disambiguated_words_conll_dev')

dev_sentences, dev_pred_inds = generate_inputs(
    dev_raw_sentences, embedding_dict, pos_tag_encoder, window_span)
dev_labels, dev_missed = generate_labels_pos(
    dev_raw_sentences, args_encoder, pos_tag_encoder, window_span)

dev_sentences, dev_lens = pad(dev_sentences, max_length=sentence_length)
dev_labels, _ = pad(dev_labels, max_length=sentence_length)

In [10]:
# The raw sentences are not referenced again by the cells below (the test set is
# read separately), so the names are dropped here -- presumably to free memory.
del dev_raw_sentences,tr_raw_sentences

# Network

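    in_sentences: Input sentences. Shape is (batch,sentence_length,2); the last axis holds the embedding index and the POS code of each token.
    in_labels: Input labels. Shape is (batch,sentence_length,2); the last axis holds the argument label and the POS tag label of each token.
    in_lens: Input sentences' lengths. Shape is (batch).
    in_pred_inds: Input predicate indexes. Shape is (batch).
    in_prob_dropout: Keep probability for dropout (it is passed as output_keep_prob, so 1 disables dropout).
    in_learn_rate: Input learning rate.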
  

In [7]:
# Number of units of each LSTM cell (one forward and one backward cell are built below)
lstm_units = 500

In [8]:
# Start from an empty default graph so that re-running the network cells
# does not stack a second copy of the model on top of the first one
tf.reset_default_graph()

In [9]:
# Sentences and labels share the same layout: two integer channels per token
token_pair_shape = [None, sentence_length, 2]

in_sentences = tf.placeholder(tf.int32, shape=token_pair_shape, name='in_sentences')
in_labels = tf.placeholder(tf.int32, shape=token_pair_shape, name='in_labels')

# One value per sentence of the batch
in_lens = tf.placeholder(tf.int32, shape=[None], name='in_lens')
in_pred_inds = tf.placeholder(tf.int32, shape=[None], name='in_pred_inds')

# Hyper-parameters fed at run time (no shape is declared for them)
in_prob_dropout = tf.placeholder(tf.float32, name='in_prob_dropout')
in_learn_rate = tf.placeholder(tf.float32, name='in_learn_rate')

# Batch size read dynamically from the fed sentences
t_batch_size = tf.shape(in_sentences)[0]

### Input assembling

In [10]:
# Channel 0 of the last axis holds the index used to look up the embeddings
# Shape is (batch,sentence_length)
t_sentences_word_indexes = in_sentences[:, :, 0]

# Embedding table initialised from the loaded matrix and excluded from training
embeddings = tf.Variable(word_sense_embeddings, dtype=tf.float32, trainable=False)

# Replacing every index with its embedding vector
# Shape is (batch,sentence_length,hidden_size)
t_sentences_embeddings = tf.nn.embedding_lookup(embeddings, t_sentences_word_indexes)

# Channel 1 holds the pos values; the last axis is kept (size 1) and the values
# are cast to float so they can be concatenated with the embeddings
# Shape is (batch,sentence_length,1)
t_sentences_word_pos = tf.cast(in_sentences[:, :, 1:2], tf.float32)

# Embedding followed by the pos value
# Shape is (batch,sentence_length,hidden_size + 1)
t_sentences = tf.concat([t_sentences_embeddings, t_sentences_word_pos], axis=-1)

In [11]:
# Binary vector marking the predicate position inside each sentence
# Shape is (batch,sentence_length,1)
t_bin_vects = tf.expand_dims(tf.one_hot(in_pred_inds, sentence_length), -1)

# The network input is rebuilt from its three parts instead of appending to the
# previous value of t_sentences: the result is the same tensor, but re-running
# this cell no longer adds one more indicator column each time.
# Shape is (batch,sentence_length,hidden_size + 2)
t_sentences = tf.concat([t_sentences_embeddings, t_sentences_word_pos, t_bin_vects], axis=-1)

### BiLSTM

In [12]:
# One LSTM cell per direction
lstm_fw = tf.contrib.rnn.LSTMCell(lstm_units)
lstm_bw = tf.contrib.rnn.LSTMCell(lstm_units)

# Dropout on the cell outputs. in_prob_dropout is used as output_keep_prob,
# i.e. it is the probability of keeping an output (1 means no dropout).
cell_fw = tf.nn.rnn_cell.DropoutWrapper(lstm_fw, output_keep_prob=in_prob_dropout)
cell_bw = tf.nn.rnn_cell.DropoutWrapper(lstm_bw, output_keep_prob=in_prob_dropout)

# Only the per-step outputs are used; the final states are ignored
(t_out_fw, t_out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, t_sentences, sequence_length=in_lens, dtype=tf.float32)

# Concatenating the forward and backward outputs of the BiLSTM
# Shape is (batch,sentence_length,lstm_units*2)
t_bilstm = tf.concat([t_out_fw, t_out_bw], axis=-1)

### Input assembling for the classifier

In [13]:
# gather_nd needs (row, position) pairs: each predicate index is paired with
# the index of its sentence inside the batch
# Shape is (batch,2)
t_batch_rows = tf.range(t_batch_size)
t_pred_inds = tf.stack([t_batch_rows, in_pred_inds], axis=1)

# BiLSTM output at the predicate position, one vector per sentence
# Shape is (batch,lstm_units*2)
t_pred_states = tf.gather_nd(t_bilstm, t_pred_inds)

# A length dimension is added and the predicate vector is repeated along it,
# so it can be concatenated with every word of the sentence
# Shape is (batch,sentence_length,lstm_units*2)
t_preds = tf.tile(tf.expand_dims(t_pred_states, 1), [1, sentence_length, 1])

# Word features followed by predicate features
# Shape is (batch,sentence_length,lstm_units*4)
t_pairs = tf.concat([t_bilstm, t_preds], axis=-1)

### Padding removal

In [14]:
# Generating a mask for the padding using the input lengths.
# maxlen must be given explicitly: by default tf.sequence_mask sizes the mask to
# max(in_lens), which is narrower than sentence_length whenever no sentence of the
# batch fills the whole window. tf.boolean_mask flattens tensor and mask
# independently, so a narrower mask would silently select misaligned rows.
# Shape is (batch,sentence_length)
t_mask = tf.sequence_mask(in_lens, maxlen=sentence_length)

# Applying the mask on the word-predicate pairs, removing spurious pairs. T is the number of times 'True' appears
# in the mask or the sum of all lengths in in_lens.
# Shape is (T,lstm_units*4)
t_pairs = tf.boolean_mask(t_pairs, t_mask)

# Size of the vectors fed to the classifier (static last dimension of t_pairs)
vect_shape = t_pairs.get_shape().as_list()[1]

# Applying the same mask on the labels, so pairs and labels stay aligned
# Shape is (T,2)
t_labels = tf.boolean_mask(in_labels, t_mask)

# Number of unpadded tokens in the batch
t_T = tf.shape(t_labels)[0]

### Labels extraction

In [15]:
# Column 0 of the unpadded labels: labels for the args
# Shape is (T)
t_labels_args = t_labels[:, 0]

# Column 1 of the unpadded labels: labels for the pos tags
# Shape is (T)
t_labels_pos = t_labels[:, 1]

### Logits and loss for the word-predicate labels

In [16]:
# Linear layer mapping every word-predicate pair to the args classes
W_args = tf.Variable(tf.truncated_normal([vect_shape, args_classes], stddev=0.1))
b_args = tf.Variable(tf.zeros([args_classes]))

# Shape is (T,args_classes)
t_logits_args = tf.matmul(t_pairs, W_args) + b_args

# Cross entropy per token, then averaged over the T tokens of the batch
t_token_loss_args = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=t_labels_args, logits=t_logits_args, name='sparse_softmax_args')
t_loss_args = tf.reduce_mean(t_token_loss_args, name='t_loss_args')

### Logits and loss for the pos tag labels

In [17]:
# Linear layer mapping every word-predicate pair to the pos tag classes
W_pos = tf.Variable(tf.truncated_normal([vect_shape, pos_tags_classes], stddev=0.1))
b_pos = tf.Variable(tf.zeros([pos_tags_classes]))

# Shape is (T,pos_tags_classes)
t_logits_pos = tf.matmul(t_pairs, W_pos) + b_pos

# Cross entropy per token, then averaged over the T tokens of the batch
t_token_loss_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=t_labels_pos, logits=t_logits_pos, name='sparse_softmax_pos')
t_loss_pos = tf.reduce_mean(t_token_loss_pos, name='t_loss_pos')

### Optimizer

In [18]:
# Multitask objective: the two losses are summed with equal weight and
# minimised together by Adam, using the learning rate fed at run time
t_loss_total = t_loss_args + t_loss_pos
t_optimizer = tf.train.AdamOptimizer(in_learn_rate).minimize(t_loss_total)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


### Predictions and Accuracy for Roles

In [19]:
# Predicted class for every token: the most probable one
# Shape is (T)
t_probs_args = tf.nn.softmax(t_logits_args)
t_args = tf.argmax(t_probs_args, axis=1, output_type=tf.int32)

# Fraction of tokens whose predicted class matches the label
t_hits_args = tf.cast(tf.equal(t_args, t_labels_args), tf.float32)
t_accuracy_args = tf.reduce_mean(t_hits_args, name='t_accuracy_args')

### Predictions and Accuracy for POS tags

In [20]:
# Predicted pos tag for every token: the most probable one
# Shape is (T)
t_probs_pos = tf.nn.softmax(t_logits_pos)
t_pos_tags = tf.argmax(t_probs_pos, axis=1, output_type=tf.int32)

# Fraction of tokens whose predicted pos tag matches the label
t_hits_pos = tf.cast(tf.equal(t_pos_tags, t_labels_pos), tf.float32)
t_accuracy_pos = tf.reduce_mean(t_hits_pos, name='t_accuracy_pos')

### Training the net

In [21]:
# Open the session and initialise every variable of the graph
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Dump the graph definition under ./summary
summary_writer = tf.summary.FileWriter('./summary', sess.graph)
summary_writer.flush()

In [22]:
saver = tf.train.Saver()

# Directory holding the checkpoint and the checkpoint prefix inside it
model_dir = "model-Multitask-InputSenses"
checkpoint_path = './model-Multitask-InputSenses/model.ckpt'

# Make sure the directory is there before any checkpoint is written
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

# Resume from a previous run when a checkpoint is available
if tf.train.checkpoint_exists(checkpoint_path):
    saver.restore(sess, checkpoint_path)
    print("Previous model restored.")

INFO:tensorflow:Restoring parameters from ./model-Multitask-InputSenses/model.ckpt
Previous model restored.


In [32]:
# Training hyper-parameters
epochs = 30
batch_size = 100
keep_prob = 0.6      # fed to in_prob_dropout, i.e. used as output keep probability
learn_rate = 0.005

batch_index = 0
num_batches_per_epoch = ceil(len(tr_labels)/batch_size)
n_iterations = num_batches_per_epoch*epochs

# Part of training data in order to check overfitting

tr_over_sentences = tr_sentences[:1000]
tr_over_labels = tr_labels[:1000]
tr_over_pred_inds = tr_pred_inds[:1000]
tr_over_lens = tr_lens[:1000]
tr_over_missed = tr_missed[:1000]

# The evaluation feeds never change during training, so they are built once
# here instead of at every evaluation step. Dropout is disabled (keep prob 1).
tr_over_feed_dict = {in_sentences : tr_over_sentences, in_labels :tr_over_labels, in_pred_inds: tr_over_pred_inds,in_lens:tr_over_lens,in_prob_dropout:1}
dev_feed_dict = {in_sentences : dev_sentences, in_labels :dev_labels, in_pred_inds: dev_pred_inds,in_lens:dev_lens,in_prob_dropout:1}

# Tensors fetched at every evaluation step
eval_fetches = [t_args,t_labels_args,t_accuracy_args,t_accuracy_pos]

# Computing the initial F1 score (of the restored model, if any): weights are
# saved later only when the dev F1 is at least this value

o_args,o_labels_args = sess.run([t_args,t_labels_args],feed_dict=dev_feed_dict)
_,_,max_f1 = compute_scores(o_args,o_labels_args,null_code,dev_missed)

for ite in range(n_iterations):
    # time.clock() was removed in Python 3.8; perf_counter() is the replacement
    # and reports elapsed wall-clock time of the training step
    start = time.perf_counter()

    # Batch: the same slice is taken from every training array
    batch_slice = slice(batch_index*batch_size, (batch_index+1)*batch_size)
    bt_sentences = tr_sentences[batch_slice]
    bt_labels = tr_labels[batch_slice]
    bt_pred_inds = tr_pred_inds[batch_slice]
    bt_lens = tr_lens[batch_slice]

    # Wrap around at the end of every epoch
    batch_index = (batch_index + 1 ) % num_batches_per_epoch

    feed_dict = {in_sentences : bt_sentences, in_labels :bt_labels, in_pred_inds: bt_pred_inds,in_lens:bt_lens,in_learn_rate:learn_rate,in_prob_dropout:keep_prob}
    sess.run(t_optimizer,feed_dict=feed_dict)

    end = time.perf_counter()

    if ite % 10 == 0:
        print('Iteration # ' + str(ite) + ' time: ' + str(end-start))

    if ite % 100 == 0:

        # Training data
        o_args,o_labels_args,o_accuracy_args,o_accuracy_pos = sess.run(eval_fetches,feed_dict=tr_over_feed_dict)
        precision,recall,f1_score = compute_scores(o_args,o_labels_args,null_code,tr_over_missed)
        print('Train data)  Precision: ' + str(precision) + ' Recall: ' +str(recall)+ ' F1 score: '+str(f1_score) )
        print('Train data)  Accuracy on args: '+  str(o_accuracy_args) +  ' Accuracy on pos tags: ' + str(o_accuracy_pos))

        # Development data
        o_args,o_labels_args,o_accuracy_args,o_accuracy_pos = sess.run(eval_fetches,feed_dict=dev_feed_dict)
        precision,recall,f1_score = compute_scores(o_args,o_labels_args,null_code,dev_missed)
        print('Dev data) Precision: ' + str(precision) + ' Recall: ' +str(recall)+ ' F1 score: '+str(f1_score),flush=True )
        print('Dev data) Accuracy on args: '+  str(o_accuracy_args) +  ' Accuracy on pos tags: ' + str(o_accuracy_pos))

        # Save weights only if the dev score did not get worse
        if f1_score >= max_f1:
            max_f1 = f1_score
            saver.save(sess, './model-Multitask-InputSenses/model.ckpt')
            print('---Weights have been saved---')

Iteration # 0 time: 3.747049000000004
Train data)  Precision: 0 Recall: 0.0 F1 score: 0
Train data)  Accuracy on args: 0.899899 Accuracy on pos tags: 0.0716848
Dev data) Precision: 0 Recall: 0.0 F1 score: 0
Dev data) Accuracy on args: 0.900079 Accuracy on pos tags: 0.0870422


KeyboardInterrupt: 

# Final Scores

In [23]:
# Loading the weights with the highest F1 score
saver.restore(sess, './model-Multitask-InputSenses/model.ckpt')

INFO:tensorflow:Restoring parameters from ./model-Multitask-InputSenses/model.ckpt


In [26]:
# Evaluate the restored weights on the whole development set, without dropout
feed_dict = {
    in_sentences: dev_sentences,
    in_labels: dev_labels,
    in_pred_inds: dev_pred_inds,
    in_lens: dev_lens,
    in_prob_dropout: 1,
}
fetches = [t_args, t_labels_args, t_accuracy_args, t_accuracy_pos]
o_args, o_labels_args, o_accuracy_args, o_accuracy_pos = sess.run(fetches, feed_dict=feed_dict)

precision, recall, f1_score = compute_scores(o_args, o_labels_args, null_code, dev_missed)

# print() renders every argument with str(), exactly as the explicit str() calls would
print('Dev data) Precision: ', precision, ' Recall: ', recall, ' F1 score: ', f1_score, sep='')
print('Dev data) Accuracy on args: ', o_accuracy_args, ' Accuracy on pos tags: ', o_accuracy_pos, sep='')

Dev data) Precision: 0.890807651434644 Recall: 0.7254958528669311 F1 score: 0.7996978972055492
Dev data) Accuracy on args: 0.965482 Accuracy on pos tags: 0.989537


In [27]:
# Map the integer codes back to the original role labels.
# Both names are overwritten with the decoded values, so this cell is meant to
# be run once after the evaluation cell above.
o_args = args_encoder.inverse_transform(o_args)
o_labels_args = args_encoder.inverse_transform(o_labels_args)

### Confusion Matrix

In [28]:
# Confusion matrix of the decoded role labels: gold labels are passed first,
# predictions second (the printed table shows Actual rows / Predicted columns)
conf_matr = ConfusionMatrix(o_labels_args,o_args)
print(conf_matr)

Predicted    A0    A1    A2   A3  A4  A5  AA  AM-ADV  AM-CAU  AM-DIR   ...     \
Actual                                                                 ...      
A0         2611   147    36    3   1   0   0       0       0       0   ...      
A1          116  4189    69    8   0   0   0       2       0       1   ...      
A2           49   176   970    5   2   0   0       2       0       4   ...      
A3           15    26    24  147   0   0   0       0       0       0   ...      
A4            0     7    14    1  44   0   0       0       0       1   ...      
A5            0     1     1    0   0   1   0       0       0       0   ...      
AA            0     0     0    0   0   0   0       0       0       0   ...      
AM-ADV        0     1     3    1   0   0   0     108       0       0   ...      
AM-CAU        0     1     2    0   0   0   0       1      25       0   ...      
AM-DIR        0     3     8    0   1   0   0       0       0      10   ...      
AM-DIS        0     0     0 

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  df = df.loc[idx, idx.copy()].fillna(0)  # if some columns or rows are missing


### Classification Report

In [29]:
# Per-class precision / recall / F1 on the decoded role labels
class_rep = classification_report(y_true=o_labels_args, y_pred=o_args)
print(class_rep)

             precision    recall  f1-score   support

         A0       0.83      0.78      0.80      3363
         A1       0.83      0.82      0.82      5112
         A2       0.74      0.62      0.68      1566
         A3       0.80      0.49      0.61       297
         A4       0.79      0.54      0.64        82
         A5       1.00      0.33      0.50         3
         AA       0.00      0.00      0.00         1
     AM-ADV       0.60      0.46      0.52       235
     AM-CAU       0.83      0.61      0.70        41
     AM-DIR       0.45      0.29      0.36        34
     AM-DIS       0.80      0.64      0.71       195
     AM-EXT       0.70      0.55      0.62        47
     AM-LOC       0.61      0.59      0.60       330
     AM-MNR       0.71      0.53      0.61       420
     AM-MOD       0.97      0.97      0.97       314
     AM-NEG       0.94      0.93      0.93       123
     AM-PNC       0.54      0.41      0.46        71
     AM-PRD       0.00      0.00      0.00   

  'precision', 'predicted', average, warn_for)


# Test data

In [31]:
# Test set input file and the file the predictions are written to
test_dir = '../TestData'
path_test = test_dir + '/test.csv'
path_output_test = test_dir + '/test_with_args.csv'

In [32]:
# Same input pipeline as for training/dev data, without labels
test_raw_sentences = read_conll(path_test)
add_semantic_info_conll(test_raw_sentences, '../disambiguated_words_conll_test')

test_sentences, test_pred_inds = generate_inputs(
    test_raw_sentences, embedding_dict, pos_tag_encoder, window_span)
test_sentences, test_lens = pad(test_sentences, max_length=sentence_length)

In [35]:
# Prediction
feed_dict = {in_sentences : test_sentences, in_pred_inds: test_pred_inds,in_lens:test_lens,in_prob_dropout:1}
o_roles = sess.run(t_args,feed_dict=feed_dict)

# Adding labels
add_labels(test_raw_sentences,o_roles,window_span,args_encoder)

In [36]:
# Writing to a file
write_labels_conll('../TestData/test.csv','../TestData/test_with_args.csv',test_raw_sentences)