In [2]:
import tensorflow as tf
import os
import numpy as np
from math import ceil
import xml.etree.ElementTree as ET
from util import *

In [None]:
#Util functions

import tensorflow as tf
import numpy as np
import xml.etree.ElementTree as ET
from sklearn.preprocessing import LabelEncoder


pos_tags = ['.','ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM', 'PRON', 'PRT', 'VERB', 'X']
n_tags = len(pos_tags)
le = LabelEncoder().fit(pos_tags)

## load_embeddings ##
# Load previously generated embeddings to build a dictionary label->embedding
# -input
# path_embeddings: path to the .txt file with the embeddings. Format is label<-space->embedding.
# -output
# embedding_dict: dictionary label -> embedding
# hidden_size: size of the embeddings
def load_embeddings(path_embeddings):

	embedding_dict = dict()      

	with open(path_embeddings,'r') as f:

		for embedding_pair in f.readlines():

			label, embedding = embedding_pair.split(' ',1)
			embedding = np.array(embedding.split(),dtype=float)
			embedding_dict[label.strip()] = embedding

	hidden_size = len(embedding)

	return embedding_dict,hidden_size

## compute_output_space ##
# computes the necessary data structure for the output space.
# -input
# xml_file: xml file in input containing the sentences
# ground_truth: txt file containing the babelsids for each ambiguous lemma
# -output
# senses_dict : dictionary sense -> code
# truesenses_dict: dictionary instance_id -> sense
# wf_lemmas_dict: dictionary lemma (of unambiguos words) -> code
# senses_per_lemma_dict: dictionary lemma (of ambiguos word) -> list of sense codes
# out_size: size of the output space (+1 for padding)
def compute_output_space(xml_file,ground_truth):
    
	tree = ET.parse(xml_file)
	root = tree.getroot()

	senses_dict = dict()                        #dictionary: sense -> code
	truesenses_dict = dict()                    #dictionary: instance_id -> sense
	wf_lemmas_dict = dict()                     #dictionary: lemma (of unambiguos words) -> code
	senses_per_lemma_dict = dict()				#dictionary: lemma (of ambiguos word) -> list of sense codes

	#code 0 is used for padding
	code = 1
	#Senses
	with open(ground_truth,'r') as f:
		for line in f.readlines():
			instance_id , sense = line.split()

			truesenses_dict[instance_id] = sense
			if sense not in senses_dict:
				senses_dict[sense] = code
				code += 1

	#Lemmas
	for text in root.getchildren():
		for sentence in text:
			for word in sentence:
				lemma = word.attrib['lemma']
                
                #Unambiguous lemma
				if word.tag == 'wf' and lemma not in wf_lemmas_dict: #add the new lemma to the dictionary	
					wf_lemmas_dict[lemma] = code
					code += 1
                    
                #Ambiguous lemma
				elif word.tag == 'instance':
                    # Take a look at the sense
					sense = senses_dict[truesenses_dict[word.attrib['id']]]
                    # First time we found a sense for the lemma
					if lemma not in senses_per_lemma_dict:
						senses_per_lemma_dict[lemma] = []
                    # The new sense is added to the possible senses of the lemma
					if sense not in senses_per_lemma_dict[lemma]:
						senses_per_lemma_dict[lemma].append(sense)

    
	out_size = code

	return senses_dict,truesenses_dict,wf_lemmas_dict,senses_per_lemma_dict,out_size

## generate_word_vector ##
# returns the word vector using the lemma 
# input
# lemma: lemma of the word
# pos: pos of the word
# embedding_dict: dictionary label -> embedding
# output
# word_vector: the word embedding for the lemma + pos value
def generate_word_vector(lemma,pos,embedding_dict):
    
	pos_value = le.transform([pos])[0]
    
	if lemma in embedding_dict:
		wv = embedding_dict[lemma]
	else:
		wv = embedding_dict['unk']
    
	return np.append(wv,pos_value)

## generate_label ##
# Returns a pair of label for the word. The first label depends on the lemma. If the lemma in unambiguous then
# the first label is the lemma, otherwise it is the correct sense. The second label is the pos tag.
# -input
# word: child node of some sentence node
# senses_dict : dictionary sense -> code
# truesenses_dict: dictionary instance_id -> sense
# wf_lemmas_dict: dictionary lemma -> code
# -output
# label: code of the label + code for the pos

def generate_label(word,senses_dict,truesenses_dict,wf_lemmas_dict):
    
	pos = word.attrib['pos']
	code_pos = le.transform([pos])[0]

    #Unambiguous
	if word.tag == 'wf':
		lemma = word.attrib['lemma']
		if lemma in wf_lemmas_dict:
			code_label = wf_lemmas_dict[lemma]
		else:
			code_label = 0            #Predict 0 for the unk lemma (don't care)
            
    #Ambiguous
	elif word.tag == 'instance':
		sense = truesenses_dict[word.attrib['id']]
		if sense in senses_dict:
			code_label = senses_dict[sense]
		else:
			code_label = -1          #The sense is unknown by the system. Taken into account by the recall.
        
	return np.append(code_label,code_pos)

## generate_code_choices ##
# Returns all possible choices for a lemma (limiting the output space). If the word is a unambiguous lemma then 
# we don't care about the choice. If the lemma is ambiguous then we return all possible senses. If the system 
# doesn't recognize the sense of the lemma it returns -1 which will be taken into account by the recall.
# -input
# word: child node of some sentence node
# senses_per_lemma_dict : dictionary lemma (ambiguous) -> list of possible codes
# -output
# val: 0,-1 or a list
def generate_code_choices(child,senses_per_lemma_dict):
    #Unambiguous lemma
	if child.tag == 'wf':
		val = 0
	else:
    #Ambiguous lemma
		if child.attrib['lemma'] in senses_per_lemma_dict:   #Else if it has known senses, append the list of code
			val = senses_per_lemma_dict[child.attrib['lemma']]
		else:
			val = -1                                         #Otherwise cannot disambiguate
            
	return val
    
## pad_split ##
# Given in input a list of lists, pad_split pads each list with pad_token to a multiple of window_size
# to build a list of lists of fixed sizes.
# -input
# in_lists: list of lists to be padded
# pad_token: token used to pad the internal lists
# window_size: fixed size of the window
# window_overlap: overlap between two consecutive windows
# -output
# padded_lists: list of lists of padded & split lists
# original_lengths: the original lenghts of the lists
def pad_split(in_lists,pad_token,window_size,window_overlap):


	padded_lists = []
	original_lengths = []

	for in_list in in_lists:

		#Splitting
		list_parts = [in_list[i:i + window_size] for i in range(0, len(in_list), window_size - window_overlap)]
		len_parts = [len(piece) for piece in list_parts]

		#Padding could be required just for the last element
		diff = window_size - len_parts[-1]

		for i in range(diff):
			list_parts[-1].append(pad_token)

		padded_lists += list_parts
		original_lengths += len_parts


	return padded_lists, original_lengths

## xml_parser ##
# parse a xml file given the text nodes (padding applied). It uses all previously generated data structures
# to produce a list of (fixed size) sentences, labels and lens used to feed the netwok
# -input
# texts: list of text nodes in a xml file
# window_size: fixed size of the window over the sentence
# window_overlap: overlap between windows
# embedding_dict: dictionary label -> embedding
# senses_dict : dictionary sense -> code
# truesenses_dict: dictionary instance_id -> sense
# wf_lemmas_dict: dictionary lemma (of unambiguos words) -> code
# senses_per_lemma_dict: dictionary lemma (of ambiguos word) -> list of sense codes
# -output
# sentences: parsed sentences, each word is replaced by its feature vector. Each sentence has a fixed size.
# labels: vector of labels, same structure of sentences
# lens: list of the original lengths of the sentences
# code_choices: list of possible codes for each word in the sentence
def xml_parser(texts,window_size,window_overlap,embedding_dict,senses_dict,truesenses_dict,wf_lemmas_dict,senses_per_lemma_dict):
    
	sentences = []                                   
	labels = []                                      
	lens = []                                        
	code_choices = []                                 
    
	for text in texts:
		for sentence in text:
            
			sentence_words = []
			labels_words = []
			code_choices_words = []
        
			for child in sentence.getchildren():
                
				sentence_words.append(generate_word_vector(child.attrib['lemma'],child.attrib['pos'],embedding_dict))
				labels_words.append(generate_label(child,senses_dict,truesenses_dict,wf_lemmas_dict))
                
    

			sentences.append(sentence_words)
			labels.append(labels_words)
			code_choices.append(code_choices_words)
	
	return sentences,labels,lens,code_choices


## scores_senses ##
# computes the precision,recall and the the f1 score taking into account only the senses
# input
# prediction_values : prediction list of lists of probabilities over the output space
# true labels : true labels (int codes)
# final_code: end of the code for senses
# output
# precision: correct_senses/total_sense
# recall: correct_sense/predicted_senses
# f1_score: f1 score
def precision_senses(prediction_values,true_labels,mask_values,final_code):
    
	correct_senses = 0
	disambiguated_senses = 0
	skipped_senses = 0
    
	for snt_pre, snt_tru, snt_msk in zip(prediction_values,true_labels,mask_values):  #Cycling the batch
		for vect_pre, val_tru, val_msk in zip(snt_pre,snt_tru,snt_msk):               #Cycling the sentence
            
			if val_msk == 0: #Either wf_lemma or padding, don't care
				continue
			if val_msk == -1: #Avoid to predict the sense
				skipped_senses += 1
				continue
                
            #Ambiguous lemma, choose only from the senses related to it
            
			disambiguated_senses += 1
            
			prediction_for_lemma = val_msk[np.argmax(vect_pre[val_msk])]
			if val_tru == prediction_for_lemma:
				correct_senses += 1
                
	precision = correct_senses/disambiguated_senses
	recall = correct_senses/(correct_senses+skipped_senses)
	f1_score = (2*precision*recall)/(precision + recall)
	return precision,recall,f1_score

# Loading Data
Given a word in a sentence, we transform it in a feature vector which includes the embedding vector and the POS tag. Thus, a sentence is a sequence of features vectors of the same size. The input space of our network will then be all the possibile features vector.

The output space will be the union of the lemmas of unambiguous words and the possibile senses FOUND in the training data. 





In [9]:
#embedding_dict is a dictionary: word -> embedding
#hidden_size is the size of the embeddings
embedding_dict, hidden_size = load_embeddings('glove.6B.300d.txt')

#the following function generates suitable data structures to represent the output space FROM the training data.
#sense_dict is a dictionary: sense -> code
#truesenses_dict is a dictionary: instance_id -> sense
#wf_lemmas_dict is a dictionary: lemma -> code
#senses_per_lemma_dict: lemma (of an ambiguous word) -> list of possible code senses for the lemma
#out_size is the size of the output space
senses_dict, truesenses_dict, wf_lemmas_dict,senses_per_lemma_dict, out_size = compute_output_space('semcor.data.xml','semcor.gold.key.bnids.txt')
print('The output space size is: ' + str(out_size),flush=True)

#The feature vector will be embedding vector (300) + POS tag (1)
n_features = hidden_size + 1 
print('The features vectors have size: ' + str(n_features))

#Window size for the padding
window_size = 30
window_overlap = 5

keep_prob = 0.8

The output space size is: 49219
The features vectors have size: 301


### Training Data
We load the training data with a xml_parser function which uses the previously generated data structures to get a list of sentences, labels and sentences' lengths

In [10]:
tree = ET.parse('semcor.data.xml')
root = tree.getroot()

tr_sentences,tr_labels,tr_lens,tr_amb_lemmas = xml_parser(root.getchildren(),window_size,window_overlap,embedding_dict,senses_dict,truesenses_dict,wf_lemmas_dict,senses_per_lemma_dict)

### Validation Data
Being the instance_id in the validation data different from the ones found in training data, truesenese_dict needs to be updated

In [17]:
with open('ALL.gold.key.bnids.txt','r') as f:
    for line in f.readlines():
        instance_id, sense = line.split()
        truesenses_dict[instance_id] = sense

We then load the validation data but splitting by dataset. The first part just reads the names of dataset in the data. The second part loads the validation data creating dev_data which is organized as follow:

dev_data = [D_1,D_2,..] where D_x is a dataset

D_x = [sentences,labels,lens] for that dataset

sentences,labels,lens are structed in the same way as before



In [18]:
tree = ET.parse('ALL.data.xml')
root = tree.getroot()

dev_texts = []                                      #list of lists: text split by datasets
datasets_names = []                                 #list: names of the found datasets

dev_data = []      

#First part
for text in root.getchildren():
    dataset_name = text.attrib['id'].split('.')[0]
    
    if dataset_name not in datasets_names:  # add the new dataset
        datasets_names.append(dataset_name)
        dev_texts.append([])
        
    dev_texts[datasets_names.index(dataset_name)].append(text) #assign the (text) node to the right dataset

#Second part
for dataset in dev_texts:
    
    dv_sentences,dv_labels,dv_lens,dv_amb = xml_parser(dataset,window_size,window_overlap,embedding_dict,senses_dict,truesenses_dict,wf_lemmas_dict,senses_per_lemma_dict)
    
    dev_data.append([dv_sentences,dv_labels,dv_lens,dv_amb])

del dev_texts,dv_sentences,dv_labels,dv_lens, dv_amb,root, tree #removing useless data

# Network
The network is structured as following:

Sentences, labels and lengths are read trought placeholder.There is one layer of BiLSTM. Lastly there are two output layer, one used for the label prediction and the other for pos tag prediction.

    inputs (sentences) has shape (batch_size,window_size,n_features)
    targets (labels) has shape (batch_size,window_size,2) (2 = word_label + pos_label)
    seq_lens (lens) has shape (batch_size)

In [19]:
tf.reset_default_graph()

In [20]:
inputs = tf.placeholder(tf.float32, shape=[None,window_size,n_features])
targets = tf.placeholder(tf.int32, shape=[None,None,2])
seq_lens = tf.placeholder(tf.int32, shape=[None])

In [21]:
lstm_units = 500

cell_fw = tf.contrib.rnn.LSTMCell(lstm_units)
cell_bw = tf.contrib.rnn.LSTMCell(lstm_units)

#Dropout
cell_fw = tf.nn.rnn_cell.DropoutWrapper(cell_fw, output_keep_prob=keep_prob)
cell_bw = tf.nn.rnn_cell.DropoutWrapper(cell_bw, output_keep_prob=keep_prob)

outputs,_ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,sequence_length=seq_lens, dtype=tf.float32)

#Concatenation of the two hidden states
full_output = tf.concat([outputs[0],outputs[1]],axis=-1)
#full_output has shape (batch_size,window_size,lstm_units*2)

#Generating a mask to ignore padding
mask = tf.sequence_mask(seq_lens)
#mask has shape (batch_size,window_size)

#Mask repated in time for the attention layer
mask_repeated = tf.reshape(tf.tile(mask,[1,window_size]),(-1,window_size,window_size))

### Attention Layer

In [22]:
class AttentionWithContext:
    def __init__(self,input_shape, **kwargs):
        super(AttentionWithContext,self).__init__(**kwargs)
        self.build(input_shape)
        
    def build(self,input_shape):
        dense = tf.keras.layers.Dense(input_shape[1],activation=tf.nn.tanh,use_bias=True)
        self.td = tf.keras.layers.TimeDistributed(dense)
        
    def __call__(self,inputs,mask):
        focused_a = self.td(inputs)
        focused_a = tf.exp(focused_a)
        
        if mask is not None:
            mask = tf.cast(mask, tf.float32)
            foucsed_a = mask * focused_a
            
        focused_a /= tf.cast(tf.reduce_sum(focused_a,axis=1,keepdims=True) + tf.keras.backend.epsilon(),tf.float32)
        
        focused_features = tf.keras.backend.batch_dot(focused_a,inputs)
        return [focused_features,focused_a]

In [23]:
att_output, _ = AttentionWithContext(full_output.get_shape().as_list())(full_output,mask_repeated)

output_concat = tf.concat([full_output,att_output],-1)

#Size of the last dimension after the concatenation
aug_features = output_concat.get_shape()[-1]

#Flattening
output_flat = tf.reshape(output_concat,[-1,aug_features])

### Fully connected layer

In [24]:
fc_w = 1024

W_fc = tf.get_variable('W_fc',shape=[aug_features,fc_w],dtype=tf.float32)
b_fc = tf.get_variable('b_fc',shape=[fc_w],dtype=tf.float32) 

h_fc = tf.nn.relu(tf.matmul(output_flat,W_fc) + b_fc)

Slicing the targets placeholder to extract the label for the word and the pos

In [25]:
target_label = tf.squeeze(tf.slice(targets,(0,0,0),(tf.shape(targets)[0],tf.shape(targets)[1],1)),-1)
target_pos = tf.squeeze(tf.slice(targets,(0,0,1),(tf.shape(targets)[0],tf.shape(targets)[1],1)),-1)

### Loss the label for the word

In [26]:
W_label = tf.get_variable('W_label',shape=[fc_w,out_size],dtype=tf.float32)
b_label = tf.get_variable('b_label',shape=[out_size],dtype=tf.float32) 

logits_label = tf.matmul(h_fc,W_label) + b_label
logits_label = tf.reshape(logits_label,[-1,window_size,out_size])

losses_label = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_label,labels=target_label) #UNSCALED LOGITS!
#losses_label has shape (batch_size,window_size). Computes the loss for each word

#Ignore the losses from padding
losses_label = tf.boolean_mask(losses_label,mask)
#losses_label has shape (n) where n is the sum of the lens

#Summing the losses
loss_label = tf.reduce_sum(losses_label)

### Loss the POS for the word

In [27]:
W_pos = tf.get_variable('W_pos',shape=[aug_features,n_tags],dtype=tf.float32)
b_pos = tf.get_variable('b_pos',shape=[n_tags],dtype=tf.float32) 

logits_pos = tf.matmul(output_flat,W_pos) + b_pos
logits_pos = tf.reshape(logits_pos,[-1,window_size,n_tags])

losses_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_pos,labels=target_pos)
losses_pos = tf.boolean_mask(losses_pos,mask)
loss_pos = tf.reduce_sum(losses_pos)

### Total loss

In [28]:
total_loss = loss_pos + loss_label

optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.5).minimize(total_loss)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


### Predictions and Metrics

In [29]:
preds_label_values = tf.nn.softmax(logits_label)
preds_label = tf.argmax(preds_label_values,axis=-1)

#Ignorining the predictions for the padding
preds_label = tf.boolean_mask(preds_label,mask)
true_labels = tf.boolean_mask(target_label,mask)

preds_pos = tf.nn.softmax(logits_pos)
preds_pos = tf.argmax(preds_pos,axis=-1)

preds_pos = tf.boolean_mask(preds_pos,mask)
true_pos = tf.boolean_mask(target_pos,mask)

#Accuracy on POS learning
correct_pos = tf.cast(tf.equal(tf.cast(preds_pos,tf.int32), true_pos),tf.float32)
accuracy_pos = tf.reduce_mean(correct_pos)

### Summary scalars

In [30]:
tf.summary.scalar('loss_label',loss_label)
tf.summary.scalar('loss_pos',loss_pos)
tf.summary.scalar('total_loss',total_loss)

tf.summary.scalar('accuracy_pos',accuracy_pos)

summary = tf.summary.merge_all()

In [31]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())

summary_writer = tf.summary.FileWriter('./summary', sess.graph)

In [32]:
saver = tf.train.Saver()

if not os.path.exists("model"):
    os.makedirs("model")
    
if tf.train.checkpoint_exists('./model/model.ckpt'):
    saver.restore(sess, './model/model.ckpt')
    print("Previous model restored.")

INFO:tensorflow:Restoring parameters from ./model/model.ckpt


INFO:tensorflow:Restoring parameters from ./model/model.ckpt


NotFoundError: Key W_fc not found in checkpoint
	 [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

Caused by op 'save/RestoreV2', defined at:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/karapost/.local/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/karapost/.local/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/karapost/.local/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/karapost/.local/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/karapost/.local/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/karapost/.local/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/karapost/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/karapost/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/karapost/.local/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/karapost/.local/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/karapost/.local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/karapost/.local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/karapost/.local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/karapost/.local/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/karapost/.local/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/karapost/.local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/karapost/.local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/karapost/.local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-32-97fb25fb742e>", line 1, in <module>
    saver = tf.train.Saver()
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 1293, in __init__
    self.build()
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 1302, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 1339, in _build
    build_save=build_save, build_restore=build_restore)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 796, in _build_internal
    restore_sequentially, reshape)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 449, in _AddRestoreOps
    restore_sequentially)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 847, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 1030, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3271, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1650, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

NotFoundError (see above for traceback): Key W_fc not found in checkpoint
	 [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]


In [36]:
epochs = 4

batch_size = 32
batch_index = 0
num_batches_per_epoch = ceil(len(tr_lens)/batch_size)

n_iterations = num_batches_per_epoch * epochs

for ite in range(n_iterations):
    
    if ite % 10 == 0:
        print('Iteration: ' + str(ite) )
        
    #Batch
    batch_input = tr_sentences[batch_index*batch_size:(batch_index+1)*batch_size]
    batch_label = tr_labels[batch_index*batch_size:(batch_index+1)*batch_size]
    batch_lens = tr_lens[batch_index*batch_size:(batch_index+1)*batch_size]
    batch_dv = tr_amb_lemmas[batch_index*batch_size:(batch_index+1)*batch_size]
    
    batch_index = (batch_index + 1 ) % num_batches_per_epoch
    
    if batch_index % 50 == 0:
        saver.save(sess, './model/model.ckpt')
        print('Development set scores:')
        for dataset,name in zip(dev_data,datasets_names):
            predicted_values,target_values, acc = sess.run([preds_label_values,target_label,accuracy_pos],feed_dict={ inputs : dataset[0],targets : dataset[1], seq_lens : dataset[2]})
            precision,recall,f1_score = precision_senses(predicted_values,target_values,dataset[3],len(senses_dict))
            print('{} : precision on senses {} recall on senses {} f1_score on senses {} accuracy on pos {}'.format(name,precision,recall,f1_score,acc),flush=True)

    _,predicted_values,target_values,acc = sess.run([optimizer,preds_label_values,target_label,accuracy_pos],feed_dict={ inputs : batch_input, targets : batch_label, seq_lens : batch_lens})
    precision,recall,f1_score = precision_senses(predicted_values,target_values,batch_dv,len(senses_dict))
    print('Training: precision on senses {} f1_score on senses {} accuracy on pos {}'.format(precision,f1_score,acc),flush=True)

In [37]:
# Test Data

In [None]:
test_sentences = []

with open('test_data.txt','r') as f:
    for line_sentence in f.readlines():
        words = line_sentence.split()
        test_sentence = []
        for word in words:
            word_parts = word.split('|')
            
            word_vector = generate_word_vector_pos(word_parts[1],word_parts[2],embedding_dict)
            test_sentence.append(word_vector)
        test_sentences.append(test_sentence)

        