In [9]:
import nltk
import sys
import xml.etree.ElementTree as ET
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic as wn_ic
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize
from nltk.wsd import lesk
from nltk.corpus import stopwords
from nltk.corpus import wordnet_ic
from nltk.corpus import brown
# Load the WordNet information-content tables shipped as 'ic-brown.dat' and 'ic-semcor.dat'.
# NOTE(review): neither table is referenced anywhere else in the visible notebook —
# presumably intended for IC-based similarity measures; confirm before removing.
brown_ic = wordnet_ic.ic('ic-brown.dat')
semcor_ic = wordnet_ic.ic('ic-semcor.dat')

# Training Data Collection

## Training data extracting & parsing

In [10]:
# Parse the training XML file and hand back its root element
def get_root_of_training_file(training_file_name):
    """Return the root element of the XML document stored at ``training_file_name``."""
    return ET.parse(training_file_name).getroot()

# Collect the text of a node that surrounds its children (before and after the target word).
def get_node_text(node):
    """Return ``node.text`` followed by the tail text of every child element.

    A missing ``node.text`` counts as the empty string; children without a
    tail contribute nothing. The children's own text is not included.
    """
    pieces = [node.text or '']
    pieces.extend(child.tail for child in node if child.tail is not None)
    return ''.join(pieces)
    
# Get all the sentences in the lexsub XML whose target item carries the requested PoS tag.
def get_sentences_to_parse(root, pos_tag='n'):
    """Extract the context sentences of every lexical item tagged ``pos_tag``.

    Each child of ``root`` must expose an ``item`` attribute of the form
    ``word.tag`` or ``word.tag.tag``; when more than one tag is present only
    the last one is considered. Each instance below a matching item must
    expose an ``id`` attribute.

    Args:
        root: iterable XML element whose children are the lexical items.
        pos_tag: tag to select (defaults to ``'n'``, the value that was
            previously hard-coded).

    Returns:
        dict mapping instance id -> ``[text_before, text_after, target_word]``.
        Note the order: the target word is stored LAST, which is why callers
        rebuild the sentence as ``parts[0] + parts[2] + parts[1]``.
    """
    sentences = {}
    for lexelt in root:
        item_parts = lexelt.attrib['item'].split('.')
        target_tags = item_parts[1:]
        # With several tags only the last one decides.
        if len(target_tags) > 1:
            target_tags = [target_tags[-1]]
        if pos_tag not in target_tags:
            continue
        for instance in lexelt:
            instance_id = instance.attrib['id']
            for context in instance:
                sentence_text = [context.text or '']
                for child in context:
                    # Missing tail/text becomes '' so later string concatenation cannot fail.
                    sentence_text.append(child.tail or '')
                    sentence_text.append(child.text or '')
                # Stored inside the loop: an instance without any context no longer
                # inherits the previous instance's sentence (or raises NameError).
                sentences[instance_id] = sentence_text
    return sentences
        
# PoS-tag every extracted sentence
def get_pos_for_sentences(noun_sentences):
    """Map each sentence id to the PoS tags of its reassembled sentence.

    Every value of ``noun_sentences`` is indexed as ``[before, after, target]``;
    the full sentence is rebuilt as before + target + after and passed to
    ``get_pos_tags``.
    """
    return {
        sentence_id: get_pos_tags(parts[0] + parts[2] + parts[1])
        for sentence_id, parts in noun_sentences.items()
    }

# PoS-tag a single sentence with the nltk package
def get_pos_tags(sentence_to_find):
    """Tokenise ``sentence_to_find`` with nltk and return the result of ``nltk.pos_tag``."""
    return nltk.pos_tag(nltk.word_tokenize(sentence_to_find))

## Training data variables

In [11]:
# Point to training file
training_file_name = 'assignment_resources/lexsub_trial.xml'
# Get the root of the training file .XML
root_of_training = get_root_of_training_file(training_file_name)
# Extract every sentence whose replacement word is a noun.
# Shape: {id_of_sentence: [text_before, text_after, word_to_replace]}
# (the extractor defined above is get_sentences_to_parse; the previous call used a
# name that is not defined in this notebook and failed on a fresh kernel)
noun_sentences = get_sentences_to_parse(root_of_training, 'n')
# Get the POS tags for every word
map_pos_to_nouns = get_pos_for_sentences(noun_sentences)

In [12]:
# Sanity check: how many sentences were extracted into noun_sentences
print(len(noun_sentences))

73


# Preprocessing

## Sentence tools:

In [5]:
# Use a stemmer on the target word
def stem_target_word(word):
    """Return the Porter stem of ``word`` if WordNet knows it, otherwise ``word`` itself.

    ``wn.synsets`` returns a (possibly empty) list, never ``None``, so the
    fallback has to test for emptiness; the previous ``is None`` check could
    never fire and stems unknown to WordNet leaked through.
    """
    stemword = PorterStemmer().stem(word)
    if not wn.synsets(stemword):
        return word
    return stemword

# Lower-case every segment of a sentence
def sentences_to_lowercase(sentence):
    """Return a new list holding the lower-cased form of each segment in ``sentence``."""
    lowered = []
    for segment in sentence:
        lowered.append(segment.lower())
    return lowered

# Lower-case a sentence and stem its target word.
def preprocess_sentence(sentence):
    """Return ``(lowercase_segments, stemmed_target)`` for one sentence.

    The target word is read from index 2 of the lower-cased segments.
    """
    lowered_segments = sentences_to_lowercase(sentence)
    return lowered_segments, stem_target_word(lowered_segments[2])

# Preprocess PoS-tagged sentences: lower-case every token and drop stop words and punctuation.
def preprocess_pos_tags(pos_tags):
    """Return ``{key: [(lowercase_token, tag), ...]}`` without stop words or punctuation.

    Args:
        pos_tags: dict mapping a sentence key to a list of ``(token, tag)`` pairs.

    The English stop-word list is loaded once and turned into a set; it used
    to be re-read from the corpus for every single token.
    """
    stop_words = set(stopwords.words('english'))
    punctuation = {'!', ',', '.', '?', ')', '('}
    processed_tags = {}
    for key, tagged_sentence in pos_tags.items():
        kept = []
        for token, tag in tagged_sentence:
            lowered = token.lower()
            if lowered in stop_words or lowered in punctuation:
                continue
            kept.append((lowered, tag))
        processed_tags[key] = kept
    return processed_tags

In [6]:
# Remove all stop words and punctuation from the PoS-tagged sentences;
# the result is read by run_graph_word_sense when it builds its synset graphs.
preprocessed_pos_tags = preprocess_pos_tags(map_pos_to_nouns)

# WSD

## Word sense disambiguation using measures of word semantic similarity

In [7]:
def get_senses_of_target_words(max_reach=3):
    """Choose a sense for the target word of every sentence in the global ``noun_sentences``.

    For each sentence the tokens (before + target + after) become a list of
    per-token dictionaries ``{synset: [edges]}``. Synsets of tokens whose
    indexes are returned by ``get_ranges(max_reach, i)`` are linked with their
    ``wup_similarity``; each synset is then scored by ``score_graph``, the best
    synset per token is picked by ``sense_sentence`` and the entry at the
    target word's position is returned by ``target_word_sense``.

    Args:
        max_reach: window size handed to ``get_ranges`` for every token index.

    Returns:
        dict mapping sentence id -> whatever ``target_word_sense`` returned
        for that sentence.
    """
    # Replacement dictionary
    replacement_dict = {}
    # For every sentence that has the target word as a noun
    for sentence in noun_sentences:
        # Get the current sentence: [text_before, text_after, target_word]
        current_sentence = noun_sentences[sentence]
        # Tokenise in reading order: before, target, after
        tokenised_sentence = tokenise_sentence([current_sentence[0],current_sentence[2],current_sentence[1]])
        # Create graph of sentence (one {synset: edges} dictionary per token)
        graph = create_sentence_graph(tokenised_sentence)

        # For every token dictionary in the graph
        for i in range(0, len(graph)):
            # Get current word
            current_word = graph[i]
            # For every synset in the current word's dictionary
            for synset in current_word:
                # Get range of indexes to search using max_reach parameter
                indexes_to_search = get_ranges(max_reach,i)
                # For every neighbouring index inside the reach
                for word_index in indexes_to_search:
                    # get_ranges may return indexes outside the sentence; skip those
                    if word_index >= 0 and word_index < len(graph):
                        # Get the dictionary of the neighbouring word
                        check_word_dictionary = graph[word_index]
                        # For every synset in its dictionary
                        for similarity_synset in check_word_dictionary:
                            # Skip pairs already linked from the other side.
                            # NOTE(review): the check compares only the synset stored in the edge,
                            # not the token index — confirm this is intended for repeated words.
                            if not similarity_already_measured(current_word[synset],similarity_synset):
                                # Calculate similarity
                                sim = synset.wup_similarity(similarity_synset)
                                # wup_similarity may return None; such pairs get no edge
                                if sim is not None:
                                    # Store the edge on the current synset: (similarity, neighbour index, neighbour synset)
                                    current_word[synset].append((sim,word_index,similarity_synset))
                                    # Mirror the edge on the neighbouring synset
                                    check_word_dictionary[similarity_synset].append((sim,i,synset))
        # Score each label
        scored_graph = score_graph(graph)

        # Select max label:
        max_graph = sense_sentence(scored_graph)

        # Get sense of target word:
        target_word = target_word_sense(current_sentence,max_graph)

        # Assign to dictionary with sentence id as key
        replacement_dict[sentence] = target_word

    return replacement_dict
                    
def tokenise_sentence(sentence_array):
    """Tokenise every segment of ``sentence_array`` and return one flat token list."""
    tokens = []
    for segment in sentence_array:
        tokens.extend(word_tokenize(segment))
    return tokens

def create_sentence_graph(tokenised_sentence):
    """Build one ``{synset: []}`` dictionary per token of the sentence.

    The synsets returned by ``wn.synsets(token)`` are the vertices; each starts
    with its own empty edge list. Every token is covered — the previous
    ``range(0, len - 1)`` silently dropped the last token, leaving the graph one
    entry short of the sentence.

    Returns:
        list with exactly ``len(tokenised_sentence)`` dictionaries, in token order.
    """
    sentence_dictionary = []
    for token in tokenised_sentence:
        sentence_dictionary.append({synset: [] for synset in wn.synsets(token)})
    return sentence_dictionary
    
def get_ranges(max_reach,index):
    """Return the indexes within ``max_reach`` positions on either side of ``index``.

    ``index`` itself is excluded. The result may contain negative or
    too-large indexes; bounds checking is left to the caller. The upper bound
    is inclusive now — ``range(start, finish)`` used to stop one short, making
    the window reach further back than forward.
    """
    start = index - max_reach
    finish = index + max_reach
    return [position for position in range(start, finish + 1) if position != index]

def similarity_already_measured(synset_dictionary, current_synset):
    """Tell whether any edge tuple already points at ``current_synset``.

    ``synset_dictionary`` is an iterable of edge tuples whose third element
    (index 2) is compared with ``current_synset``. Returns ``False`` when no
    edge matches, including for an empty edge list.
    """
    return any(edge[2] == current_synset for edge in synset_dictionary)
    
def score_graph(graph):
    """Replace each synset's edge list with the sum of its edge weights.

    ``graph`` is a list of ``{synset: [edge, ...]}`` dictionaries where the
    weight is element 0 of every edge tuple. A new list of
    ``{synset: total_weight}`` dictionaries is returned; ``graph`` is not modified.
    """
    return [
        {synset: sum(edge[0] for edge in edges) for synset, edges in word.items()}
        for word in graph
    ]
        

def sense_sentence(scored_graph):
    """Pick the highest-scoring synset for every word of a scored graph.

    Args:
        scored_graph: list of ``{synset: score}`` dictionaries, one per word.

    Returns:
        list with one entry per word: the synset whose score is strictly the
        largest above 0, the word's first synset when no score exceeds 0, or
        ``None`` when the word has no synsets at all.
    """
    sentence = []
    for word in scored_graph:
        # Default to the first synset; an explicit None default replaces the
        # bare ``except`` that used to swallow every error raised here.
        best_word = next(iter(word), None)
        best_score = 0
        for synset, score in word.items():
            if score > best_score:
                best_word = synset
                best_score = score
        sentence.append(best_word)
    return sentence
        
def target_word_sense(current_sentence, sentence_graph):
    """Return the entry of ``sentence_graph`` that sits at the target word's position.

    The position is the number of tokens in the text before the target
    (``current_sentence[0]``).
    """
    tokens_before_target = word_tokenize(current_sentence[0])
    target_position = len(tokens_before_target)
    return sentence_graph[target_position]
    

    
    
    

## Get senses

In [8]:
# Run the similarity-graph WSD over every noun sentence with max_reach=4.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt — confirm
# the runtime is acceptable before relying on Restart & Run All.
graph_word_sense_disambiguation = get_senses_of_target_words(4)

KeyboardInterrupt: 

In [None]:
# Show the sense chosen for each sentence, then how many sentences were disambiguated
for chosen_sense in graph_word_sense_disambiguation.values():
    print(chosen_sense)
print(len(graph_word_sense_disambiguation))

# Word substitution

In [None]:
# Create target word dictionary
def get_target_word_replacement():
    """Collect substitute candidates for the target word of every disambiguated sentence.

    Reads the globals ``graph_word_sense_disambiguation`` (sentence id -> guessed
    synset or ``None``) and ``noun_sentences`` (sentence id ->
    ``[before, after, target]``). Candidates come, in this order, from the
    guessed synset's name, its lemmas, its part holonyms and the name of the
    synset chosen by ``lesk``.

    Returns:
        dict mapping sentence id -> list produced by ``create_oot_array``.
    """
    replacements = {}
    for key in graph_word_sense_disambiguation:
        guessed_synset = graph_word_sense_disambiguation[key]
        trial_sentence = noun_sentences[key]
        target_word = trial_sentence[2]

        # A word without any synset yields no graph-based candidates instead of crashing.
        if guessed_synset is not None:
            synset_name = get_synset_name(guessed_synset, target_word)
            lemmas = get_lemmas(guessed_synset, target_word)
            holonyms = get_part_holonyms(guessed_synset, target_word)
        else:
            synset_name, lemmas, holonyms = [], [], []

        # lesk expects an iterable of words; a raw string would be treated as a
        # bag of single characters, so the sentence is tokenised first.
        context_tokens = word_tokenize(trial_sentence[0] + target_word + trial_sentence[1])
        lesk_synset = lesk(context_tokens, target_word, 'n')
        # lesk returns None when it finds no candidate sense.
        lesk_replacement = get_synset_name(lesk_synset, target_word) if lesk_synset is not None else []

        replacements[key] = create_oot_array(
            [synset_name, lemmas, holonyms, lesk_replacement], target_word)
    return replacements


def get_synset_name(synset,word_to_replace):
    """Return the head word of ``synset.name()`` as a one-element list.

    The name is cut at the first '.', so 'dog.n.01' gives ``['dog']``. A
    ``None`` synset (no sense could be chosen) gives an empty list instead of
    raising AttributeError. ``word_to_replace`` is accepted for signature
    parity with the other candidate getters and is not used.
    """
    if synset is None:
        return []
    return [synset.name().split('.')[0]]


def get_lemmas(synset,word_to_replace):
    """Return the names of all lemmas of ``synset``, in the order the synset lists them.

    ``word_to_replace`` is not used.
    """
    return [lemma.name() for lemma in synset.lemmas()]

def get_part_holonyms(synset,word_to_replace):
    """Return the lemma names of every part holonym of ``synset``.

    Names are accumulated across all holonyms; the list used to be reassigned
    on each iteration so only the last holonym's lemmas survived.
    ``word_to_replace`` is not used.
    """
    part_holonyms = []
    for holonym in synset.part_holonyms():
        part_holonyms.extend(lem.name() for lem in holonym.lemmas())
    return part_holonyms

def create_oot_array(replacements,word_to_replace):
    """Flatten candidate lists into one de-duplicated list of substitutes.

    Args:
        replacements: list of candidate lists (WordNet-style names, words may
            be joined with '_').
        word_to_replace: the target word; it is lower-cased before comparison.

    Returns:
        Candidates in first-seen order, excluding the target word itself and
        any candidate that ``stem_target_word`` maps to the same stem as the
        target. Underscores are replaced by single spaces.

    Fixes: the leftover debug entry 'hello' is no longer appended to every
    result, and multi-word candidates no longer carry a trailing space (which
    prevented them from ever matching a gold answer).
    """
    target = word_to_replace.lower()
    # Loop-invariant: stem the target once rather than once per candidate.
    target_stem = stem_target_word(target)
    oot = []
    for replacement_list in replacements:
        for replacement_word in replacement_list:
            if replacement_word == target:
                continue
            if stem_target_word(replacement_word) == target_stem:
                continue
            candidate = replacement_word.replace('_', ' ')
            if candidate not in oot:
                oot.append(candidate)
    return oot
                



In [None]:
# Replacement dictionary: sentence id -> list of candidate substitutes built by create_oot_array
replacement_dictionary = get_target_word_replacement()


# Evaluation

## Create Evaluation Set

In [None]:
# Open gold standard file that contains the annotators answers
def load_in_gold_standard(gold_file_name='assignment_resources/gold.trial'):
    """Read the gold-standard file and return its rows as whitespace-split token lists.

    Args:
        gold_file_name: path of the gold file; defaults to the location that
            was previously hard-coded.

    Returns:
        One list of tokens per line after the first. The first line is always
        discarded (presumably a header or blank line — confirm against the
        file). Blank lines yield empty lists; an empty file yields ``[]``
        instead of raising IndexError.
    """
    with open(gold_file_name) as f:
        lines = f.readlines()
    return [line.split() for line in lines[1:]]

# Get the noun sentences from the gold standard file
def get_noun_answers(content_array):
    """Keep only the gold-standard rows whose item is tagged as a noun.

    Args:
        content_array: list of token lists; token 0 is the item
            (``word.tag`` or ``word.tag.tag``) and token 1 the instance id.

    Returns:
        dict mapping instance id -> the complete row, for rows whose LAST tag
        equals 'n'. Rows that are too short or whose item has no tag are
        skipped instead of raising IndexError.
    """
    noun_sentences = {}
    for evaluation in content_array:
        if len(evaluation) < 2:
            continue
        item_parts = evaluation[0].split(".")
        if len(item_parts) < 2:
            continue
        # Equality, not identity: ``is 'n'`` only worked through string interning.
        if item_parts[-1] == 'n':
            noun_sentences[evaluation[1]] = evaluation
    return noun_sentences

# Go through the gold standard noun sentences and turn their scores into tuples
def create_answer_tuples(noun_dictionary):
    """Replace every gold row by the tuples built from its answers (tokens from index 3 on).

    The conversion happens in place: ``noun_dictionary`` itself is updated and
    returned.
    """
    for instance_id in noun_dictionary:
        answers = noun_dictionary[instance_id][3:]
        noun_dictionary[instance_id] = answer_list_to_tuples(answers)
    return noun_dictionary

# Gold standard sentence to tuple with scores
def answer_list_to_tuples(gold_standard_answers):
    """Turn the whitespace-split answer tokens of a gold row into ``(word, mark)`` tuples.

    The tokens come from text shaped like ``pub 3;wine bar 2;tavern 1;``:
    entries are separated by ';' and the mark is the last whitespace-separated
    field of an entry, so multi-word answers are supported.

    Args:
        gold_standard_answers: list of tokens, e.g.
            ``['pub', '3;wine', 'bar', '2;tavern', '1;']``.

    Returns:
        list of ``(word, mark)`` tuples with ``mark`` kept as a string.
        Entries that do not contain both a word and a mark are skipped.

    The tokens are re-joined and split on ';' instead of being walked by
    index; that removes the ``is`` comparisons on integers and the IndexError
    raised for single-token or unterminated lists.
    """
    answer_tuples = []
    for entry in ' '.join(gold_standard_answers).split(';'):
        fields = entry.rsplit(None, 1)
        if len(fields) != 2:
            continue
        word, mark = fields
        # Normalise inner/leading whitespace so words are joined by single spaces.
        answer_tuples.append((' '.join(word.split()), mark))
    return answer_tuples
            
# Helper for answer_list_to_tuples
def check_if_tail_word(possible_word):
    """Return True when ``possible_word`` contains no ';', i.e. it continues a multi-word answer."""
    has_separator = ';' in possible_word
    return not has_separator

# Helper for answer_list_to_tuples
def get_tail_words(current_index,answer_list):
    """Gather the tail words that follow ``current_index`` in ``answer_list``.

    Starting at ``current_index + 1``, tokens are consumed for as long as
    ``check_if_tail_word`` accepts them.

    Returns:
        ``(index, tail)`` where ``index`` is the position of the first token
        that is not a tail word and ``tail`` is the consumed tokens, each
        prefixed with a single space.
    """
    index = current_index + 1
    fragments = []
    while check_if_tail_word(answer_list[index]):
        fragments.append(' ' + answer_list[index])
        index += 1
    return index, ''.join(fragments)


## Score evaluation

In [None]:
# Score each replacement - Total score
def score_replacements():
    """Score the global ``replacement_dictionary`` against the global ``answer_dictionary``.

    Only sentence ids present in both dictionaries are scored. For each of
    them the best achievable mark is the largest gold mark, and the achieved
    mark is the largest gold mark among answers that were actually suggested
    (0 when nothing matched).

    Returns:
        ``(max_score, scores)`` — the sum of best achievable marks and a dict
        mapping sentence id -> achieved mark.
    """
    scores = {}
    max_score = 0
    for key, replacement_words in replacement_dictionary.items():
        # Sentences without gold answers cannot be scored
        if key not in answer_dictionary:
            continue

        best_possible = 0
        best_matched = 0
        for word, score in answer_dictionary[key]:
            mark = int(score)
            best_possible = max(best_possible, mark)
            if mark > best_matched and word in replacement_words:
                best_matched = mark

        max_score += best_possible
        scores[key] = best_matched

    return max_score, scores

# Score precision - Using evaluation method stated in paper
def score_precision(replacement_dictionary,answer_dictionary):
    """Average, over the attempted gold items, of matched marks / total marks.

    Args:
        replacement_dictionary: sentence id -> list of suggested words.
        answer_dictionary: sentence id -> list of ``(word, mark)`` gold tuples.

    Returns:
        Mean of ``matched / total`` over the gold items that have an entry in
        ``replacement_dictionary`` and a non-zero total mark; ``0.0`` when no
        item qualifies. When every gold item has suggestions this is the same
        value the function returned before.

    Gold items without suggestions used to raise KeyError, and empty or
    zero-weight gold data raised ZeroDivisionError; both are now skipped.
    NOTE(review): skipped items are left out of the denominator, i.e. treated
    as "not attempted" — confirm against the paper's definition.
    """
    attempted = 0
    precision_sum = 0
    for noun_id, gold_standard_tuples in answer_dictionary.items():
        suggestions = replacement_dictionary.get(noun_id)
        if suggestions is None:
            continue
        total_possible_score = sum(int(mark) for _, mark in gold_standard_tuples)
        if total_possible_score == 0:
            continue
        total_score = sum(
            int(mark)
            for replacement in suggestions
            for word, mark in gold_standard_tuples
            if word == replacement
        )
        precision_sum += total_score / total_possible_score
        attempted += 1
    return precision_sum / attempted if attempted else 0.0

# Score recall - Using evaluation method stated in paper
def score_recall(replacement_dictionary,answer_dictionary):
    """Average, over ALL gold items, of matched marks / total marks.

    Args:
        replacement_dictionary: sentence id -> list of suggested words.
        answer_dictionary: sentence id -> list of ``(word, mark)`` gold tuples.

    Returns:
        Sum of ``matched / total`` per gold item divided by the number of gold
        items; items without suggestions (or without marks) contribute 0.
        ``0.0`` for an empty ``answer_dictionary``.

    The previous body returned the undefined name ``suggestion_precision``
    (NameError) and never looked at the suggestions.
    NOTE(review): the per-item formula mirrors score_precision — confirm it
    matches the paper's recall definition.
    """
    if not answer_dictionary:
        return 0.0
    total_recall = 0
    for noun_id, gold_standard_tuples in answer_dictionary.items():
        total_possible_score = sum(int(mark) for _, mark in gold_standard_tuples)
        if total_possible_score == 0:
            continue
        total_score = sum(
            int(mark)
            for replacement in replacement_dictionary.get(noun_id, [])
            for word, mark in gold_standard_tuples
            if word == replacement
        )
        total_recall += total_score / total_possible_score
    return total_recall / len(answer_dictionary)
    

def get_zero_scores(result_dictionary):
    """Return ``{key: 0}`` for every entry of ``result_dictionary`` whose score equals 0."""
    return {key: 0 for key, score in result_dictionary.items() if score == 0}

## Run evaluation

In [None]:
# Get the gold standard file
gold_standard_contents = load_in_gold_standard()
# Create a dictionary with only the answers with nouns
noun_dictionary = get_noun_answers(gold_standard_contents)
# Process the dictionary into an easily readable answers
# (create_answer_tuples converts noun_dictionary in place and returns it)
answer_dictionary = create_answer_tuples(noun_dictionary)

# Get precision score
# precision = score_precision(replacement_dictionary, answer_dictionary)
# print(precision)

# Get recall score (takes the same two arguments as score_precision)
# recall = score_recall(replacement_dictionary, answer_dictionary)
# print(recall)

# Get total score evaluation
max_score, scores = score_replacements()
final_score = sum(scores.values())
# max_score is 0 when no replacement id has a gold answer; avoid dividing by zero
percentage = (final_score/max_score)*100 if max_score else 0.0
poor_predictions = get_zero_scores(scores)

In [None]:
# Report the overall score, then list every sentence that scored zero together
# with the suggestions given and the gold-standard answers.
print ('The max score that can be achieved for the sentences that were changed is: ',max_score)
print ('-'*40)
print ('The final score of the given replacements is:', final_score, '/', max_score , '(',percentage,'%)')
print ('-'*40)
print ('The following sentences were given a score of zero:')
for key in poor_predictions:
    print ('-'*40)
    # Identity test is the idiomatic (and __eq__-safe) way to check for None
    if replacement_dictionary[key] is not None:
        # noun_sentences values are [before, after, target]; rebuild in reading order
        print(key ,': ',noun_sentences[key][0] + noun_sentences[key][2] + noun_sentences[key][1])
        print("\n")
        print('You gave the following responses:')
        print ('-'*20)
        for word in replacement_dictionary[key]:
            print(word)
    else:
        print('You could not find a suitable prediction for this sentence and returned None') 
    print ('-'*80)
    print('The Gold Standard answers included:')
    for answer,mark in answer_dictionary[key]:
        print (answer,'-', mark)

### Lesk Algorithm

In [None]:
# Rebuild the whole sentence and split it into words
def parse_sentence_to_array(sentence):
    """Join ``[before, after, target]`` in reading order and split the text on whitespace."""
    before, target, after = sentence[0], sentence[2], sentence[1]
    full_text = before + target + after
    return full_text.split()

# Apply the lesk algorithm to a sentence and target word
def perform_lesk(sentence,word):
    """Return whatever ``lesk`` yields for ``word`` in ``sentence``, restricted to nouns ('n')."""
    noun_pos = 'n'
    return lesk(sentence, word, noun_pos)

# Create a replacement dictionary for lesk.
def create_lesk_replacement_dictionary(noun_sentences):
    """Suggest one substitute per sentence from the sense chosen by lesk.

    Args:
        noun_sentences: sentence id -> ``[before, after, target]``.

    Returns:
        dict mapping sentence id -> the first lemma name of the chosen sense
        that differs from the (stemmed, lower-cased) target word, with
        underscores turned into spaces. Sentences for which lesk finds no
        sense, or whose sense has no differing lemma, are left out.

    lesk is first run with the stemmed target and, if that gives nothing,
    with the original target. If both attempts return ``None`` the sentence is
    skipped; it used to raise AttributeError on ``None.lemmas()``.
    """
    replacement_dictionary = {}
    for sentence_id, raw_sentence in noun_sentences.items():
        preprocessed_sentence, target_word = preprocess_sentence(raw_sentence)
        context_words = parse_sentence_to_array(preprocessed_sentence)

        context_synset = perform_lesk(context_words, target_word)
        if context_synset is None:
            # Stemming may have produced a form lesk does not know; retry with the raw target.
            context_synset = perform_lesk(context_words, raw_sentence[2])
        if context_synset is None:
            continue

        for lemma in context_synset.lemmas():
            candidate = lemma.name()
            if candidate != target_word:
                replacement_dictionary[sentence_id] = candidate.replace('_', ' ')
                break
    return replacement_dictionary

In [None]:
def run_graph_word_sense(noun_sentences,max_level):
    """Disambiguate every sentence of the global ``preprocessed_pos_tags`` with a relation graph.

    For each sentence a graph of all candidate synsets is created
    (``create_graph``); two synsets are connected when one appears in the
    relation tree (``build_synset_tree`` up to ``max_level``) of the other.
    Each synset is scored by its number of edges divided by the number of
    other synsets in the graph, and ``get_senses_for_sentence`` turns the
    scores into per-word senses.

    Args:
        noun_sentences: sentence id -> ``[before, after, target]``; the entry
            for each id is forwarded to ``get_senses_for_sentence``.
        max_level: depth limit forwarded to ``build_synset_tree``.

    Returns:
        dict mapping sentence id -> result of ``get_senses_for_sentence``.

    Fixes: the progress message was a syntax error (``(%key)``) and used '%d'
    for ids that are not known to be integers; a graph with a single synset
    divided by zero; self-relations are now detected with ``==`` rather than
    object identity.
    """
    print('--- Starting ---')
    word_sensed_dictionary = {}
    for key, current_sentence in preprocessed_pos_tags.items():
        print('--- Getting sentence %s and creating tree ---' % (key,))
        # Graph with all synsets that occur in the sentence: {synset: [connected synsets]}
        synset_graph = create_graph(current_sentence)
        for synset in synset_graph:
            # Everything reachable from this synset through its lexical relations
            for relation in build_synset_tree(0, synset, [], max_level):
                # Only relations that are other vertices of this graph create edges
                if relation == synset or relation not in synset_graph:
                    continue
                if relation not in synset_graph[synset]:
                    # Edges are undirected: record them on both endpoints
                    synset_graph[synset].append(relation)
                    synset_graph[relation].append(synset)

        # Degree of each synset relative to the number of other synsets
        possible_neighbours = len(synset_graph) - 1
        scores = {}
        for synset, edges in synset_graph.items():
            if possible_neighbours > 0:
                scores[synset] = len(edges) / possible_neighbours
            else:
                # A lone synset has no possible neighbours
                scores[synset] = 0.0

        word_sensed_dictionary[key] = get_senses_for_sentence(
            current_sentence, noun_sentences[key], scores)
        print('--- Word sense has been created ---')
    return word_sensed_dictionary


# Build the dictionary graph holding every synset found in the sentence.
def create_graph(sentence):
    """Return ``{synset: []}`` for every synset of every ``(word, tag)`` tuple in ``sentence``.

    Tuples for which ``get_synsets`` returns ``None`` add nothing. Each synset
    gets its own fresh, empty edge list.
    """
    graph = {}
    for pos_tuple in sentence:
        candidates = get_synsets(pos_tuple)
        if candidates is None:
            continue
        graph.update((candidate, []) for candidate in candidates)
    return graph

# Look up the synsets of a word, restricted to the WordNet PoS matching its tag
def get_synsets(pos_tuple):
    """Return ``wn.synsets(word, pos)`` for a ``(word, tag)`` tuple.

    Returns ``None`` when ``wordnet_pos_code`` has no WordNet PoS for the tag.
    """
    wn_pos_code = wordnet_pos_code(pos_tuple[1])
    if wn_pos_code is None:
        return None
    return wn.synsets(pos_tuple[0], wn_pos_code)

# Translate a PoS tag into the matching WordNet PoS constant.
def wordnet_pos_code(tag):
    """Return the ``wn`` PoS constant for ``tag`` based on its prefix.

    'NN*' -> ``wn.NOUN``, 'VB*' -> ``wn.VERB``, 'JJ*' -> ``wn.ADJ``,
    'RB*' -> ``wn.ADV``; any other tag gives ``None``.
    """
    prefix_to_pos = (
        ('NN', wn.NOUN),
        ('VB', wn.VERB),
        ('JJ', wn.ADJ),
        ('RB', wn.ADV),
    )
    for prefix, pos_code in prefix_to_pos:
        if tag.startswith(prefix):
            return pos_code
    return None
                    

# Recursively collect the synsets reachable from a synset through its lexical and semantic relations.
def build_synset_tree(level,synset,tree,max_level):
    """Append every synset reachable from ``synset`` to ``tree`` and return ``tree``.

    Followed relations, in order: hyponyms, hypernyms, member holonyms, part
    meronyms and the synsets of each lemma's derivationally related forms.

    Args:
        level: current depth; the starting synset (level 0) is never appended.
        synset: synset to expand.
        tree: list used as accumulator — it is modified in place.
        max_level: a synset is expanded only while ``level <= max_level``.
    """
    if level != 0 and synset not in tree:
        tree.append(synset)
    if level > max_level:
        return tree

    def _related():
        # Lazily yield the neighbours so each relation is queried just before it is walked
        yield from synset.hyponyms()
        yield from synset.hypernyms()
        yield from synset.member_holonyms()
        yield from synset.part_meronyms()
        # Nominalisations
        for lemma in synset.lemmas():
            for related_form in lemma.derivationally_related_forms():
                yield related_form.synset()

    for neighbour in _related():
        build_synset_tree(level + 1, neighbour, tree, max_level)
    return tree
        
# With all scores calculated, choose the highest scoring sense per word; if none scores above 0 pick the first.
def get_senses_for_sentence(current_sentence,noun_sentences,scores):
    """Attach the best-scoring synset to every ``(word, tag)`` tuple of a sentence.

    Args:
        current_sentence: list of ``(word, tag)`` tuples.
        noun_sentences: not used; kept so existing callers keep working.
        scores: dict mapping synset -> score; synsets missing from it count as 0.

    Returns:
        list of ``(pos_tuple, synset)`` pairs. The synset is the one with the
        highest score above 0, the word's first synset when no score exceeds
        0, or ``None`` when ``get_synsets`` returns ``None`` or an empty list.

    The selection test used to read ``scores[s] < 0 and scores[s] > max_score``,
    which can never be true, so the first synset was always chosen.
    """
    word_sensed_tags = []
    for pos_tuple in current_sentence:
        synsets = get_synsets(pos_tuple)
        # None (unsupported tag) and [] (unknown word) both mean "no sense available"
        if not synsets:
            word_sensed_tags.append((pos_tuple, None))
            continue
        max_score = 0
        max_synset = synsets[0]
        for s in synsets:
            score = scores.get(s, 0)
            if score > max_score:
                max_score = score
                max_synset = s
        word_sensed_tags.append((pos_tuple, max_synset))
    return word_sensed_tags

In [None]:
# Run the relation-graph WSD with max_level=3.
# NOTE(review): this rebinds graph_word_sense_disambiguation, which an earlier cell
# assigned from get_senses_of_target_words — confirm which result later cells should use.
graph_word_sense_disambiguation = run_graph_word_sense(noun_sentences,3) 