In [None]:
from pathlib import Path
import json

# Directory holding one sub-folder of JSON exports per topic.
JSON_DIR = Path("./data/jsons")


def _load_json(path):
    """Parse the UTF-8 JSON file at ``path`` and return its content.

    The file is opened in a ``with`` block so the handle is released even
    when ``json.load`` raises on malformed input.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


network_documents = _load_json(JSON_DIR / "Network" / "networks.json")
microservices_documents = _load_json(JSON_DIR / "Microservices" / "microservices.json")
archDegrad_documents = _load_json(JSON_DIR / "ArchitecturalDegradation" / "response.json")
codeSmell_documents = _load_json(JSON_DIR / "CodeSmellDetection" / "response.json")
medicine_documents = _load_json(JSON_DIR / "Medicine" / "response.json")

documents = {}
#documents["network"] = network_documents
documents["microservices"] = microservices_documents

# Corpus used by the ranking cells: the three software-engineering topics
# concatenated. The `+` below assumes each file decodes to a list -- TODO confirm.
merge_documents = microservices_documents + archDegrad_documents + codeSmell_documents
merge_documents[101]

In [None]:
!pip install sentencepiece

In [1]:
from transformers import BertTokenizer, BertModel, BertForMaskedLM, XLNetConfig, XLNetModel, XLNetTokenizer
import torch
from torch.nn.functional import one_hot

class BERTComponent:
    """Wrapper around a pretrained BERT tokenizer/model pair.

    Produces one embedding per caller-supplied word span (the concatenation of
    the last four hidden layers, averaged over the word's BERT sub-tokens) plus
    a sentence embedding (the mean over all token vectors).

    Tokenizers and models are cached per model name, so building several
    components for the same checkpoint loads it only once, while a different
    checkpoint name gets its own tokenizer/model instead of silently reusing
    the first one that was loaded.
    """

    # Most recently loaded tokenizer / model, kept as class attributes for
    # backward compatibility with code that reads them from the class.
    tokenizer = None
    bert_model = None

    # model name -> (tokenizer, model); shared by all instances on purpose.
    _loaded = {}

    # (accented character, plain replacement) pairs applied by _flat_word.
    _ACCENT_REPLACEMENTS = (
        ("ñ", "n"),
        ("á", "a"),
        ("é", "e"),
        ("í", "i"),
        ("ó", "o"),
        ("ú", "u"),
        ("ä", "a"),
        ("ü", "u"),
        ("ö", "o"),
        ("ū", "u"),
        ("ā", "a"),
        ("ī", "i"),
        ("ș", "s"),
        ("ã", "a"),
        ("ô", "o"),
    )

    # Words that are pure spacing and therefore have no BERT token.
    _SPACING_WORDS = (" ", "  ", "   ")

    def __init__(self, model):
        """Load (or reuse) the tokenizer and model named ``model``.

        Args:
            model: checkpoint name or path understood by
                ``BertTokenizer.from_pretrained`` / ``BertModel.from_pretrained``.
        """
        # NOTE(review): kept at its original value; nothing in this class reads it.
        self.sent_vector_size = 768
        self.model = model
        print("Tokenizer: ", BERTComponent.tokenizer)

        if model not in BERTComponent._loaded:
            BERTComponent._loaded[model] = (
                BertTokenizer.from_pretrained(model),
                BertModel.from_pretrained(model),
            )
        self.tokenizer, self.bert_model = BERTComponent._loaded[model]
        BERTComponent.tokenizer = self.tokenizer
        BERTComponent.bert_model = self.bert_model
        self.bert_model.eval()

        # Word vectors concatenate four hidden layers (see get_bert_embeddings),
        # so their length is 4 * hidden_size: 3072 for a 768-wide model, 4096
        # for a 1024-wide one. The previous hard-coded 3072 was wrong for the latter.
        self.bert_vector_size = 4 * self.bert_model.config.hidden_size

    def get_bert_spans(self, words, bert_tokens):
        """Align caller-side words with BERT sub-tokens.

        Args:
            words: list of word strings, in sentence order.
            bert_tokens: BERT tokens for the same sentence; position 0 is
                skipped (get_bert_embeddings puts '[CLS]' there).

        Returns:
            ``(bert_words, bert_words_indexes)``: for each word, the list of
            sub-token strings it was matched to and the list of their positions
            in ``bert_tokens``. Spacing words get the position ``[-1]``.
            If a word cannot be aligned, "Error" is printed and the lists built
            so far (shorter than ``words``) are returned.
        """
        if self.model == 'bert-large-uncased':
            words = [self._flat_word(word) for word in words]

        i = 0      # index into words
        j = 1      # index into bert_tokens (0 is skipped)
        idx = 0    # characters of bert_tokens[j] already consumed by earlier words

        bert_words_indexes = []
        bert_words = []
        while i < len(words):
            word = words[i]

            bert_word = bert_tokens[j]
            bert_word = bert_word[2:] if bert_word.startswith("##") else bert_word
            bert_word = bert_word[idx:]

            # Spacing control: no BERT token is consumed.
            if word in self._SPACING_WORDS:
                bert_words.append([word])
                bert_words_indexes.append([-1])

            # The current word is [UNK] for BERT.
            elif bert_word == "[UNK]":
                bert_words.append(["[UNK]"])
                bert_words_indexes.append([j])
                j += 1
                idx = 0

            # The word is strictly shorter than the BERT token and contained in
            # it: several words share one token, so only advance to the next
            # token once this word reaches the token's end.
            elif len(word) < len(bert_word) and bert_word.find(word) >= 0:
                bert_words.append([bert_word])
                bert_words_indexes.append([j])

                idx = bert_word.find(word) + len(word)
                if idx == len(bert_word):
                    j += 1
                    idx = 0

            # Otherwise the word is spelled by one or more consecutive tokens.
            else:
                k = 0  # characters of `word` matched so far
                span = []
                span_indexes = []

                while k < len(word):
                    if word.find(bert_word, k) == k:
                        span.append(bert_word)
                        span_indexes.append(j)
                        k += len(bert_word)
                        j += 1
                        idx = 0
                        bert_word = bert_tokens[j]
                        bert_word = bert_word[2:] if bert_word.startswith("##") else bert_word
                    else:
                        print("Error")
                        return bert_words, bert_words_indexes

                bert_words.append(span)
                bert_words_indexes.append(span_indexes)

            i += 1

        assert len(bert_words_indexes) == len(words)

        return bert_words, bert_words_indexes

    def _flat_word(self, word):
        """Lower-case ``word`` and replace the accented letters listed in
        ``_ACCENT_REPLACEMENTS`` with their plain counterparts."""
        word = word.lower()
        for accented, plain in self._ACCENT_REPLACEMENTS:
            word = word.replace(accented, plain)
        return word

    def _sum_merge(self, vectors):
        """Element-wise sum of a list of equally shaped tensors."""
        return torch.sum(torch.stack(vectors), dim=0)

    def _mean_merge(self, vectors):
        """Element-wise mean of a list of equally shaped tensors."""
        return torch.mean(torch.stack(vectors), dim=0)

    def _last_merge(self, vectors):
        """Return the last tensor of the list."""
        return vectors[-1]

    def _get_merge_tensors(self, token_vec_sums, words_indexes):
        """Build one vector per word by averaging its sub-token vectors.

        Args:
            token_vec_sums: per-token vectors, indexable by token position.
            words_indexes: for each word, the token positions it covers; the
                position -1 stands for "no token" and maps to a zero vector.

        Returns:
            A list with one tensor per entry of ``words_indexes``.
        """
        # Size the padding vector from the real token vectors so it always
        # matches them (length, dtype and device); fall back to the configured
        # size only when there are no token vectors at all.
        if len(token_vec_sums) > 0:
            pad_tensor = torch.zeros_like(token_vec_sums[0])
        else:
            pad_tensor = torch.zeros(self.bert_vector_size)

        real_vec = []
        for word_indexes in words_indexes:
            vectors = [(token_vec_sums[idx] if idx != -1 else pad_tensor) for idx in word_indexes]
            real_vec.append(self._mean_merge(vectors))

        return real_vec

    def get_bert_embeddings(self, sentence, spans):
        """Embed ``sentence`` and return per-word and sentence embeddings.

        Args:
            sentence: the raw sentence text.
            spans: iterable of ``(begin, end)`` character offsets into
                ``sentence``, one per word.

        Returns:
            ``(bert_embeddings, sentence_embedding)`` where ``bert_embeddings``
            is a list with one tensor per span that could be aligned (see
            get_bert_spans) and ``sentence_embedding`` is the mean of all token
            vectors, including those of '[CLS]' and '[SEP]'.
        """
        tokenized_sentence = ['[CLS]'] + self.tokenizer.tokenize(sentence) + ['[SEP]']
        indexed_tokens = self.tokenizer.convert_tokens_to_ids(tokenized_sentence)

        tokens_tensor = torch.tensor([indexed_tokens])
        # The original code passed an all-ones tensor as the second positional
        # argument, which BertModel.forward interprets as the attention mask.
        # Same tensor, now passed by keyword so the intent is explicit.
        attention_mask = torch.ones_like(tokens_tensor)

        with torch.no_grad():
            encoded_layers = self.bert_model(
                input_ids=tokens_tensor,
                attention_mask=attention_mask,
                output_hidden_states=True,
            )

        # (layers, 1, tokens, hidden) -> (layers, tokens, hidden) -> (tokens, layers, hidden)
        token_embeddings = torch.stack(encoded_layers.hidden_states, dim=0)
        token_embeddings = torch.squeeze(token_embeddings, dim=1)
        token_embeddings = token_embeddings.permute(1, 0, 2)

        # One vector per token: the last four layers concatenated.
        token_vec_sums = [
            torch.cat((token[-1], token[-2], token[-3], token[-4]), dim=-1)
            for token in token_embeddings
        ]

        words = [sentence[beg:end] for (beg, end) in spans]
        bert_words, bert_words_indexes = self.get_bert_spans(words, tokenized_sentence)

        bert_embeddings = self._get_merge_tensors(token_vec_sums, bert_words_indexes)
        sentence_embedding = torch.mean(torch.stack(token_vec_sums), dim=0)

        return bert_embeddings, sentence_embedding

In [2]:
from nltk.tokenize import TreebankWordTokenizer as twt
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

class DocumentRanker:
    """Rank a fixed list of documents against a free-text query.

    Each document is represented by the text "<title> <abstract>. <keywords>"
    and scored by the cosine similarity between its sentence embedding and the
    embedding of the query.
    """

    def __init__(self, documents):
        """Store ``documents`` and load the embedding models.

        Args:
            documents: iterable of dict-like records; the fields read are
                'documentTitle', 'workAbstract' and 'authorKeywords' (a list of
                records with a 'keyword' field).
        """
        self.documents = documents
        self.bert = BERTComponent('bert-large-cased')
        self.__model = SentenceTransformer('bert-large-cased')

    def __get_info_rep(self, document):
        # Placeholder kept from the original; not implemented.
        pass

    def __get_embedding(self, text):
        """Encode ``text`` (a string or a list of strings) with the sentence model."""
        return self.__model.encode(text)

    @staticmethod
    def _build_document_text(document):
        """Return the text that represents ``document`` for ranking.

        Missing or empty fields are treated as empty strings. Keywords are
        joined with single spaces; previously they were glued together with no
        separator, which merged distinct keywords into one token.
        """
        title = document.get('documentTitle') or ""
        abstract = document.get('workAbstract') or ""
        keywords = " ".join(
            entry['keyword'] for entry in (document.get('authorKeywords') or [])
        )
        return title + " " + abstract + ". " + keywords

    def get_related_documents(self, query, number_of_documents):
        """Return the documents most similar to ``query``.

        Args:
            query: free-text query.
            number_of_documents: maximum number of results.

        Returns:
            A list of ``(document_index, score)`` tuples sorted by decreasing
            score, where ``document_index`` is the position in
            ``self.documents`` and ``score`` is a Python float. At most
            ``number_of_documents`` entries are returned; an empty corpus
            yields an empty list.
        """
        documents = list(self.documents)
        if not documents:
            return []

        texts = [self._build_document_text(document) for document in documents]

        q_sent_embedding = self.__get_embedding(query)
        # Encode every document in one call instead of one call per document.
        document_embeddings = self.__get_embedding(texts)

        similarities = cosine_similarity([q_sent_embedding], document_embeddings)[0]

        # sorted() is stable, so equal scores keep their original document order.
        doc_scores = sorted(
            ((position, float(score)) for position, score in enumerate(similarities)),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Slicing already copes with number_of_documents > len(doc_scores).
        return doc_scores[:number_of_documents]
        

In [None]:
# Rank over the merged corpus built in the first cell (merge_documents).
docs = merge_documents
# Constructing the ranker loads 'bert-large-cased' twice: once through
# BERTComponent and once through SentenceTransformer.
ranker = DocumentRanker(docs)
# Sanity check: show which tokenizer the BERT component ended up with.
print(ranker.bert.tokenizer)

In [None]:
# Top-10 documents for a sample query; each entry is (index into docs, cosine score).
related_docs = ranker.get_related_documents("Code smell and anti-patterns degrades the architecture", 10)
related_docs

In [None]:
docs[396]

In [None]:
import re

def get_spans(sentence, begin, verbose=True):
    """Compute the character range of every whitespace-delimited word.

    Args:
        sentence: text to split; words are maximal runs of non-whitespace.
        begin: offset added to every start/end position.
        verbose: when True (the default, matching the original behaviour),
            print the sentence and the computed ranges.

    Returns:
        A list of ``(start, end)`` tuples, one per word, with ``end``
        exclusive and both values shifted by ``begin``. The original version
        only printed this list and returned None.
    """
    if verbose:
        print("The original string is : " + str(sentence))

    # regex to get words, shifted so positions are relative to `begin`
    res = [(begin + ele.start(), begin + ele.end()) for ele in re.finditer(r'\S+', sentence)]

    if verbose:
        print("Word Ranges are : " + str(res))

    return res

In [None]:
# Print the word ranges of this sentence shifted by 14182
# (presumably the sentence's character offset in its source file -- TODO confirm).
get_spans("Companies that produce energy drinks claim that they can increase alertness and improve physical and mental performance.", 14182)

In [None]:
# Same check for a second sentence, shifted by 14303
# (presumably its character offset in the source file -- TODO confirm).
get_spans("These services employ specially trained people and specially equipped centers.", 14303)

In [3]:
from scripts.utils import Collection
from pathlib import Path

# Training split.
train_20 = Collection()
train_20.load(Path("./data/training/scenario.txt"))

# Development split.
development_20 = Collection()
development_20.load(Path("./data/development/main/scenario.txt"))

# Test scenario 1, loaded twice from the same file: variant A without
# keyphrases or relations, variant B with keyphrases but without relations.
scenario1_20A = Collection()
scenario1_20A.load(Path("./data/testing/scenario1-main/scenario.txt"), keyphrases=False, relations=False)
scenario1_20B = Collection()
# Last expression of the cell: its return value is what the cell displays.
scenario1_20B.load(Path("./data/testing/scenario1-main/scenario.txt"), keyphrases=True, relations=False)

# Disabled: the remaining test scenarios.
# scenario2_20 = Collection()
# scenario2_20.load(Path("./data/testing/scenario2-taskA/scenario.txt"), keyphrases=False, relations=False)

# scenario3_20 = Collection()
# scenario3_20.load(Path("./data/testing/scenario3-taskB/scenario.txt"), relations=False)

# NOTE(review): variants A and B below use identical flags, unlike scenario 1 -- confirm before re-enabling.
# scenario4_20A = Collection()
# scenario4_20A.load(Path("./data/testing/scenario4-transfer/scenario.txt"), keyphrases=False, relations=False)
# scenario4_20B = Collection()
# scenario4_20B.load(Path("./data/testing/scenario4-transfer/scenario.txt"), keyphrases=False, relations=False)

This is the line  T1	Concept 3 10;11 22;23 32	chronic lymphocytic leukemia

['T1', 'Concept 3 10;11 22;23 32', 'chronic lymphocytic leukemia']
This is the line  T2	Concept 52 64	lymphocytes

['T2', 'Concept 52 64', 'lymphocytes']
This is the line  T3	Concept 75 80;81 86;87 93	white blood cell

['T3', 'Concept 75 80;81 86;87 93', 'white blood cell']
This is the line  T4	Predicate 94 98	most

['T4', 'Predicate 94 98', 'most']
This is the line  T5	Concept 99 110	arrhythmias

['T5', 'Concept 99 110', 'arrhythmias']
This is the line  T6	Action 129 137	problems

['T6', 'Action 129 137', 'problems']
This is the line  T7	Concept 145 151	heart

['T7', 'Concept 145 151', 'heart']
This is the line  T8	Concept 153 163;164 171	electrical system

['T8', 'Concept 153 163;164 171', 'electrical system']
This is the line  T9	Concept 176 180	TEPT

['T9', 'Concept 176 180', 'TEPT']
This is the line  T10	Action 181 187	begins

['T10', 'Action 181 187', 'begins']
This is the line  T11	Concept 191 200	differ


['T992', 'Action 12157 12161', 'have']
This is the line  T993	Predicate 12174 12178	same

['T993', 'Predicate 12174 12178', 'same']
This is the line  T994	Concept 12179 12187	problems

['T994', 'Concept 12179 12187', 'problems']
This is the line  T995	Concept 12193 12200	smokers

['T995', 'Concept 12193 12200', 'smokers']
This is the line  T996	Action 12201 12206	have

['T996', 'Action 12201 12206', 'have']
This is the line  T997	Predicate 12207 12211	Some

['T997', 'Predicate 12207 12211', 'Some']
This is the line  T998	Concept 12212 12218	people

['T998', 'Concept 12212 12218', 'people']
This is the line  T999	Action 12223 12227	born

['T999', 'Action 12223 12227', 'born']
This is the line  T1000	Concept 12233 12243;12244 12249	peripheral nerve

['T1000', 'Concept 12233 12243;12244 12249', 'peripheral nerve']
This is the line  T1001	Concept 12250 12260	disorders

['T1001', 'Concept 12250 12260', 'disorders']
This is the line  T1002	Concept 12261 12271	Pseudogout

['T1002', 'Concept 

Value of i 78 [(139, 144)]
This is the sentence:  Sentence(text='These glands produce hormones that are essential for life, including sex hormones and cortisol, which helps respond to stress and has many other functions.', keyphrases=[Keyphrase(text='These', label='Reference', id=483, attr=[]), Keyphrase(text='glands', label='Concept', id=484, attr=[]), Keyphrase(text='produce', label='Action', id=485, attr=[]), Keyphrase(text='hormones', label='Concept', id=486, attr=[]), Keyphrase(text='essential', label='Concept', id=487, attr=[]), Keyphrase(text='life', label='Concept', id=488, attr=[]), Keyphrase(text='sex hormones', label='Concept', id=489, attr=[]), Keyphrase(text='cortisol', label='Concept', id=490, attr=[]), Keyphrase(text='helps', label='Action', id=491, attr=[]), Keyphrase(text='respond', label='Action', id=492, attr=[]), Keyphrase(text='stress', label='Concept', id=493, attr=[])], relations=[])
Value of i 78 [(145, 155)]
This is the sentence:  Sentence(text='These glands pr

Values:  data\training\scenario.txt target T10 T14
Values:  data\training\scenario.txt in-time T10 T12
Values:  data\training\scenario.txt subject T10 T13
Values:  data\training\scenario.txt target T13 T14
Values:  data\training\scenario.txt target T16 T17
Values:  data\training\scenario.txt subject T16 T15
Values:  data\training\scenario.txt arg T17 T18
Values:  data\training\scenario.txt target T22 T23
Values:  data\training\scenario.txt target T22 T24
Values:  data\training\scenario.txt arg T17 T21
Values:  data\training\scenario.txt arg T17 T20
Values:  data\training\scenario.txt arg T17 T19
Values:  data\training\scenario.txt arg T17 T22
Values:  data\training\scenario.txt causes T26 T25
Values:  data\training\scenario.txt in-context T29 T28
Values:  data\training\scenario.txt is-a T27 T29
Values:  data\training\scenario.txt in-context T31 T32
Values:  data\training\scenario.txt in-context T31 T33
Values:  data\training\scenario.txt target T34 T36
Values:  data\training\scenario.t

Values:  data\training\scenario.txt is-a T693 T691
Values:  data\training\scenario.txt is-a T694 T691
Values:  data\training\scenario.txt arg T697 T698
Values:  data\training\scenario.txt arg T697 T699
Values:  data\training\scenario.txt is-a T695 T696
Values:  data\training\scenario.txt part-of T695 T697
Values:  data\training\scenario.txt in-context T704 T705
Values:  data\training\scenario.txt is-a T701 T700
Values:  data\training\scenario.txt is-a T703 T700
Values:  data\training\scenario.txt is-a T705 T700
Values:  data\training\scenario.txt is-a T701 T706
Values:  data\training\scenario.txt is-a T703 T706
Values:  data\training\scenario.txt is-a T705 T706
Values:  data\training\scenario.txt is-a T701 T707
Values:  data\training\scenario.txt is-a T703 T707
Values:  data\training\scenario.txt is-a T705 T707
Values:  data\training\scenario.txt part-of T702 T703
Values:  data\training\scenario.txt in-time T708 T709
Values:  data\training\scenario.txt subject T713 T712
Values:  data\t

Value of i 15 [(54, 58)]
This is the sentence:  Sentence(text='Seizures can have many causes, including medications, high fever, head injury and certain diseases.', keyphrases=[Keyphrase(text='Seizures', label='Concept', id=107, attr=[]), Keyphrase(text='medications', label='Concept', id=108, attr=[])], relations=[])
Value of i 15 [(59, 64)]
This is the sentence:  Sentence(text='Seizures can have many causes, including medications, high fever, head injury and certain diseases.', keyphrases=[Keyphrase(text='Seizures', label='Concept', id=107, attr=[]), Keyphrase(text='medications', label='Concept', id=108, attr=[]), Keyphrase(text='high', label='Concept', id=109, attr=[])], relations=[])
Value of i 15 [(66, 70)]
This is the sentence:  Sentence(text='Seizures can have many causes, including medications, high fever, head injury and certain diseases.', keyphrases=[Keyphrase(text='Seizures', label='Concept', id=107, attr=[]), Keyphrase(text='medications', label='Concept', id=108, attr=[]), 



<scripts.utils.Collection at 0x1b314b201f0>

In [4]:
# Keep only the first 800 training sentences. This rebinds train_20, so the
# full collection is no longer reachable without re-running the loading cell.
train_20 = Collection(train_20.sentences[:800])

In [32]:
# Pick one training sentence to experiment with in the following cells.
sentence = train_20.sentences[7]
sentence

Sentence(text='However, if you feel itching all over your body, have hives that keep coming back, or have itching with no apparent cause, you may need medical attention.', keyphrases=[Keyphrase(text='feel', label='Action', id=39, attr=[]), Keyphrase(text='itching', label='Action', id=40, attr=[]), Keyphrase(text='all', label='Predicate', id=41, attr=[]), Keyphrase(text='body,', label='Concept', id=42, attr=[]), Keyphrase(text='hives', label='Concept', id=43, attr=[]), Keyphrase(text='keep', label='Action', id=44, attr=[]), Keyphrase(text='coming back', label='Action', id=45, attr=[]), Keyphrase(text='itching', label='Action', id=46, attr=[]), Keyphrase(text='cause', label='Concept', id=47, attr=[]), Keyphrase(text='need', label='Action', id=48, attr=[]), Keyphrase(text='medical attention.', label='Concept', id=49, attr=[])], relations=[Relation(from='itching', to='cause', label='in-context'), Relation(from='itching', to='all', label='in-place'), Relation(from='all', to='body,', label='

In [33]:
text = sentence.text

In [34]:
# Stand-alone BERT wrapper used by the embedding cells below; the constructor
# prints the class-level tokenizer, which is the output shown under this cell.
bert = BERTComponent('bert-large-cased')

Tokenizer:  PreTrainedTokenizer(name_or_path='bert-large-cased', vocab_size=28996, model_max_len=512, is_fast=False, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})


In [35]:
from nltk.tokenize import TreebankWordTokenizer as twt
spans = list(twt().span_tokenize(text))

In [36]:
spans

[(0, 7),
 (7, 8),
 (9, 11),
 (12, 15),
 (16, 20),
 (21, 28),
 (29, 32),
 (33, 37),
 (38, 42),
 (43, 47),
 (47, 48),
 (49, 53),
 (54, 59),
 (60, 64),
 (65, 69),
 (70, 76),
 (77, 81),
 (81, 82),
 (83, 85),
 (86, 90),
 (91, 98),
 (99, 103),
 (104, 106),
 (107, 115),
 (116, 121),
 (121, 122),
 (123, 126),
 (127, 130),
 (131, 135),
 (136, 143),
 (144, 153),
 (153, 154)]

In [37]:
sentence

Sentence(text='However, if you feel itching all over your body, have hives that keep coming back, or have itching with no apparent cause, you may need medical attention.', keyphrases=[Keyphrase(text='feel', label='Action', id=39, attr=[]), Keyphrase(text='itching', label='Action', id=40, attr=[]), Keyphrase(text='all', label='Predicate', id=41, attr=[]), Keyphrase(text='body,', label='Concept', id=42, attr=[]), Keyphrase(text='hives', label='Concept', id=43, attr=[]), Keyphrase(text='keep', label='Action', id=44, attr=[]), Keyphrase(text='coming back', label='Action', id=45, attr=[]), Keyphrase(text='itching', label='Action', id=46, attr=[]), Keyphrase(text='cause', label='Concept', id=47, attr=[]), Keyphrase(text='need', label='Action', id=48, attr=[]), Keyphrase(text='medical attention.', label='Concept', id=49, attr=[])], relations=[Relation(from='itching', to='cause', label='in-context'), Relation(from='itching', to='all', label='in-place'), Relation(from='all', to='body,', label='

In [38]:
text

'However, if you feel itching all over your body, have hives that keep coming back, or have itching with no apparent cause, you may need medical attention.'

In [39]:
# Re-derive text and token spans from `sentence`; this repeats what cells
# In [33] and In [35] already did, so the next cell starts from a known state.
text = sentence.text
spans = list(twt().span_tokenize(text))

In [40]:
# One embedding per Treebank token span, plus the mean embedding of the sentence.
text_word_embeddings, text_embedding = bert.get_bert_embeddings(text, spans)

In [41]:
text_word_embeddings

[tensor([-0.4310, -1.1438,  0.0899,  ...,  1.2625, -0.7967,  0.0751]),
 tensor([-0.0457, -0.3175,  0.0430,  ...,  1.0440, -0.7202,  0.5596]),
 tensor([ 0.1354,  0.9362, -0.2143,  ...,  0.5116, -0.9073, -0.3039]),
 tensor([ 0.4726, -0.2852,  0.0030,  ...,  0.4294, -0.8079,  0.2888]),
 tensor([-0.5834,  0.2132,  0.2505,  ..., -0.0095,  0.5432, -0.2030]),
 tensor([ 0.3509,  0.1296, -0.2321,  ...,  0.5885,  0.0217, -0.0660]),
 tensor([-0.0645,  0.3081, -0.1070,  ...,  0.0178, -0.4771, -0.2134]),
 tensor([ 0.1650,  0.2231, -0.2524,  ...,  0.3522, -0.2927, -0.5516]),
 tensor([ 1.0647,  0.4675, -0.0213,  ...,  0.3438, -0.3550,  0.1908]),
 tensor([ 0.6960,  0.5852, -0.2437,  ..., -0.2017,  0.2033, -0.2405]),
 tensor([-0.5446, -1.0509, -0.3094,  ...,  0.4499,  0.8804,  0.1090]),
 tensor([-0.0116,  0.2647,  0.3383,  ...,  0.9281,  0.5913, -0.2400]),
 tensor([ 0.3599, -0.2769,  0.3066,  ...,  0.3681,  0.2989, -0.1741]),
 tensor([ 0.4502,  0.0050,  0.0980,  ...,  1.1149, -0.2044, -0.3441]),
 tenso

In [55]:
test_embedding_prev = text_word_embeddings[5]

In [56]:
test_embedding_prev

tensor([ 0.3509,  0.1296, -0.2321,  ...,  0.5885,  0.0217, -0.0660])

In [87]:
# Embed a second version of the sentence and take the vector of the token at
# position 5, to compare it with test_embedding_prev.
# NOTE(review): as saved, text2 is character-for-character the same as `text`,
# yet the recorded comparison outputs below are not an exact match, and the
# execution counts (In [55]/[56] vs In [87]) are far apart -- the saved outputs
# probably come from an edited version of this cell. Re-run from a fresh kernel
# to confirm.
text2 = 'However, if you feel itching all over your body, have hives that keep coming back, or have itching with no apparent cause, you may need medical attention.'
spans2 = list(twt().span_tokenize(text2))
text_word_embeddings2, text_embedding2 = bert.get_bert_embeddings(text2, spans2)
test_embedding_changed = text_word_embeddings2[5]

In [88]:
from torch import nn
# Mean absolute difference between the two token embeddings (nn.L1Loss with
# its default arguments).
criterion = nn.L1Loss()
loss = criterion(test_embedding_prev, test_embedding_changed)
print(loss)

tensor(0.0290)


In [89]:
from sklearn.metrics.pairwise import cosine_similarity
# Cosine similarity between the same two embeddings; cosine_similarity takes
# 2-D inputs, hence the single-row lists and the [0][0] to unwrap the result.
score = cosine_similarity([test_embedding_prev.numpy()], [test_embedding_changed.numpy()])[0][0]
print(score)

0.9984857


In [42]:
sentence

Sentence(text='However, if you feel itching all over your body, have hives that keep coming back, or have itching with no apparent cause, you may need medical attention.', keyphrases=[Keyphrase(text='feel', label='Action', id=39, attr=[]), Keyphrase(text='itching', label='Action', id=40, attr=[]), Keyphrase(text='all', label='Predicate', id=41, attr=[]), Keyphrase(text='body,', label='Concept', id=42, attr=[]), Keyphrase(text='hives', label='Concept', id=43, attr=[]), Keyphrase(text='keep', label='Action', id=44, attr=[]), Keyphrase(text='coming back', label='Action', id=45, attr=[]), Keyphrase(text='itching', label='Action', id=46, attr=[]), Keyphrase(text='cause', label='Concept', id=47, attr=[]), Keyphrase(text='need', label='Action', id=48, attr=[]), Keyphrase(text='medical attention.', label='Concept', id=49, attr=[])], relations=[Relation(from='itching', to='cause', label='in-context'), Relation(from='itching', to='all', label='in-place'), Relation(from='all', to='body,', label='

In [None]:
from itertools import combinations

from sklearn.metrics.pairwise import cosine_similarity

# Relies on names defined by earlier cells: train_20, twt and bert.
sentences = train_20.sentences[:800]

# (source keyphrase id, target keyphrase id) -> cosine similarity of the two
# keyphrases' token embeddings in the unmodified sentence.
dicOriginalSim = {}
# Reserved for the follow-up experiments; not filled in yet.
dicResSim = {}
dicNotEntSim = {}

for sentence in sentences:
    keyphrases = sentence.keyphrases
    if len(keyphrases) < 2:
        # No pair to score, so skip the expensive BERT forward pass.
        continue

    text = sentence.text
    spans = list(twt().span_tokenize(text))

    # The embeddings depend only on the sentence, so compute them once per
    # sentence rather than once per keyphrase pair.
    text_word_embeddings, text_embedding = bert.get_bert_embeddings(text, spans)

    # Map token text -> embedding. When a token occurs several times the last
    # occurrence wins, as in the original scan. zip() also tolerates a shorter
    # embedding list, which get_bert_spans produces when alignment fails.
    embedding_by_word = {
        text[beg:end]: embedding
        for (beg, end), embedding in zip(spans, text_word_embeddings)
    }

    for source, target in combinations(keyphrases, 2):
        if source.text == target.text:
            continue

        sourceEmbedding = embedding_by_word.get(source.text)
        targetEmbedding = embedding_by_word.get(target.text)
        if sourceEmbedding is None or targetEmbedding is None:
            # The keyphrase is not exactly one token (multi-word, or with
            # punctuation attached), so there is no embedding to compare.
            continue

        score = cosine_similarity([sourceEmbedding.numpy()], [targetEmbedding.numpy()])[0][0]

        # Cosine similarity is symmetric: store both directions.
        dicOriginalSim[(source.id, target.id)] = score
        dicOriginalSim[(target.id, source.id)] = score

        # TODO: remove the current entity word from the sentence and re-score
        # (consider doing it in both directions and keeping the larger value).
            
            
            
            
            

In [94]:
sentence.keyphrases

[Keyphrase(text='feel', label='Action', id=39, attr=[]),
 Keyphrase(text='itching', label='Action', id=40, attr=[]),
 Keyphrase(text='all', label='Predicate', id=41, attr=[]),
 Keyphrase(text='body,', label='Concept', id=42, attr=[]),
 Keyphrase(text='hives', label='Concept', id=43, attr=[]),
 Keyphrase(text='keep', label='Action', id=44, attr=[]),
 Keyphrase(text='coming back', label='Action', id=45, attr=[]),
 Keyphrase(text='itching', label='Action', id=46, attr=[]),
 Keyphrase(text='cause', label='Concept', id=47, attr=[]),
 Keyphrase(text='need', label='Action', id=48, attr=[]),
 Keyphrase(text='medical attention.', label='Concept', id=49, attr=[])]