# 1) Extract entities using Named Entity Recognition

In [1]:
from flair.data import Sentence
from flair.models import SequenceTagger

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pretrained tagger identified by "flair/ner-english-ontonotes".
tagger = SequenceTagger.load("flair/ner-english-ontonotes")

# Build an example sentence that mentions two people.
sentence = Sentence("Sanskar had trouble with poop, while Kshitij had trouble with allergies.")

# Run NER. The return value is not used; later cells read the predictions
# back from `sentence` itself.
tagger.predict(sentence)

2023-03-12 11:34:24,711 SequenceTagger predicts: Dictionary with 76 tags: <unk>, O, B-CARDINAL, E-CARDINAL, S-PERSON, S-CARDINAL, S-PRODUCT, B-PRODUCT, I-PRODUCT, E-PRODUCT, B-WORK_OF_ART, I-WORK_OF_ART, E-WORK_OF_ART, B-PERSON, E-PERSON, S-GPE, B-DATE, I-DATE, E-DATE, S-ORDINAL, S-LANGUAGE, I-PERSON, S-EVENT, S-DATE, B-QUANTITY, E-QUANTITY, S-TIME, B-TIME, I-TIME, E-TIME, B-GPE, E-GPE, S-ORG, I-GPE, S-NORP, B-FAC, I-FAC, E-FAC, B-NORP, E-NORP, S-PERCENT, B-ORG, E-ORG, B-LANGUAGE, E-LANGUAGE, I-CARDINAL, I-ORG, S-WORK_OF_ART, I-QUANTITY, B-MONEY


In [3]:
# List the spans tagged under the 'ner' label type.
sentence.get_spans('ner')

[Span[0:1]: "Sanskar" → PERSON (0.9969),
 Span[7:8]: "Kshitij" → PERSON (0.9998)]

In [4]:
# Check which container type get_spans returns.
type(sentence.get_spans('ner'))

list

In [8]:
# Show every label on the sentence; each one pairs a span with a tag and a score.
sentence.get_labels()

['Span[0:1]: "Sanskar"'/'PERSON' (0.9969),
 'Span[7:8]: "Kshitij"'/'PERSON' (0.9998)]

In [27]:
# shortstring renders one label compactly as '"text"/TAG'.
sentence.get_labels()[0].shortstring

'"Sanskar"/PERSON'

In [28]:
# Split the shortstring on '/' to separate the quoted text from the tag.
sentence.get_labels()[0].shortstring.split('/')

['"Sanskar"', 'PERSON']

In [30]:
# Keep the text half and slice off its surrounding double quotes.
sentence.get_labels()[0].shortstring.split('/')[0][1:-1]

'Sanskar'

In [31]:
# Group the text of every predicted entity under its tag,
# producing a mapping such as {'PERSON': ['Sanskar', 'Kshitij']}.
entities = {}
for label in sentence.get_labels():
    # shortstring looks like '"Sanskar"/PERSON'. Split on the LAST '/' so that
    # entity text which itself contains a '/' is not cut in half.
    quoted_text, sep, tag = label.shortstring.rpartition('/')
    # Slice off the surrounding double quotes. setdefault creates the list on
    # first sight of a tag, so no broad exception handler is needed and real
    # errors are no longer silently swallowed.
    entities.setdefault(tag, []).append(quoted_text[1:-1])

entities

{'PERSON': ['Sanskar', 'Kshitij']}

# 2) Extract verbs from a sentence

In [35]:
import spacy

nlp = spacy.load("en_core_web_lg")
doc = nlp("Man walks into a bar. He ate food.")  # Your text here

# One (text, start_offset, end_offset, pos) tuple per token tagged as a verb.
# The end offset is the token's start offset plus its length.
words = [
    (tok.text, tok.idx, tok.idx + len(tok), tok.pos_)
    for tok in doc
    if tok.pos_ == "VERB"
]


In [36]:
# Inspect the collected (text, start, end, pos) tuples.
words

[('walks', 4, 9, 'VERB'), ('ate', 25, 28, 'VERB')]

In [37]:
# Text of the first collected verb.
words[0][0]

'walks'

# 3) Check if 2 words are synonyms

In [41]:
import nltk
# Download the WordNet package so that the wordnet corpus reader imported
# below has data to query.
nltk.download('wordnet')
from nltk.corpus import wordnet


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\harsh\AppData\Roaming\nltk_data...


In [42]:
def check_for_synonyms(word, word_list):
    """Report whether `word_list` contains a WordNet synonym of `word`.

    Every lemma name of every synset returned by `wordnet.synsets(word)` is
    considered. A lemma counts as a match when it appears in `word_list` and
    is not identical to `word` itself.

    Args:
        word: The word whose synsets are looked up.
        word_list: Candidate words to test for membership.

    Returns:
        True as soon as one matching lemma is found, otherwise False.
    """
    return any(
        candidate != word and candidate in word_list
        for sense in wordnet.synsets(word)
        for candidate in sense.lemma_names()
    )

In [46]:
# Ask whether any candidate word is a WordNet synonym of 'happy'.
check_for_synonyms('happy', ['bad', 'glad'])

True