### Named Entity Recognition

In [1]:
import spacy
nlp = spacy.load('en_core_web_lg')

#### 1. Defining a Function to Display Entities

In [9]:
# Sample sentence used to try out show_entities below.
doc = nlp('Google is the child company of Alphabet. It is a US based organization.')
def show_entities(doc):
    """Print the named entities of ``doc``, one per line.

    Each line has the form ``<entity> | <label> | <description>``, where the
    description is whatever ``spacy.explain`` returns for the label. When
    ``doc.ents`` is empty, "No entities found" is printed instead.
    """
    # Guard clause: nothing to list.
    if not doc.ents:
        print("No entities found")
        return

    for entity in doc.ents:
        tag = entity.label_
        print(entity, tag, spacy.explain(tag), sep=' | ')
show_entities(doc)

Google | ORG | Companies, agencies, institutions, etc.
Alphabet | ORG | Companies, agencies, institutions, etc.
US | GPE | Countries, cities, states


In [11]:
# "today" is the only entity here: it is tagged as a DATE.
show_entities(nlp('I am feeling dizzy today.'))

today | DATE | Absolute or relative dates or periods


In [12]:
# Same sentence without "today": exercises the "No entities found" branch.
show_entities(nlp('I am feeling dizzy.'))

No entities found


#### 2. Adding a New Entity - One at a Time

In [35]:
from spacy.tokens import Span as sp

doc = nlp('Gin is one of the biggest giant of shoes')

# Build the entity by hand: the span covers token 0 up to (not including)
# token 1, and the label is passed as the hash value of the string 'ORG'.
new_entity = sp(
    doc,
    0,
    1,
    label=doc.vocab.strings['ORG'],
)

# doc.ents is re-assigned as a whole: existing entities plus the new span.
doc.ents = [*doc.ents, new_entity]
show_entities(doc)

Gin | ORG | Companies, agencies, institutions, etc.


#### 3. Adding New Entities - Multiple at a Time

In [46]:
from spacy.matcher import PhraseMatcher

doc = nlp('Eating candy and chocolate are both injurious to health.')
m = PhraseMatcher(nlp.vocab)
phrase = ['candy', 'chocolate']

# The matcher compares verbatim token text by default, so the patterns only
# need tokenising. nlp.make_doc skips the tagger/parser/NER that nlp(text)
# would run for every phrase.
pattern = [nlp.make_doc(text) for text in phrase]

# 'Sweet' is the match key; None fills the optional on_match callback slot.
m.add('Sweet', None, *pattern)

# Registering patterns does not touch doc.ents, so this still prints only
# what the model itself found.
show_entities(doc)

No entities found


In [52]:
from spacy.tokens import Span as sp

# Each match is a (match_id, start, end) tuple of token offsets into doc.
found = m(doc)

# Skip spans that are already entities so this cell can be re-run safely:
# assigning the same span to doc.ents twice makes spaCy raise, because a
# token may belong to only one entity.
existing = {(ent.start, ent.end) for ent in doc.ents}
new_ents = [
    sp(doc, start, end, label='Sweet')
    for _match_id, start, end in found
    if (start, end) not in existing
]

doc.ents = list(doc.ents) + new_ents

# 'Sweet' is a custom label, so spacy.explain has no description for it and
# show_entities prints None in the last column.
show_entities(doc)

candy | Sweet | None
chocolate | Sweet | None




#### 4. Finding Entities with a Specific Label

In [63]:
# Sample text whose entities are filtered by label in the following cells.
doc = nlp('Google, Apple and Alphabet,china, 10 thousand')
show_entities(doc)

Google | ORG | Companies, agencies, institutions, etc.
Apple | ORG | Companies, agencies, institutions, etc.
Alphabet | ORG | Companies, agencies, institutions, etc.
china | GPE | Countries, cities, states
10 thousand | CARDINAL | Numerals that do not fall under another type


In [64]:
# Keep only the entities whose label is ORG.
[ent for ent in doc.ents if ent.label_ == 'ORG']

[Google, Apple, Alphabet]

In [65]:
# Keep only the entities whose label is CARDINAL.
[ent for ent in doc.ents if ent.label_ == 'CARDINAL']

[10 thousand]