In [1]:
import spacy

In [3]:
# Load the `en_core_web_sm` pipeline; `nlp` turns raw text into Doc objects in the cells below.
nlp = spacy.load('en_core_web_sm')

In [4]:
def show_ents(doc):
    """Print the named entities of ``doc``, one per line.

    Each line holds the entity text, its label, and the result of
    ``spacy.explain`` for that label, separated by single spaces.
    When ``doc.ents`` is empty, 'No entities found' is printed instead.
    """
    # Guard clause: nothing to list, so report that and stop.
    if not doc.ents:
        print('No entities found')
        return
    for entity in doc.ents:
        description = str(spacy.explain(entity.label_))
        print(' '.join((entity.text, entity.label_, description)))

In [9]:
# Example sentence for a first look at the entities the pipeline assigns.
doc = nlp('I want to see the Washington monument, I might visit next month')

In [10]:
# Print every entity found in `doc` (text, label, label explanation).
show_ents(doc)

Washington GPE Countries, cities, states
next month DATE Absolute or relative dates or periods


## Adding named entities

In [25]:
# New example sentence; `doc` is rebound here, replacing the previous Doc.
doc = nlp("Tesla is building a U.K. based factory for $6 million")

In [26]:
from spacy.tokens import Span

In [27]:
# Look up the integer ID that the vocab's string table holds for the "ORG" label.
ORG = doc.vocab.strings[u"ORG"]

In [28]:
# Show the integer ID obtained for "ORG".
ORG

383

In [31]:
# Wrap token 0 ("Tesla") in a Span that carries the ORG label.
new_ent = Span(doc,0,1,label=ORG)
# Building the Span does not attach it to the Doc: append it to the existing
# entities so that show_ents(doc) reports it. Without this assignment the
# notebook only worked from leftover kernel state.
doc.ents = list(doc.ents) + [new_ent]
new_ent

Tesla

In [35]:
# List the entities currently set on `doc`.
show_ents(doc)

Tesla ORG Companies, agencies, institutions, etc.
U.K. GPE Countries, cities, states
$6 million MONEY Monetary values, including unit


In [46]:
# Adding 2 or more entities
# Adjacent string literals are concatenated as-is, so the first literal ends
# with a space to keep the two sentences apart ("cleaner. This", not
# "cleaner.This").
# NOTE: the spelling 'vaccum' (sic) is kept on purpose; the phrases searched
# for later in the notebook use the exact same spelling.
doc1 = nlp("Our company manufactures vaccum cleaner. "
           "This new vaccum-cleaner is the best")

In [47]:
# Check which entities the pipeline assigned to `doc1` on its own.
show_ents(doc1)

No entities found


In [39]:
from spacy.matcher import PhraseMatcher

In [40]:
# Phrase matcher built on the pipeline's vocabulary.
matcher = PhraseMatcher(nlp.vocab)

In [41]:
# Surface forms of the product name to search for.
phrase_list = [
    'vaccum cleaner',
    'vaccum-cleaner',
]

In [48]:
# Turn each phrase into a pattern Doc. Only tokenization is needed here: the
# matcher was created with its default settings, so running the full pipeline
# (tagger, parser, NER) on every pattern would be wasted work.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [49]:
# Register all pattern Docs on the matcher under the key 'newproduct'.
# NOTE(review): the positional None looks like the on_match callback slot of the
# spaCy v2 signature; newer spaCy releases expect matcher.add(key, patterns) —
# confirm against the installed version.
matcher.add('newproduct', None, *phrase_patterns)

In [50]:
# Run the matcher over `doc1` and keep the resulting matches.
found_matches = matcher(doc1)

In [51]:
# Inspect the matches: each is a (match_id, start_token, end_token) tuple.
found_matches

[(2689272359382549672, 3, 5), (2689272359382549672, 8, 11)]

In [52]:
from spacy.tokens import Span

In [55]:
# Look up the integer ID that the vocab's string table holds for the "PRODUCT" label.
PROD = doc1.vocab.strings[u"PRODUCT"]

In [56]:
# Show the matches again before converting them into entity spans.
found_matches

[(2689272359382549672, 3, 5), (2689272359382549672, 8, 11)]

In [57]:
# Convert every (match_id, start, end) match into a Span over `doc1`
# labelled with PROD.
new_ents = [
    Span(doc1, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [59]:
# Extend the Doc's existing entities with the newly built spans.
# NOTE(review): this cell is not idempotent — running it a second time appends
# the same spans again; presumably spaCy rejects duplicate/overlapping entity
# spans, so re-create `doc1` before re-running. Confirm.
doc1.ents = list(doc1.ents)+new_ents

In [60]:
# Confirm the matched phrases are now listed as entities of `doc1`.
show_ents(doc1)

vaccum cleaner PRODUCT Objects, vehicles, foods, etc. (not services)
vaccum-cleaner PRODUCT Objects, vehicles, foods, etc. (not services)


## Visualize NER

In [61]:
from spacy import displacy

In [62]:
# Parse the sample sentence into `sam_doc` for the visualization section.
sam_doc = nlp("Tesla is building a U.K. based factory for $6 million")

In [63]:
# Render the entities of the Doc built for this section. The original call
# passed `doc`, a variable left over from an earlier section, and never used
# `sam_doc`.
displacy.render(sam_doc,style='ent',jupyter=True)

In [70]:
# Custom highlight color per entity label.
colors = dict(ORG='#aa9cfc')
# Display settings: which entity labels to show and which colors to use for them.
options = dict(ents=['MONEY', 'ORG'], colors=colors)

In [71]:
# Render the Doc built for this section with the custom entity filter and
# colors. The original call passed `doc`, a variable left over from an earlier
# section, and never used `sam_doc`.
displacy.render(sam_doc,style='ent',jupyter=True,options=options)