In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' pipeline once; every later cell reuses this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [8]:
def show_ents(doc):
    """Print one line per named entity found in ``doc``.

    Each line has the form ``<text> - <label> - <explanation>``, where the
    explanation is whatever ``spacy.explain`` returns for the label (rendered
    with ``str``, so an unknown label prints as ``None``). When ``doc.ents``
    is empty, a single 'No entities found' line is printed instead.
    """
    entities = doc.ents
    if not entities:
        print('No entities found')
        return
    for entity in entities:
        tag = entity.label_
        description = str(spacy.explain(tag))
        print(' - '.join((entity.text, tag, description)))

In [4]:
# A sentence in which no entities are found, so show_ents takes its 'No entities found' branch.
doc = nlp(u"Hi how are you?")

In [5]:
show_ents(doc)

No entities found


In [6]:
# "May" and "Washington" each appear twice in different roles; the recorded output
# shows which mentions the model tagged and with which labels.
doc2 = nlp(u"May I go to Washington, DC next May to see the Washington Monument ?")

In [9]:
show_ents(doc2)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [11]:
# NOTE(review): "500." carries a trailing period and the recorded output tags "500."
# (period included) as MONEY — confirm the period is intentional and not a typo.
doc3 = nlp(u"Can I please have 500. dollars of Microsoft Stocks")

In [12]:
show_ents(doc3)

500. - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [13]:
# In the recorded run "Tesla" is not among the entities found for this sentence,
# which is why it is added by hand in the following cells.
doc4 = nlp(u"Tesla to build a U.K. factory for $6 million")

In [14]:
show_ents(doc4)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [15]:
from spacy.tokens import Span

In [20]:
# Look up the integer ID of the "ORG" label from the Doc that is about to be
# annotated (doc4), instead of the unrelated "Hi how are you?" Doc. The value is
# the same because both Docs come from the same `nlp`, but this no longer
# depends on an unrelated variable still being alive in the kernel.
ORG = doc4.vocab.strings[u"ORG"]

In [21]:
ORG

381

In [22]:
# Span covering tokens [0, 1) of doc4 — the single token "Tesla" — labelled ORG.
new_ent = Span(doc4,0,1,label=ORG)

In [25]:
# Keep the entities already on doc4 and append the hand-made span.
# NOTE(review): re-running only this cell re-adds the same span on top of the
# one already present — confirm how the installed spaCy handles the duplicate.
doc4.ents = [*doc4.ents, new_ent]

In [26]:
show_ents(doc4)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


___
## Adding Named Entities to All Matching Spans
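What if we want to tag *all* occurrences of a term, rather than adding a single span by hand as we did for "Tesla"? In this section we show how to use the PhraseMatcher to find every matching span in the Doc and add each one as a named entity, using "vacuum cleaner" / "vacuum-cleaner" as the example: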

In [69]:
# Two sentences that mention the product in both spellings. Adjacent string
# literals are concatenated as-is, so the first one must end with a space;
# without it the text reads "...vacuum cleaner.This new...".
doc = nlp(u"Our company created a brand new vacuum cleaner. "
          u"This new vacuum-cleaner is the best in show.")

In [60]:
show_ents(doc)

No entities found


In [61]:
from spacy.matcher import PhraseMatcher

In [62]:
# Build the PhraseMatcher on the vocab of the same `nlp` that creates the Docs matched below.
matcher = PhraseMatcher(nlp.vocab)

In [63]:
# The two surface forms (spaced and hyphenated) that should be tagged as a product.
phrase_list = ["vacuum cleaner", "vacuum-cleaner"]

In [64]:
# PhraseMatcher patterns are Doc objects. With the matcher's default settings only
# the token text is compared, so tokenizing with nlp.make_doc is enough and avoids
# running the full pipeline on every phrase.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [65]:
phrase_patterns

[vacuum cleaner, vacuum-cleaner]

In [66]:
# Register both pattern Docs under the key 'newproduct'.
# NOTE(review): this (key, None, *docs) form looks like the spaCy v2 signature, with
# None in the callback position; spaCy v3 expects
# matcher.add('newproduct', phrase_patterns) — confirm against the installed version.
matcher.add('newproduct', None, *phrase_patterns)

In [70]:
# Run the matcher over doc. As the recorded output shows, each match is a
# (match_id, start, end) triple; start/end are used as Span token bounds below.
found_matches = matcher(doc)

In [71]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [72]:
from spacy.tokens import Span

In [73]:
# ID of the "PRODUCT" label, looked up in the vocab of the Doc being annotated.
PROD = doc.vocab.strings[u"PRODUCT"]

In [74]:
# One PRODUCT-labelled Span per match; the match ID itself is not needed here.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [78]:
# Keep whatever entities doc already has and append the matched product spans.
# NOTE(review): re-running only this cell re-adds spans that are already
# present — confirm how the installed spaCy handles the duplicates.
doc.ents = [*doc.ents, *new_ents]

In [79]:
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [80]:
# NOTE: this rebinds doc2, which earlier held the "Washington" sentence.
# The sentence contains two amounts of money: "$29.95" and "10 dollars".
doc2 = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars.")

In [83]:
# Count the entities in doc2 whose label is MONEY.
sum(1 for ent in doc2.ents if ent.label_ == "MONEY")

2