In [32]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [33]:
# Write a function to display basic entity info:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+' - '+ent.label_+' - '+str(spacy.explain(ent.label_)))
    else:
        print('No named entities found.')

In [34]:
doc = nlp(u'May I go to Washington, DC next May to see the Washington Monument?')

show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [35]:
doc2 = nlp(u'Can I please have 500 dollars of Microsoft stock?')

In [36]:
show_ents(doc2)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [37]:
doc3 = nlp(u'Tesla to build a UK factory for $6 million')

In [38]:
show_ents(doc3)

UK - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [39]:
from spacy.tokens import Span

In [40]:
ORG = doc3.vocab.strings[u'ORG']

In [41]:
ORG

383

In [42]:
new_ent = Span(doc3, 0, 1, label=ORG)

In [43]:
doc3.ents = list(doc3.ents) + [new_ent]

In [44]:
show_ents(doc3)

Tesla - ORG - Companies, agencies, institutions, etc.
UK - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [48]:
doc = nlp(u'Our company created a brand new vacuum cleaner'
          u'This new vacuum-cleaner is the best in show.')

In [49]:
show_ents(doc)

No named entities found.


In [50]:
from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)

In [51]:
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']

In [52]:
phrase_patterns = [nlp(text) for text in phrase_list]

In [53]:
matcher.add('newproduct', None, *phrase_patterns)

In [54]:
found_matches = matcher(doc)

In [55]:
found_matches

[(2689272359382549672, 9, 12)]

In [56]:
from spacy.tokens import Span

In [57]:
PROD = doc.vocab.strings[u'PRODUCT']

In [58]:
found_matches

[(2689272359382549672, 9, 12)]

In [61]:
new_ents = [Span(doc, match[1], match[2], label=PROD) for match in found_matches]

In [62]:
doc.ents = list(doc.ents) + new_ents

In [63]:
show_ents(doc)

vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [64]:
doc = nlp(u'Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars')

In [66]:
[ent for ent in doc.ents if ent.label_ == "MONEY"].__len__()

2