In [0]:
import spacy
# Load the 'en_core_web_sm' pipeline once; later cells call `nlp` on raw text.
nlp = spacy.load('en_core_web_sm')

In [0]:
def show_ents(doc):
  """Print every entity in ``doc`` as ``text - label - explanation``.

  Args:
    doc: Object exposing an ``ents`` sequence whose items have ``text`` and
      ``label_`` string attributes (in this notebook, the result of calling
      ``nlp`` on a string).

  Returns:
    None. One line is printed per entity, or 'No entities found' when
    ``doc.ents`` is empty.
  """
  if not doc.ents:
    print('No entities found')
    return

  for ent in doc.ents:
    # spacy.explain's result is passed through str() so the line can always
    # be built, and the same ' - ' separator is used between all three fields.
    description = str(spacy.explain(ent.label_))
    print(' - '.join([ent.text, ent.label_, description]))

In [16]:
# Baseline: a sentence for which the pipeline reports no entities.
doc = nlp(u"Hi how are you?")
show_ents(doc)

No entities found


In [17]:
# "May" and "Washington" each occur twice in different roles; the printed
# result shows which occurrences the model labels and how.
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument?")
show_ents(doc)

Washington - GPE -Countries, cities, states
next May - DATE -Absolute or relative dates or periods
the Washington Monument - ORG -Companies, agencies, institutions, etc.


In [18]:
# A sentence mixing a monetary amount with a company name.
doc = nlp(u"Can I please have 500 dollars of Microsoft stock?")
show_ents(doc)

500 dollars - MONEY -Monetary values, including unit
Microsoft - ORG -Companies, agencies, institutions, etc.


In [19]:
# In the recorded run the model lists "U.K." and "$6 million" but not "Tesla".
# This `doc` is reused by the following cells, which add "Tesla" by hand.
doc = nlp(u"Tesla to build a U.K. factory for $6 million")
show_ents(doc)

U.K. - GPE -Countries, cities, states
$6 million - MONEY -Monetary values, including unit


In [0]:
from spacy.tokens import Span

In [0]:
# Look up the value stored for the string "ORG" in this doc's vocab string
# store; it is used as the label of the hand-made entity span.
ORG = doc.vocab.strings[u"ORG"]

In [22]:
# Display the looked-up label value.
ORG

383

In [0]:
# Span over doc[0:1] (printed as "Tesla" once it is added to the entities),
# labelled with the ORG value.
new_ent = Span(doc, 0, 1, label=ORG)

In [0]:
# Entities are reassigned as a whole: the existing ones first, then the new
# hand-made span.
doc.ents = tuple(doc.ents) + (new_ent,)

In [25]:
# List the entities again now that the extra span has been assigned.
show_ents(doc)

Tesla - ORG -Companies, agencies, institutions, etc.
U.K. - GPE -Countries, cities, states
$6 million - MONEY -Monetary values, including unit


In [0]:
# Adjacent string literals are joined with no separator, so the first literal
# ends with a space to keep the two sentences apart.
doc = nlp(u"Our company created a brand new vacuum cleaner. "
          u"This new vacuum-cleaner is the best in show.")

In [27]:
# Check what the model finds on its own before any custom entities are added.
show_ents(doc)

No entities found


In [0]:
from spacy.matcher import PhraseMatcher

In [0]:
# Phrase matcher built on the pipeline's vocabulary.
matcher = PhraseMatcher(nlp.vocab)

In [0]:
# Both spellings of the product name that occur in the text to be matched.
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']
# PhraseMatcher patterns are Doc objects. The matcher was created without an
# `attr` override, so it compares token text and only tokenization is needed:
# nlp.make_doc avoids running the full pipeline on every phrase.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]
# 'newproduct' is the match key; the second argument is the optional on-match
# callback, unused here.
matcher.add('newproduct', None, *phrase_patterns)

In [31]:
# Run the matcher on the doc. Each result is a 3-tuple; its second and third
# items are later used as the start and end token offsets of a Span.
found_matches = matcher(doc)
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [0]:
# Resolve the "PRODUCT" label through the doc's string store, then wrap every
# (match_id, start, end) triple reported by the matcher in a labelled Span.
PROD = doc.vocab.strings[u"PRODUCT"]
new_ents = [
    Span(doc, start, end, label=PROD)
    for match_id, start, end in found_matches
]

In [0]:
# Reassign the entities as a whole: the existing ones followed by the spans
# built from the phrase matches.
doc.ents = tuple(doc.ents) + tuple(new_ents)

In [36]:
# List the entities again now that the matched spans have been assigned.
show_ents(doc)

vacuum cleaner - PRODUCT -Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT -Objects, vehicles, foods, etc. (not services)


In [0]:
# NOTE(review): "id" in the text looks like a typo for "is"; left unchanged
# because editing the input could change what the model predicts.
doc = nlp(u"Originally I paid $29.95 for this car toy, but now it id marked down by 10 dollars.")

In [40]:
# Keep only the entities whose label is "MONEY".
list(filter(lambda ent: ent.label_ == "MONEY", doc.ents))

[29.95, 10 dollars]