In [1]:
import spacy

In [2]:
# Load spaCy's 'en_core_web_sm' English pipeline; every Doc below is produced by calling it.
nlp = spacy.load('en_core_web_sm')

In [4]:
def show_ents(doc):
    """Print every named entity in ``doc`` with its label and the label's description.

    Each entity is written on its own line as ``text - label - description``,
    where the description comes from ``spacy.explain``. If ``doc.ents`` is
    empty, the single line ``No entities found`` is printed instead.

    Args:
        doc: Object exposing ``ents``, an iterable of entities that have
            ``text`` and ``label_`` attributes (e.g. a spaCy ``Doc``).

    Returns:
        None. All output goes to stdout.
    """
    if not doc.ents:
        print('No entities found')
        return
    for ent in doc.ents:
        # spacy.explain returns None for labels missing from its glossary
        # (e.g. custom labels); concatenating None would raise TypeError,
        # so fall back to a placeholder description.
        description = spacy.explain(ent.label_) or 'No description available'
        print(' - '.join((ent.text, ent.label_, description)))

In [5]:
# Parse a short greeting; the recorded output shows the model finds no entities in it.
doc = nlp(u'Hi, how are you?')

In [6]:
# No entities are found here, so this exercises the 'No entities found' branch.
show_ents(doc)

No entities found


In [7]:
# Sentence that uses "May" twice (as a verb and as a month) and "Washington" twice.
doc = nlp(u'May I go to Washington, DC next May to see Washington monument ')

In [8]:
# Recorded output: "next May" is tagged as a DATE while the sentence-initial "May" is left
# untagged, and only "Washington" of "Washington monument" is tagged (as GPE).
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
Washington - GPE - Countries, cities, states


In [9]:
# Sentence containing a monetary amount and a company name.
doc = nlp(u'Can I please have 500 dollars from Microsoft stock')

In [10]:
# Recorded output: "500 dollars" is tagged MONEY and "Microsoft" is tagged ORG.
show_ents(doc)


500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [11]:
# Sentence whose first token ("Tesla") is later added by hand as an ORG entity.
doc = nlp(u'Tesla to build a U.K. factory for $6 million')

In [12]:
# Recorded output: "U.K." and "$6 million" are tagged, but "Tesla" is not.
show_ents(doc)


U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [13]:
from spacy.tokens import Span

In [14]:
# Look up the integer ID that the vocabulary's string store holds for the label "ORG"
# (381 in the recorded output); it is used as the Span label below.
ORG = doc.vocab.strings[u"ORG"]

In [15]:
# Display the ID obtained for "ORG".
ORG

381

In [16]:
# Build a Span covering token 0 only (start=0, end=1, i.e. "Tesla") and label it ORG.
new_ent = Span(doc,0,1,label=ORG)

In [17]:
# Add the hand-built span to the document's entities.
# doc.ents rejects spans that share tokens, so first drop any existing entity that
# overlaps new_ent. Without this filter the cell raises when it is re-run (the span
# is already present) or when the model itself already tags these tokens.
doc.ents = [
    ent for ent in doc.ents
    if ent.end <= new_ent.start or ent.start >= new_ent.end
] + [new_ent]

In [18]:
# Recorded output: "Tesla" is now listed as ORG alongside the model's own entities.
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [27]:
# Two sentences that each mention the product phrase "vacuum cleaner".
doc = nlp(u'Our company plans to introduce a new vacuum cleaner. '
          u'If successful, the vacuum cleaner will be our first product.')

# Recorded output: only "first" (ORDINAL) is tagged; "vacuum cleaner" is not recognized.
show_ents(doc)

first - ORDINAL - "first", "second", etc.


In [35]:
from spacy.matcher import PhraseMatcher
# Create a PhraseMatcher that shares the pipeline's vocabulary.
matcher = PhraseMatcher(nlp.vocab)

In [36]:
# Surface forms of the product name to search for.
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']
# Tokenize only: the PhraseMatcher created without an `attr` argument compares token
# text, so running the tagger/parser/NER on each pattern (as nlp(text) does) is wasted work.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [37]:
# Register the phrase patterns under the rule name 'newproduct'.
# The second argument is the optional on_match callback (unused here); this is the
# spaCy v2 call form. NOTE(review): spaCy v3 expects
# matcher.add('newproduct', phrase_patterns) instead -- confirm the installed version.
matcher.add('newproduct', None, *phrase_patterns)

# Run the matcher over the Doc; each hit is a (match_id, start, end) tuple,
# where start/end are token indices into doc.
matches = matcher(doc)

# Display the matches that were found:
matches

[(2689272359382549672, 7, 9), (2689272359382549672, 14, 16)]

In [38]:
from spacy.tokens import Span

In [39]:
# ID held by the vocabulary's string store for the "PRODUCT" label; used to label the matched spans.
PROD = doc.vocab.strings[u"PRODUCT"]

In [40]:
# Re-display the (match_id, start, end) tuples that the spans are built from.
matches

[(2689272359382549672, 7, 9), (2689272359382549672, 14, 16)]

In [42]:
# Wrap every phrase match in a Span labelled PROD; each match is a
# (match_id, start, end) tuple and only the token boundaries are needed.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in matches
]

In [43]:
# Add the matched product spans to the document's entities.
# doc.ents rejects spans that share tokens, so drop any existing entity that overlaps
# one of the new spans first. Without this the cell raises when it is re-run (the
# spans are already present) or when the model already tags part of a matched phrase.
new_tokens = {i for span in new_ents for i in range(span.start, span.end)}
kept_ents = [
    ent for ent in doc.ents
    if new_tokens.isdisjoint(range(ent.start, ent.end))
]
doc.ents = kept_ents + new_ents

In [44]:
# Recorded output: both "vacuum cleaner" mentions now appear as PRODUCT entities.
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
first - ORDINAL - "first", "second", etc.


In [46]:
# Sentence containing two dollar amounts ($29.85 and $10).
doc = nlp(u"Originally I paid $29.85 for this toy car, but now it is marked down by $10")

In [48]:
# Count the entities whose label is MONEY, without building an intermediate list.
sum(1 for ent in doc.ents if ent.label_ == "MONEY")

2