In [3]:
import spacy
# Load the small English pipeline once; every later cell reuses this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [8]:
def show_ents(doc):
    """Print one line per named entity found in ``doc``.

    Each line has the form ``<text> - <label> - <explanation>``, where the
    explanation is whatever ``spacy.explain`` returns for the label,
    converted with ``str`` (so a missing explanation prints as ``None``).
    When ``doc.ents`` is empty, the single line ``No entity found`` is
    printed instead. Nothing is returned.
    """
    # Guard clause: nothing to list when the document has no entities.
    if not doc.ents:
        print('No entity found')
        return

    for entity in doc.ents:
        description = str(spacy.explain(entity.label_))
        print(' - '.join((entity.text, entity.label_, description)))

In [5]:
# Baseline: a sentence without named entities exercises the "No entity found" branch.
doc = nlp(u'Hi, how are you?')
show_ents(doc)

No entity found


In [9]:
# "May" occurs twice: as a modal verb and as a month. Only the month ends up
# inside an entity (the DATE "next May"); the leading "May" is not tagged.
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument?")
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [10]:
# A monetary amount plus a company name: yields one MONEY and one ORG entity.
doc = nlp(u"Can I please have 500 dollars of Microsoft stock?")
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [11]:
# The model tags "U.K." and "$6 million" here but misses "Tesla".
# This `doc` is reused by the next cells, which add the missing entity by hand.
doc = nlp(u"Tesla to build a U.K. factory for $6 million")
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [13]:
from spacy.tokens import Span
# Look up the integer ID stored for the "ORG" label in the vocab's string table;
# it is used as the Span label when the entity is created.
ORG = doc.vocab.strings[u"ORG"]
# Bare name as last expression: displays the looked-up integer.
ORG

381

In [15]:
# Token range [0, 1) is the first token of the doc ("Tesla"); label it ORG.
new_entity = Span(doc, 0, 1, label=ORG)
# Rebuild doc.ents from the entities already present plus the new span.
# NOTE(review): this appends on every execution, so re-running the cell adds the
# same span a second time — confirm spaCy accepts that, or re-create `doc` first.
doc.ents = list(doc.ents) + [new_entity]
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


# Part Two

In [33]:
# Two sentences mentioning the product with and without a hyphen.
# The first literal ends with a space on purpose: adjacent string literals are
# concatenated verbatim, so without it the text would read "cleaner.This".
doc = nlp(
    u"Our company created a brand new vacuum cleaner. "
    u"This new vacuum-cleaner is the best in show."
)
# The stock model is expected to find no entities in this text.
show_ents(doc)

No entity found


In [34]:
from spacy.matcher import PhraseMatcher
# The phrase matcher is built on the pipeline's vocab.
matcher = PhraseMatcher(nlp.vocab)
# Both spellings of the product name that should be recognised.
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']
# Each phrase is run through `nlp` so it can be registered as a pattern.
phrase_patterns = [nlp(text) for text in phrase_list]

In [35]:
# Register all phrase patterns under the key 'newproduct'; the None argument is
# the optional on-match callback.
# NOTE(review): this is the spaCy v2 call signature; spaCy v3 expects
# matcher.add('newproduct', phrase_patterns) — confirm against the installed version.
matcher.add('newproduct', None, *phrase_patterns)

In [36]:
# Apply the phrase matcher to the current `doc` and keep the raw match list.
found_matches = matcher(doc)

In [37]:
# Each result is a (match_id, start_token, end_token) tuple; the end index is exclusive.
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [38]:
from spacy.tokens import Span

In [39]:
# Integer ID stored for the "PRODUCT" label in the vocab's string table, used as the Span label.
PROD = doc.vocab.strings[u"PRODUCT"]

In [40]:
# Show the matches again: their start/end token offsets are what get turned into entity spans.
found_matches 

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [41]:
# Turn every (match_id, start, end) hit into a Span labelled PRODUCT.
new_entities = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]
# Entities already on the doc come first, followed by the new product spans.
doc.ents = [*doc.ents, *new_entities]

In [42]:
# Verify that the matched phrases are now reported as PRODUCT entities.
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [47]:
doc = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars.")
# Keep only the entities labelled MONEY; the resulting list is the cell's displayed value.
[ent for ent in doc.ents if ent.label_ == "MONEY"]

[29.95, 10 dollars]

# Visualizing Named Entity Recognition

In [48]:
doc = nlp(u"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.")
from spacy import displacy
# style="ent" selects the named-entity visualizer; jupyter=True renders the
# markup inline in the notebook output.
displacy.render(doc, style = "ent", jupyter = True)

In [49]:
# Two sentences. The first literal ends with a space on purpose: adjacent string
# literals are concatenated verbatim, so without it the text would read
# "million.By" and the sentence boundary would depend on tokenizer heuristics.
doc = nlp(
    u"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. "
    u"By contrast, Sony only sold 8 thousand Walkman music players."
)

# Render one visualization per sentence: each sentence's text is parsed again
# into its own Doc before being handed to displaCy.
for sent in doc.sents:
    displacy.render(nlp(sent.text), style='ent', jupyter=True)

In [50]:
# Restrict the visualization to PRODUCT entities via the 'ents' option.
options = {'ents':['PRODUCT']}
displacy.render(doc, style='ent', jupyter=True, options=options)

In [51]:
# Override the highlight color for ORG entities with a plain color name.
colors = {'ORG': 'pink'}
# Note: this rebinds `options`, replacing any earlier entity filter.
options = {'colors': colors}
displacy.render(doc, style='ent', jupyter=True, options=options)

In [54]:
# Same ORG override, but the value is a CSS gradient string instead of a flat color name.
color = {'ORG':'linear-gradient(90deg, #aa9cfc, #fc9ce7)'}
options = {'colors': color}
displacy.render(doc, style='ent', jupyter=True, options=options)