In [1]:
import spacy
# Load the small English pipeline once; every later cell reuses `nlp`.
nlp = spacy.load("en_core_web_sm")

In [2]:
def show_ents(doc):
    """Print one line per entity in ``doc.ents``.

    Each line has the form ``<entity text> - <label description>``, where the
    description is whatever ``spacy.explain`` returns for the entity's label
    (rendered through ``str``, so a missing description prints as ``None``).
    When the document has no entities, a single "No Entity Found" line is
    printed instead.
    """
    if not doc.ents:
        print("No Entity Found")
        return
    for entity in doc.ents:
        description = str(spacy.explain(entity.label_))
        print(entity.text + ' - ' + description)

In [3]:
# Sample text for the first entity demo; the adjacent literals are joined
# into one string before being handed to the pipeline.
doc = nlp(
    u"My brother Rahil was 6 when orange-biscuits cost $5 for packet. "
    u"It was year 2007, I remember playing at 3:00PM in the summer. "
    u"Tesla is an driverless car"
)

In [4]:
# Print each entity found in the sample text together with the description
# of its label.
show_ents(doc)

Rahil - People, including fictional
6 - Absolute or relative dates or periods
5 - Monetary values, including unit
year 2007 - Absolute or relative dates or periods
3:00PM - Absolute or relative dates or periods
the summer - Absolute or relative dates or periods


In [5]:
from spacy.tokens import Span
# Look up the integer ID that the vocabulary's string store holds for the
# "ORG" label; it is passed as the `label` when building a Span by hand.
ORG = doc.vocab.strings[u"ORG"]
# Echo the ID.
ORG

383

In [6]:
# Find the "Tesla" token by its text instead of hard-coding its position, so
# the span stays on the right token if the sentence or tokenization changes.
# Raises StopIteration if the document has no such token.
tesla_token = next(token for token in doc if token.text == u"Tesla")
# A one-token span [i, i + 1) labelled with the ORG ID.
new_ent = Span(doc, tesla_token.i, tesla_token.i + 1, label=ORG)
# Existing entities are copied into a list so the manual span can be appended.
doc.ents = list(doc.ents) + [new_ent]
show_ents(doc)

Rahil - People, including fictional
6 - Absolute or relative dates or periods
5 - Monetary values, including unit
year 2007 - Absolute or relative dates or periods
3:00PM - Absolute or relative dates or periods
the summer - Absolute or relative dates or periods
Tesla - Companies, agencies, institutions, etc.


In [7]:
# Two sentences mentioning the same product, once with and once without a
# hyphen. The first literal ends with a space: adjacent literals are joined
# with nothing in between, so without it the text would read "cleaner.This".
doc = nlp(u"Our company created a brand new vacuum cleaner. "
          u"This new vacuum-cleaner is the best in show.")

In [8]:
# List whatever entities the model found in the vacuum-cleaner text.
show_ents(doc)

No Entity Found


In [9]:
from spacy.matcher import PhraseMatcher
# Phrase matcher that shares the pipeline's vocabulary.
matcher = PhraseMatcher(nlp.vocab)
# Both spellings of the product name to look for.
pattern = ['vacuum-cleaner','vacuum cleaner']
# Phrase patterns only need to be tokenized, so build them with nlp.make_doc
# instead of running the whole pipeline on each phrase.
phrase_patterns = [nlp.make_doc(text) for text in pattern]
# Register all pattern docs under a single match key.
matcher.add('newprod',phrase_patterns)

In [10]:
# Run the phrase matcher over the document and keep the raw matches.
found = matcher(doc)

In [11]:
# Show the matches: each one is a 3-tuple whose second and third items are
# later used as the start and end offsets of a Span.
found

[(7467514733791696242, 6, 8), (7467514733791696242, 11, 14)]

In [12]:
from spacy.tokens import Span
# ID held by the vocabulary's string store for the "PRODUCT" label, used as
# the label for the spans built from the matches.
PRODUCT = doc.vocab.strings[u"PRODUCT"]
# Re-display the matches.
found

[(7467514733791696242, 6, 8), (7467514733791696242, 11, 14)]

In [13]:
# Turn every match into a PRODUCT-labelled span over the matched tokens; the
# first item of each match tuple is not needed here.
new_ents = [
    Span(doc, start, end, label=PRODUCT)
    for _match_key, start, end in found
]

In [14]:
# Copy the current entities into a list and append the matcher-derived spans,
# then assign the combined list back to the document.
doc.ents = list(doc.ents) + new_ents

In [15]:
# Display the entities now attached to the document.
doc.ents

(vacuum cleaner, vacuum-cleaner)

In [16]:
# Sentence containing two dollar amounts, used to filter and count entities
# by label.
doc = nlp(
    u"Originally I paid $12.97 for this car toy, but now it is marked $10"
)

In [17]:
# Keep only the entities whose label is MONEY. The loop variable has its own
# name so it does not shadow the resulting list.
ent = [span for span in doc.ents if span.label_ == 'MONEY']
ent

[12.97, 10]

In [18]:
# Number of MONEY entities collected in `ent`.
len(ent)

2

In [19]:
from spacy import displacy
# Two-sentence sample for the visualizer. The first literal ends with a
# space: adjacent literals are joined with nothing in between, so without it
# the text would read "million.By contrast".
doc = nlp(
    u"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $3 million. "
    u"By contrast Sony only sold 8 thousand Walkman music player."
)
# Draw the whole document's entities inline in the notebook.
displacy.render(doc,style='ent',jupyter=True)

In [20]:
# Render the entities one sentence at a time: each sentence's text is passed
# through the pipeline again and the resulting Doc is drawn separately.
# NOTE(review): re-processing sent.text may yield different entities than the
# full-document pass — confirm this is intended.
for sent in doc.sents:
    displacy.render(nlp(sent.text),style='ent',jupyter=True)