In [6]:
import spacy 

In [7]:
nlp = spacy.load("en_core_web_sm")

In [58]:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text + " - "+ent.label_+ " - "+str(spacy.explain(ent.label_)))
        else:
            print("No entities found")

In [73]:
doc = nlp(u"I have to spend at least 500 dollars for my SabaCar in kyiv. Next May I will go to Washington DC to see Kotsur Corporation")

In [74]:
show_ents(doc)

at least 500 dollars - MONEY - Monetary values, including unit
SabaCar - PERSON - People, including fictional
Washington DC - GPE - Countries, cities, states
Kotsur Corporation - ORG - Companies, agencies, institutions, etc.
No entities found


In [75]:
from spacy.tokens import Span

In [76]:
ORG = doc.vocab.strings[u"ORG"]

In [77]:
ORG

383

In [85]:
# adding a new entity

new_ent=Span(doc,12,13, label=ORG)

In [87]:
show_ents(doc)

at least 500 dollars - MONEY - Monetary values, including unit
SabaCar - PERSON - People, including fictional
kyiv - ORG - Companies, agencies, institutions, etc.
Washington DC - GPE - Countries, cities, states
Kotsur Corporation - ORG - Companies, agencies, institutions, etc.
No entities found


In [88]:
doc1 = nlp(u"Our company created a brand new vacuum cleaner."
          u"This vacuum-cleaner is the best one in the show.")

In [89]:
show_ents(doc1)

In [90]:
from spacy.matcher import PhraseMatcher

In [91]:
matcher = PhraseMatcher(nlp.vocab)

In [92]:
phrase_list = ["vacuum cleaner", "vacuum-cleaner"]

In [94]:
phrase_patterns = [nlp(text) for text in phrase_list]

In [95]:
matcher.add("newproduct", None, *phrase_patterns)

In [96]:
found_matches = matcher(doc)

In [97]:
found_matches

[]

In [98]:
from spacy.tokens import Span

In [99]:
PROD = doc1.vocab.strings[u"PRODUCT"]

In [100]:
found_matches

[]

In [102]:
new_ents = [Span(doc1, match[1], match[2], label=PROD) for match in found_matches]

In [103]:
doc.ents = list(doc.ents) + new_ents

In [105]:
print(show_ents(doc1))

None


In [106]:
[ent for ent in doc.ents if ent.label_ == "MONEY"]

[at least 500 dollars]

In [107]:
len([ent for ent in doc.ents if ent.label_ == "MONEY"])

1

In [108]:
# visualization

from spacy import displacy

In [110]:
displacy.render(doc,style="ent", jupyter=True)

In [114]:
options = {"ents":["PERSON", "ORG"]}

In [115]:
displacy.render(doc,style="ent", jupyter=True, options=options)

In [120]:
colors = {"ORG":"linear-gradient(45deg, yellow, blue)"}
options = {"ents":["PERSON", "ORG"], "colors":colors}

In [121]:
displacy.render(doc,style="ent", jupyter=True, options=options)