In [4]:
import spacy

In [5]:
nlp = spacy.load('en_core_web_sm')

In [3]:
#making a function to show basic entity information

In [7]:
def show_ents(doc):
    """Print the named entities of ``doc``, one per line.

    Each line has the form ``<entity text> - <label> - <label explanation>``,
    where the explanation is ``spacy.explain(label)`` converted to a string.
    If ``doc.ents`` is empty, a single "no entities" message is printed instead.

    Args:
        doc: an object exposing ``ents``, an iterable of entities that each
            have ``text`` and ``label_`` string attributes.

    Returns:
        None. All output goes to stdout.
    """
    # Guard clause: nothing to list, so report that and stop.
    if not doc.ents:
        print("No entities found! hahaha nothing here is famous buddy.")
        return

    for entity in doc.ents:
        # str() keeps the line printable even when explain() has no description.
        description = str(spacy.explain(entity.label_))
        print(' - '.join((entity.text, entity.label_, description)))

In [8]:
doc1 = nlp(u"I am a real authentic explorer. Listen, I may not have all the riches in the world but what I do have is a massive heart.")

In [9]:
show_ents(doc1)

No entities found! hahaha nothing here is famous buddy.


In [10]:
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument?")

In [11]:
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [12]:
doc = nlp(u"Can I please have 500 dollars of Microsoft stocks?")

In [13]:
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [14]:
doc = nlp(u"I am really hungry. Today I might order Chipotle and get some Burgers. Although I ate pizza yesterday, I am willing to consume more fast-food this whole month.")
show_ents(doc)

Chipotle - ORG - Companies, agencies, institutions, etc.
Burgers - ORG - Companies, agencies, institutions, etc.
this whole month - DATE - Absolute or relative dates or periods


In [15]:
#add own named entity as a span

In [16]:
doc = nlp(u"Tesla to build a U.K. factory for $6 million.")

In [17]:
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [18]:
#here it does not recognize Tesla as an ORG. So we will add it

In [19]:
from spacy.tokens import Span

In [20]:
# Look up the integer ID stored for the "ORG" label string in the vocab's
# string table; this ID is what gets passed as the Span label further down.
ORG = doc.vocab.strings[u"ORG"]

In [21]:
ORG

381

In [22]:
# Span over token positions 0 up to (not including) 1 of doc, i.e. the single
# leading token "Tesla", tagged with the ORG label ID.
new_ent = Span(doc, 0, 1, label=ORG)

In [23]:
# Copy the existing entities into a list, append the hand-made span, and
# assign the combined list back so the doc reports it as an entity too.
doc.ents = list(doc.ents) + [new_ent]

In [24]:
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [25]:
#now it recognizes Tesla as an ORG which we added

In [26]:
docx = nlp(u"I bought a Tesla model 3.")

In [27]:
show_ents(docx)

Tesla - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [28]:
docx = nlp(u"Me and my friends went on a ride. It was wonderful enjoyable ride. We had a Tesla. Btw, Tesla is investing money into apple. Did you hear that?")
show_ents(docx)

Tesla - PRODUCT - Objects, vehicles, foods, etc. (not services)
Tesla - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [2]:
#add multiple named entities. For example, we want to add both "vaccume cleaner" and "vaccume-cleaner"

In [43]:
# Adjacent string literals are joined with nothing in between, so the first
# literal must end with a space; otherwise the text reads "cleaner.This".
doc = nlp(u"Our company created a brand new vaccume cleaner. "
          u"This new vaccume-cleaner is the best in show.")

In [44]:
show_ents(doc)

No entities found! hahaha nothing here is famous buddy.


In [45]:
from spacy.matcher import PhraseMatcher

In [46]:
matcher = PhraseMatcher(nlp.vocab)

In [47]:
phrase_list = ['vaccume cleaner','vaccume-cleaner']

In [48]:
# Pattern Docs only need to be tokenized for phrase matching on token text,
# so use make_doc (tokenizer only) instead of running the tagger, parser and
# NER over every pattern string.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [49]:
#checking for clarity
print(phrase_patterns)
phrase_patterns[1]

[vaccume cleaner, vaccume-cleaner]


vaccume-cleaner

In [50]:
# Register all pattern Docs under the key 'newproduct'. The second argument is
# the on_match callback slot, left as None here.
# NOTE(review): this positional form is the spaCy v2 signature; spaCy v3
# expects matcher.add('newproduct', phrase_patterns) - confirm the installed
# version before upgrading.
matcher.add('newproduct', None, *phrase_patterns)

In [51]:
found_matches = matcher(doc)

In [52]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [53]:
from spacy.tokens import Span

In [54]:
# Integer ID stored for the "PRODUCT" label string in the vocab's string
# table; used as the label when building the new entity spans.
PROD = doc.vocab.strings[u"PRODUCT"]

In [55]:
# Every match is a (match_id, start, end) triple; only the token offsets are
# needed to build a PRODUCT-labelled Span for each hit.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [56]:
# Keep whatever entities the doc already has and add the matcher-derived
# PRODUCT spans after them.
doc.ents = list(doc.ents) + new_ents

In [57]:
show_ents(doc)

vaccume cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vaccume-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [60]:
doc = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars. I bought it from Panama")

In [63]:
# Count how many entities in the doc carry the MONEY label
sum(1 for ent in doc.ents if ent.label_ == "MONEY")

2