In [1]:
import spacy

# Load spaCy's `en_core_web_sm` English pipeline; every `nlp(...)` call below
# uses it, so the entities shown in the outputs come from this model.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Helper that prints the named entities of a Doc

def show_ents(doc):
    """Print the named entities stored on ``doc``.

    Each entity in ``doc.ents`` is written on its own line in the form
    ``<text> - <label> - <explanation>``, where the explanation is whatever
    ``spacy.explain`` returns for the label (converted with ``str``).
    If ``doc.ents`` is empty, a single "no entities" message is printed.

    Nothing is returned; the function only prints.
    """
    # Guard clause: nothing to list.
    if not doc.ents:
        print('No Entities Found :( ')
        return

    for entity in doc.ents:
        description = str(spacy.explain(entity.label_))
        print(' - '.join((entity.text, entity.label_, description)))

In [3]:
# A short greeting with no named entities: exercises the "nothing found" branch of show_ents.
doc = nlp(u'Hi how are you?')

show_ents(doc)

No Entities Found :( 


In [4]:
# "May" occurs twice here: once as a modal verb and once as a month. In the recorded
# output only "next May" is tagged (DATE), alongside the two Washington mentions.
doc = nlp(u'May i go to Washington DC, next May to see the Washington Monument?')

show_ents(doc)

Washington DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


### Adding a Named Entity in spaCy

In [5]:
# In the recorded run the model tags "U.K." and "$6 million" but does not
# recognise "Tesla" as a company, so it is missing from the output below.
doc = nlp(u"Tesla to build a U.K. factory for $6 million")

show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [6]:
from spacy.tokens import Span

# Look up the integer ID that the Doc's vocab string store holds for the
# entity label "ORG"; this ID is what gets passed as `label=` to Span below.
ORG = doc.vocab.strings[u"ORG"]

# Display the ID. The recorded run shows 381 — NOTE(review): the exact number
# may differ between spaCy versions, so do not hard-code it.
ORG

381

In [7]:
# Create a Span for the new entity: Span(doc, start_token, end_token, label).
# Tokens [0, 1) cover just the first token, "Tesla".
new_ent = Span(doc, 0, 1, label=ORG)

# Entities in doc.ents may not overlap. Drop any existing entity that shares a
# token with new_ent before adding it, so this cell can be re-run safely: on a
# second run "Tesla" is already in doc.ents, and blindly appending the same
# span again would make spaCy reject the conflicting entities.
kept_ents = [ent for ent in doc.ents
             if ent.end <= new_ent.start or ent.start >= new_ent.end]
doc.ents = kept_ents + [new_ent]

show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


### Part 2

### Tagging Several Entities at Once in spaCy

In [10]:
# Adjacent string literals are concatenated into one string, so the first
# literal ends with a space to keep the two sentences separated
# ("...vacuum cleaner. This new..." rather than "...vacuum cleaner.This new...").
doc = nlp(u"Our company created a brand new vacuum cleaner. "
          u"This new vacuum-cleaner is the best in show")

# Out of the box, spaCy tags neither "vacuum cleaner" nor "vacuum-cleaner".
show_ents(doc)

No Entities Found :( 


In [11]:
from spacy.matcher import PhraseMatcher

# Build a PhraseMatcher on the pipeline's vocabulary; patterns are registered in the next cell.
matcher = PhraseMatcher(nlp.vocab)

In [12]:
# Phrases to look for: both spellings of the product name.
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']

# The matcher works on Doc objects, so run each phrase through the pipeline.
phrase_pattern = list(map(nlp, phrase_list))

# Register all patterns under the single key 'newproduct' (no on-match callback).
# NOTE(review): this is the spaCy v2 call form (key, callback, *patterns); newer
# spaCy releases expect matcher.add(key, patterns) — confirm against the installed version.
matcher.add('newproduct', None, *phrase_pattern)

In [13]:
# Run the matcher over the Doc. Each result is a (match_id, start, end) tuple;
# start and end are used as Span token bounds in a later cell.
found_matches = matcher(doc)

found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [14]:
# Turn each matched phrase into a labelled entity Span
from spacy.tokens import Span

# Integer ID of the "PRODUCT" entity label in this Doc's vocab.
PROD = doc.vocab.strings[u"PRODUCT"]

# Every match is (match_id, start, end): wrap each matched token range
# in a Span labelled PRODUCT.
new_ents = [Span(doc, start, end, label=PROD)
            for match_id, start, end in found_matches]

In [15]:
# Add the PRODUCT spans to doc.ents. Entities in doc.ents may not overlap, so any
# existing entity that shares a token with a new span is dropped first. On the
# first run doc.ents is empty and the result is simply new_ents; on a re-run this
# replaces the spans added earlier instead of trying to add the same tokens twice.
claimed_tokens = {i for span in new_ents for i in range(span.start, span.end)}
doc.ents = [ent for ent in doc.ents
            if claimed_tokens.isdisjoint(range(ent.start, ent.end))] + new_ents

In [16]:
# Both spellings should now be listed as PRODUCT entities.
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


### Counting How Many Named Entities Are Mentioned

In [19]:
# Sample sentence mentioning two monetary amounts and one place name.
doc = nlp(u"Originally I paid $29.95 for this car toy in London, but now it is marked down by 10 dollars")

In [20]:
# Every entity span detected in the sentence
list(doc.ents)

[29.95, London, 10 dollars]

In [21]:
# Keep only the entities whose label is MONEY
[span for span in doc.ents if span.label_ == "MONEY"]

[29.95, 10 dollars]

In [22]:
# Number of MONEY entities, counted without building an intermediate list
sum(1 for span in doc.ents if span.label_ == "MONEY")

2