In [1]:
import spacy
# Load the small English pipeline once; `nlp` is reused to process every text below.
nlp = spacy.load('en_core_web_sm')

In [18]:
def show_ents(doc):
    """Print every named entity of ``doc`` together with its label.

    One line is printed per entity, formatted as
    ``<text> ---> <label> --> <explanation>``, where the explanation comes
    from ``spacy.explain``. If ``doc.ents`` is empty, the single line
    ``No entities found.`` is printed instead.
    """
    entities = doc.ents
    if not entities:
        print('No entities found.')
        return
    for entity in entities:
        label = entity.label_
        print(f'{entity.text} ---> {label} --> {spacy.explain(label)}')

In [11]:
# Baseline example: a short greeting in which the pipeline finds no entities.
doc = nlp('Hi, How are you')

In [12]:
# Print the entities of `doc` (falls through to the "No entities found." branch).
show_ents(doc)

No entities found.


In [19]:
# Sentence mixing "May"/"may" with place names. Note that the first mention is
# spelled "Wahington"; it is kept as-is because the recorded output reflects it.
doc2 = nlp('I May go to Wahington, DC next may to see the Washington Monument!')

In [20]:
# Print the entities found in doc2.
show_ents(doc2)

Wahington ---> GPE --> Countries, cities, states
DC ---> GPE --> Countries, cities, states
Washington ---> GPE --> Countries, cities, states


In [21]:
# Sentence containing a money amount and a company name.
doc3 = nlp('Can I please have 500 dollars of Microsoft stock?')

In [22]:
# Print the entities found in doc3.
show_ents(doc3)

500 dollars ---> MONEY --> Monetary values, including unit
Microsoft ---> ORG --> Companies, agencies, institutions, etc.


In [23]:
# Headline-style sentence; "Tesla" is its first token (index 0).
doc4 = nlp('Tesla to build a U.K. factory for $6 million')

In [24]:
# Print the entities the model found in doc4 before any manual additions.
show_ents(doc4)

U.K. ---> GPE --> Countries, cities, states
$6 million ---> MONEY --> Monetary values, including unit


In [25]:
# Adding a new entity by hand: the model did not tag "Tesla" in doc4, so mark
# it as an ORG ourselves.
from spacy.tokens import Span

# Look the label hash up in doc4's own vocab so this cell does not silently
# depend on the unrelated `doc` variable created in an earlier cell.
ORG = doc4.vocab.strings['ORG']
# Token span [0, 1) covers only the first token of doc4 ("Tesla").
new_ent = Span(doc4, 0, 1, label=ORG)
# doc.ents is a tuple, so build a new list that contains the extra span.
doc4.ents = list(doc4.ents) + [new_ent]

In [26]:
# NOTE(review): bare name without parentheses — this only displays the function
# object, it does not call show_ents. Presumably a leftover; the actual call
# on doc4 is made in the following cell.
show_ents

<function __main__.show_ents(doc)>

In [27]:
# Print doc4's entities again; the manually added ORG span is now included.
show_ents(doc4)

Tesla ---> ORG --> Companies, agencies, institutions, etc.
U.K. ---> GPE --> Countries, cities, states
$6 million ---> MONEY --> Monetary values, including unit


In [28]:
# Two sentences about a product name the model does not know.
# The two literals are joined by implicit string concatenation, so the first
# one must end with a space to keep the sentences separated.
# The spelling "vaccum" is left untouched because the phrase list used for
# matching further down is spelled the same way.
doc5 = nlp("Our comapny created a brand new vaccum cleaner. "
           'This new vaccum-cleaner is the best in show.')

In [29]:
# Print the entities found in doc5 (none are recognised by the model).
show_ents(doc5)

No entities found.


In [30]:
# Use a PhraseMatcher to tag every occurrence of a known phrase as a named
# entity; this is handy for entities that repeat throughout a text.
from spacy.matcher import PhraseMatcher

# Build the matcher on the pipeline's shared vocab rather than on the vocab of
# an unrelated doc from an earlier cell.
matcher = PhraseMatcher(nlp.vocab)
phrase_list = ['vaccum cleaner', 'vaccum-cleaner']
# make_doc only runs the tokenizer, which is all the matcher needs for its
# patterns; the tagger/parser/NER components are skipped.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [31]:
# Register all patterns under the 'newproduct' key. The patterns are passed as
# a single list: the older `add(key, None, *patterns)` form is rejected by
# spaCy v3, while the list form is accepted by late v2 releases and v3 alike.
matcher.add('newproduct', phrase_patterns)

In [32]:
# Run the matcher over doc5; each match is a (match_id, start, end) tuple of token offsets.
found_matches = matcher(doc5)

In [33]:
# Display the raw match tuples.
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [34]:
# Convert every phrase match into a PRODUCT entity on doc5.
from spacy.tokens import Span

# Look the label hash up in doc5's own vocab so this cell does not depend on
# the unrelated `doc` variable from an earlier cell.
PROD = doc5.vocab.strings['PRODUCT']
# Each match is a (match_id, start, end) tuple; only the token offsets are needed.
new_ents = [Span(doc5, start, end, label=PROD) for match_id, start, end in found_matches]
doc5.ents = list(doc5.ents) + new_ents

In [35]:
# Print doc5's entities again; both matched phrases are now PRODUCT entities.
show_ents(doc5)

vaccum cleaner ---> PRODUCT --> Objects, vehicles, foods, etc. (not services)
vaccum-cleaner ---> PRODUCT --> Objects, vehicles, foods, etc. (not services)


In [36]:
# Sentence with two money amounts, one written with "$" and one with "dollars".
doc6 = nlp('Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars')

In [39]:
# Keep only the entities of doc6 whose label is MONEY.
[
    span
    for span in doc6.ents
    if span.label_ == 'MONEY'
]

[29.95, 10 dollars]

In [40]:
# Count the MONEY entities in doc6 without materialising an intermediate list.
sum(1 for span in doc6.ents if span.label_ == 'MONEY')

2

# Named Entity Recognition and Display

In [41]:
# Two-sentence example used for the displaCy visualisations below.
# The literals are joined by implicit string concatenation, so the first one
# ends with a space to keep "million." and "By" from running together.
doc7 = nlp('Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. '
           'By contrast Sony only sold 8 thousand music Walkman players')

In [42]:
from spacy import displacy

In [43]:
# Render doc7 inline in the notebook using the entity ('ent') visualizer.
displacy.render(doc7, style = 'ent', jupyter = True)

In [46]:
# Display the entities separately for each sentence.
# Span.as_doc() reuses the annotations already computed for doc7 instead of
# running the whole pipeline a second time on each sentence's text.
for sent in doc7.sents:
    displacy.render(sent.as_doc(), style='ent')

In [54]:
# Restrict the visualisation to PRODUCT entities for the whole document.
options = dict(ents=['PRODUCT'])
displacy.render(doc7, options=options, style='ent')

In [52]:
# Display only PRODUCT entities, one sentence at a time.
options = {'ents': ['PRODUCT']}
for sent in doc7.sents:
    # as_doc() reuses doc7's existing annotations rather than re-running the
    # pipeline on the sentence text.
    displacy.render(sent.as_doc(), style='ent', options=options)

In [55]:
# Restrict the visualisation to ORG entities for the whole document.
options = dict(ents=['ORG'])
displacy.render(doc7, options=options, style='ent')

In [59]:
# Render each sentence of doc7 on its own, using the `options` dict defined in
# an earlier cell.
for sent in doc7.sents:
    # as_doc() reuses doc7's existing annotations rather than re-running the
    # pipeline on the sentence text.
    displacy.render(sent.as_doc(), style='ent', options=options)

In [60]:
# Display only ORG and PRODUCT entities, one sentence at a time.
options = {'ents': ['PRODUCT', 'ORG']}
for sent in doc7.sents:
    # as_doc() reuses doc7's existing annotations rather than re-running the
    # pipeline on the sentence text.
    displacy.render(sent.as_doc(), style='ent', jupyter=True, options=options)

In [63]:
# Give each displayed entity type its own highlight colour.

colors = dict(ORG='RED', PRODUCT='YELLOW')
# Show exactly the entity types that have a colour assigned, in the same order.
options = {'ents': list(colors), 'colors': colors}
displacy.render(doc7, options=options, jupyter=True, style='ent')

In [64]:
# displacy.serve blocks until its web server is interrupted, so calling it
# inside the loop starts (and requires manually stopping) one server per
# sentence. Collect the sentences first and serve them all from one server.
# Uses the `options` dict defined in an earlier cell.
sentence_docs = [sent.as_doc() for sent in doc7.sents]
displacy.serve(sentence_docs, style='ent', options=options)

  "__main__", mod_spec)



Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...



127.0.0.1 - - [22/Sep/2020 16:56:50] "GET / HTTP/1.1" 200 1166
127.0.0.1 - - [22/Sep/2020 16:56:50] "GET /favicon.ico HTTP/1.1" 200 1166


Shutting down server on port 5000.


  "__main__", mod_spec)



Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.
