# Named Entity Recognition

## Part 1

In [1]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
def show_ents(doc):
    """Print each named entity of ``doc`` as ``text-label-explanation``.

    One line is printed per entity in ``doc.ents``. When ``doc.ents`` is
    empty, the single line 'No entities found' is printed instead.
    """
    if not doc.ents:
        print('No entities found')
        return
    for ent in doc.ents:
        # str() keeps the join safe when spacy.explain yields a non-string (e.g. None).
        description = str(spacy.explain(ent.label_))
        print('-'.join((ent.text, ent.label_, description)))

In [3]:
doc4 = nlp(u'Hi How are you?')
show_ents(doc4)

No entities found


In [4]:
doc5 = nlp(u"May I go Washington D.C next May to see the Washington Monument")

In [5]:
show_ents(doc5)

Washington D.C-GPE-Countries, cities, states
next May-DATE-Absolute or relative dates or periods
the Washington Monument-ORG-Companies, agencies, institutions, etc.


In [6]:
doc6 = nlp(u"Can I please have 500 dollars of Microsoft stock?")

In [7]:
show_ents(doc6)

500 dollars-MONEY-Monetary values, including unit
Microsoft-ORG-Companies, agencies, institutions, etc.


In [8]:
doc7 = nlp(u"Tesla to build a U.K Factory for $6 million")

In [9]:
show_ents(doc7)

$6 million-MONEY-Monetary values, including unit


## Telling spaCy that Tesla is an 'ORG'

In [10]:
from spacy.tokens import Span

In [11]:
# Look up the integer ID that the vocab's string store holds for the label 'ORG';
# the bare expression on the last line displays it as the cell output.
ORG = doc7.vocab.strings[u'ORG']
ORG

381

In [12]:
# Wrap token 0 ("Tesla") in a Span labelled ORG, then store it on the doc
# together with the entities that were already detected.
new_ent = Span(doc7, 0, 1, label=ORG)
doc7.ents = [*doc7.ents, new_ent]

In [13]:
show_ents(doc7)

Tesla-ORG-Companies, agencies, institutions, etc.
$6 million-MONEY-Monetary values, including unit


### Part 2

In [23]:
# Sample text mentioning the product twice, in two different spellings, so that
# several phrases can be registered as entities.
# The first literal ends with a space: the two literals are joined by implicit
# string concatenation, and without it the sentences fuse into "cleaner.This".
doc8 = nlp(u'Our company created a brand new vaccum cleaner. '
           u'This vaccum-cleaner is the best in the show.')

In [24]:
show_ents(doc8)

No entities found


#### Adding 'vaccum-cleaner' and 'vaccum cleaner' as PRODUCT entities


In [25]:
from spacy.matcher import PhraseMatcher

In [26]:
matcher = PhraseMatcher(nlp.vocab)

In [27]:
phrase_list = ['vaccum-cleaner','vaccum cleaner']

In [28]:
# Convert each phrase into a Doc pattern. nlp.make_doc runs only the tokenizer,
# which is all the PhraseMatcher needs for its default verbatim-text matching,
# so the tagger/parser/NER components are not run on the patterns.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [29]:
# Register every phrase pattern under the match ID 'newproduct'
# (the second argument, None, means no on_match callback).
# NOTE(review): this is the spaCy v2 call signature; spaCy v3 expects
# matcher.add('newproduct', phrase_patterns) — confirm the installed version.
matcher.add('newproduct', None, *phrase_patterns)

In [30]:
found_matches = matcher(doc8)
print(found_matches)

[(2689272359382549672, 6, 8), (2689272359382549672, 10, 13)]


In [32]:
# We can create a span for each match; first look up the ID that the vocab's
# string store holds for the 'PRODUCT' label.
PROD = doc8.vocab.strings[u'PRODUCT']

In [33]:
# Turn every (match_id, start, end) tuple into a PRODUCT span and add the
# spans to the entities already stored on the doc.
new_ents = [Span(doc8, start, end, label=PROD) for _, start, end in found_matches]
doc8.ents = [*doc8.ents, *new_ents]

In [34]:
show_ents(doc8)

vaccum cleaner-PRODUCT-Objects, vehicles, foods, etc. (not services)
vaccum-cleaner-PRODUCT-Objects, vehicles, foods, etc. (not services)


In [35]:
found_matches[1]

(2689272359382549672, 10, 13)

## Counting Entities

In [37]:
doc9 = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars.")

In [38]:
show_ents(doc9)

29.95-MONEY-Monetary values, including unit
10 dollars-MONEY-Monetary values, including unit


In [40]:
# Count the entities in doc9 whose label is MONEY
sum(1 for ent in doc9.ents if ent.label_ == "MONEY")

2

## Visualizing Named Entity Recognition

In [1]:
import spacy
nlp = spacy.load('en_core_web_sm')
from spacy import displacy

In [6]:
# Two sentences mentioning organisations, products and a money amount.
# The first literal ends with a space: the literals are joined by implicit
# string concatenation, and without it the sentences fuse into "million.By".
# The amount is written "$6 million" (it was garbled as "$ 6million").
doc = nlp(u"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. "
          u"By contrast, Sony only sold 8 thousand walkman music players.")
# Render the entity highlighting inline in the notebook.
displacy.render(doc, style='ent', jupyter=True)

In [7]:
# Separating the visualization into one rendering per sentence
for sent in doc.sents:
    # jupyter takes a boolean; the string 'True' only worked because any
    # non-empty string is truthy (so 'False' would have enabled it too).
    displacy.render(nlp(sent.text), style='ent', jupyter=True)

In [8]:
# When we want to see only a particular entity type, list it under 'ents'
options = {'ents': ['PRODUCT']}
# jupyter takes a boolean, not the string 'True'
displacy.render(doc, style='ent', options=options, jupyter=True)

In [13]:
# Choosing highlighting colors.
# Besides plain color names, a linear or radial gradient can be used as a value.
colors = {
    'ORG': 'red',
    'PRODUCT': 'radial-gradient(yellow,green)',
    'DATE': 'linear-gradient(90deg,#aa92fc,#fc9ce7)',
}
options = {'ents': ['PRODUCT', 'ORG', 'DATE'], 'colors': colors}
# jupyter takes a boolean, not the string 'True'
displacy.render(doc, style='ent', options=options, jupyter=True)

In [None]:
# Serve the entity visualization from displaCy's built-in web server instead of
# rendering inline. The call keeps running while it serves requests (the log
# below shows port 5000), so the cell has no execution count; interrupt the
# kernel to stop it.
displacy.serve(doc,style='ent', options=options)


[93m    Serving on port 5000...[0m
    Using the 'ent' visualizer



127.0.0.1 - - [15/Jan/2020 13:31:09] "GET / HTTP/1.1" 200 2139
127.0.0.1 - - [15/Jan/2020 13:31:09] "GET /favicon.ico HTTP/1.1" 200 2139
