<a href="https://colab.research.google.com/github/poojamahajan0712/medium_blog/blob/master/NLP/NER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Importing required libraries
import spacy

# Load the small pretrained English pipeline.
# - spacy.load() is a convenience wrapper: it reads the language ID and pipeline
#   components from the model's meta.json, initialises the Language class, loads
#   the model data and returns the resulting object.
# - en_core_web_sm is a pretrained statistical model for English (multi-task CNN
#   trained on OntoNotes). It assigns context-specific token vectors, POS tags,
#   a dependency parse and named entities.
nlp = spacy.load("en_core_web_sm")




# 1. Getting entities from a document

In [None]:
# Run the pipeline on a sample sentence; the recognised entities are exposed on doc.ents.
doc = nlp('Microsoft Corporation is an American multinational technology company with headquarters in Redmond')

# Print each entity as: text -- label -- human-readable description of the label.
for ent in doc.ents:
    # spacy.explain() returns None for labels that are not in its glossary, so
    # let print() join the fields instead of using `+`, which would raise
    # TypeError when concatenating None to a string.
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep=' -- ')
           


Microsoft Corporation -- ORG -- Companies, agencies, institutions, etc.
American -- NORP -- Nationalities or religious or political groups
Redmond -- GPE -- Countries, cities, states


# 2. Setting entity annotations


In [None]:
# Parse a sentence that starts with a brand name ("Suprdaily").
doc = nlp("Suprdaily began its journey in the suburbs of Mumbai")

# Snapshot every detected entity as (text, start offset, end offset, label).
ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc.ents
]
print('Before', ents)
# Recorded output: Before [('Mumbai', 46, 52, 'GPE')]
# i.e. the model did not recognise "Suprdaily" as an entity.



Before [('Mumbai', 46, 52, 'GPE')]


In [None]:
# Create a Span for the new entity. A Span is a slice of a Doc object; tokens
# [0, 1) cover the first token of the sentence, which is labelled as ORG here.
sup_ent = spacy.tokens.Span(doc, 0, 1, label="ORG")

# doc.ents rejects overlapping spans, so keep only the existing entities that do
# not overlap the new one. This makes the cell safe to re-run (the span added by
# a previous run is replaced rather than duplicated) and also covers the case
# where the model had already tagged the first token itself.
non_overlapping = [
    e for e in doc.ents
    if e.end <= sup_ent.start or e.start >= sup_ent.end
]
doc.ents = non_overlapping + [sup_ent]

# Same (text, start offset, end offset, label) snapshot as in the "Before" cell.
ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
print('After', ents)
# Expected: After [('Suprdaily', 0, 9, 'ORG'), ('Mumbai', 46, 52, 'GPE')]

After [('Suprdaily', 0, 9, 'ORG'), ('Mumbai', 46, 52, 'GPE')]


# 3. Visualising Entities

In [None]:
# Visualise the entities of the current `doc` with displaCy's entity style,
# rendering directly in the notebook (jupyter=True).
spacy.displacy.render(
    doc,
    style='ent',
    jupyter=True,
)

In [None]:
# Parse a second example sentence and visualise the entities found in it.
doc = nlp(u'The company also acquired WhiteHat Jr in a deal worth $300 million')
spacy.displacy.render(doc, jupyter=True, style='ent')

* Viewing Specific Entities

In [None]:
# displaCy `options` used below:
#   ents   (list) - entity types to highlight (None for all types)
#   colors (dict) - colour overrides; entity types in upper case are mapped
#                   to colour names or values


# Viewing specific entities: restrict highlighting to ORG and colour it pink.
doc = nlp(u'The company also acquired WhiteHat Jr in a deal worth $300 million')
colors = dict(ORG='pink')
options = dict(ents=['ORG'], colors=colors)
spacy.displacy.render(doc, style='ent', options=options, jupyter=True)