## NER (Named Entity Recognition)
 - A natural language processing task that identifies entities in text and classifies them into categories such as people, places, and organizations.
 - Used in tasks like question answering, information retrieval, text summarization, and building knowledge graphs.

In [2]:
# Two sample sentences used as NER input in the cells that follow.
sent1 = (
    "Mary from the HR department said that "
    "The Ritz London was a great hotel option to stay in London"
)
sent2 = (
    "Steve Jobs and Steve Wozniak founded Apple "
    "on April 1, 1976 in Cupertino, California"
)

In [4]:
import nltk
# Tokenize both sample sentences with NLTK (sent1 first, then sent2).
words1, words2 = (nltk.word_tokenize(sentence) for sentence in (sent1, sent2))

In [8]:
## Part-of-speech tag the tokens of each sentence; these are passed to nltk.ne_chunk below
pos1, pos2 = (nltk.pos_tag(tokens) for tokens in (words1, words2))

In [16]:
# Download the two NLTK data packages named below.
# Presumably these are the resources nltk.ne_chunk needs in the following cells — TODO confirm.
nltk.download('maxent_ne_chunker_tab')
nltk.download('words')

[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     C:\Users\piyus\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker_tab is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\piyus\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\words.zip.


True

In [20]:

# Run NLTK's named-entity chunker on the POS-tagged tokens of sent1 and draw the result.
nltk.ne_chunk(pos1).draw()

In [22]:
# Run NLTK's named-entity chunker on the POS-tagged tokens of sent2 and draw the result.
nltk.ne_chunk(pos2).draw()

## Using spaCy

In [25]:
import spacy
# Load the 'en_core_web_sm' spaCy pipeline and run it over both sample sentences.
nlp = spacy.load('en_core_web_sm')
doc1, doc2 = (nlp(sentence) for sentence in (sent1, sent2))

In [37]:
# Entities spaCy recognised in each document, displayed as a (doc1, doc2) pair
doc1.ents, doc2.ents

((Mary, The Ritz London, London),
 (Steve Jobs, Steve Wozniak, Apple, April 1, 1976, Cupertino, California))

In [51]:
# For each document, print a heading followed by one line per entity:
# its text, its label, and spaCy's explanation of that label.

for title, document in (("Doc 1 Named Entities", doc1), ("Doc 2 Named Entities", doc2)):
    print(title)
    for ent in document.ents:
        print(f"- Text: {ent.text}, Label: {ent.label_} ({spacy.explain(ent.label_)})")

Doc 1 Named Entities
- Text: Mary, Label: PERSON (People, including fictional)
- Text: The Ritz London, Label: ORG (Companies, agencies, institutions, etc.)
- Text: London, Label: GPE (Countries, cities, states)
Doc 2 Named Entities
- Text: Steve Jobs, Label: PERSON (People, including fictional)
- Text: Steve Wozniak, Label: PERSON (People, including fictional)
- Text: Apple, Label: ORG (Companies, agencies, institutions, etc.)
- Text: April 1, 1976, Label: DATE (Absolute or relative dates or periods)
- Text: Cupertino, Label: GPE (Countries, cities, states)
- Text: California, Label: GPE (Countries, cities, states)


In [43]:
## Display the named entities of doc1 inline in its text using displaCy's 'ent' style
# NOTE(review): the doc2 cells pass jupyter=True explicitly; here it is left at its
# default — presumably spaCy auto-detects the notebook, confirm.
from spacy import displacy
displacy.render(doc1, style='ent')

In [45]:
# Render doc1 with displaCy's 'dep' style (presumably the dependency-parse view — confirm).
displacy.render(doc1, style='dep')

In [53]:
# Render the named entities of doc2 with displaCy's 'ent' style, explicitly in Jupyter mode.
displacy.render(doc2, style='ent', jupyter=True)

In [55]:
# Render doc2 with displaCy's 'dep' style (presumably the dependency-parse view — confirm),
# explicitly in Jupyter mode.
displacy.render(doc2, style='dep', jupyter=True)