<h2>Named Entity Recognition (NER)</h2>

In [1]:
import spacy
from spacy import displacy
from spacy.tokens import Span

In [2]:
# Load spaCy's small English pipeline. The trailing expression displays the
# names of its components, which include the "ner" step this notebook uses.
MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [3]:
# Run the pipeline over a sample sentence; the entities it detects are
# exposed afterwards through `doc1.ents`.
mars_sentence = "Elon Musk plans to send a team of engineers to Mars by 2030."
doc1 = nlp(mars_sentence)

In [4]:
# One line per entity: its text, its label, and spaCy's description of the label.
for entity in doc1.ents:
    label_name = entity.label_
    print(entity.text, " | ", label_name, " | ", spacy.explain(label_name))

Elon Musk  |  PERSON  |  People, including fictional
Mars  |  LOC  |  Non-GPE locations, mountain ranges, bodies of water
2030  |  DATE  |  Absolute or relative dates or periods


In [5]:
# For every entity in doc1, report its text, its integer label ID, its label
# name with description, and its token and character offsets in the document.
SEPARATOR = '----------------------------------------------'
for entity in doc1.ents:
    label_name = entity.label_
    label_help = str(spacy.explain(label_name))
    token_from, token_to = entity.start, entity.end
    char_from, char_to = entity.start_char, entity.end_char

    print(entity.text)
    print(entity.label)
    print(label_name, ': ', label_help)
    print('Tokens : ', token_from, ' >> ', token_to)
    print('Chars : ', char_from, ' >> ', char_to)
    print(SEPARATOR)

Elon Musk
380
PERSON :  People, including fictional
Tokens :  0  >>  2
Chars :  0  >>  9
----------------------------------------------
Mars
385
LOC :  Non-GPE locations, mountain ranges, bodies of water
Tokens :  10  >>  11
Chars :  47  >>  51
----------------------------------------------
2030
391
DATE :  Absolute or relative dates or periods
Tokens :  12  >>  13
Chars :  55  >>  59
----------------------------------------------


In [6]:
# Visualize the entities of doc1 using displaCy's entity ("ent") style.
# `displacy` is imported once in the notebook's top import cell.
displacy.render(doc1, style="ent")

In [7]:
# Second example: a sentence mixing a person, a company, a product and a date.
apple_sentence = "Tim Cook, CEO of Apple, announced a new line of iPhones to be released in September."
doc2 = nlp(apple_sentence)

# NOTE: in the recorded output the model tags "iPhones" as ORG, i.e. the
# small pipeline's labels are not always the ones a human would choose.
for entity in doc2.ents:
    label_name = entity.label_
    print(entity.text, " | ", label_name, " | ", spacy.explain(label_name))

Tim Cook  |  PERSON  |  People, including fictional
Apple  |  ORG  |  Companies, agencies, institutions, etc.
iPhones  |  ORG  |  Companies, agencies, institutions, etc.
September  |  DATE  |  Absolute or relative dates or periods


In [8]:
# Visualize the entities of doc2 using displaCy's entity ("ent") style.
# `displacy` is already imported earlier in the notebook, so it is not
# re-imported here.
displacy.render(doc2, style="ent")

In [9]:
# Third example: a sentence containing a company, a person and a money amount.
amazon_sentence = "Amazon CEO Jeff Bezos sold company shares worth $2 billion."
doc3 = nlp(amazon_sentence)

# List each entity with its label and spaCy's description of that label.
for entity in doc3.ents:
    label_name = entity.label_
    print(entity.text, " | ", label_name, " | ", spacy.explain(label_name))

Amazon  |  ORG  |  Companies, agencies, institutions, etc.
Jeff Bezos  |  PERSON  |  People, including fictional
$2 billion  |  MONEY  |  Monetary values, including unit


In [10]:
# Visualize the entities of doc3 using displaCy's entity ("ent") style.
# `displacy` is already imported earlier in the notebook, so it is not
# re-imported here.
displacy.render(doc3, style="ent")

<h3>Setting custom entities</h3>

In [11]:
# Example used for the custom-entity demo. In the recorded output the model
# finds only the MONEY amount; neither "Twitter" nor "tech giant" is tagged.
twitter_sentence = "Twitter confirms acquisition deal by tech giant, with a staggering price tag of $45 billion."
doc4 = nlp(twitter_sentence)

for entity in doc4.ents:
    print(entity.text, " | ", entity.label_)

$45 billion  |  MONEY


In [12]:
# Slice tokens 5 and 6 out of doc4 (the end index is exclusive); the
# trailing expression displays the resulting text, "tech giant".
span_start, span_end = 5, 7
custom_ent = doc4[span_start:span_end]
custom_ent

tech giant

In [13]:
# Show the type of the sliced object: slicing doc4 produced a spaCy Span.
type(custom_ent)

spacy.tokens.span.Span

In [14]:
# Build ORG spans from token offsets (the end index is exclusive):
# tokens [0, 1) cover "Twitter" and tokens [5, 7) cover "tech giant".
# `Span` is imported once in the notebook's top import cell.
s1 = Span(doc4, 0, 1, label="ORG")
s2 = Span(doc4, 5, 7, label="ORG")

# default="unmodified" leaves the annotations on all other tokens as they
# were, so the MONEY entity the model already found is kept alongside the
# two new spans. Note that this mutates doc4 in place.
doc4.set_ents([s1, s2], default="unmodified")

In [15]:
# Re-list doc4's entities to confirm the manually added ORG spans now appear
# next to the entity the model detected on its own.
for entity in doc4.ents:
    entity_text, label_name = entity.text, entity.label_
    print(entity_text, " | ", label_name)

Twitter  |  ORG
tech giant  |  ORG
$45 billion  |  MONEY
