In [51]:
# Named entity recognition
import spacy

# Load the `en_core_web_sm` English pipeline; every later cell reuses this
# single `nlp` object, so changes made to its components persist across cells.
nlp = spacy.load('en_core_web_sm')

In [52]:
# Show the names of the components in the loaded pipeline ('ner' is the one
# that produces doc.ents).
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [53]:
# Run the pipeline on a sample sentence, then print each recognised entity's
# text next to its label.
doc = nlp("Tesla Inc is going to acquire Twitter Inc for $45 billion")
for ent in doc.ents:
    print(ent.text, " | ", ent.label_)

Tesla Inc  |  ORG
Twitter Inc  |  ORG
$45 billion  |  MONEY


In [54]:
from spacy import displacy

# Visualise the entities of the current `doc` using displaCy's "ent" style.
displacy.render(doc, style="ent")

In [55]:
# List the entity labels known to the pipeline's 'ner' component.
nlp.pipe_labels['ner']

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [56]:
# NOTE(review): "MIchael" looks like a typo for "Michael" — confirm whether the
# odd casing is intentional before changing it, since it is model input.
doc = nlp("MIchael Bloomberg founded Bloomberg L.P in 1982")


# Print each entity with its label and spaCy's description of that label.
# In the recorded output the company "Bloomberg L.P" is tagged PERSON, i.e. the
# model mislabels it for this sentence.
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, " | ", spacy.explain(ent.label_))

MIchael Bloomberg  |  PERSON  |  People, including fictional
Bloomberg L.P  |  PERSON  |  People, including fictional
1982  |  DATE  |  Absolute or relative dates or periods


In [57]:
# Indexing a Doc with a single integer gives a Token.
type(doc[0])

spacy.tokens.token.Token

In [58]:
# Slicing a Doc gives a Span.
type(doc[2:5])

spacy.tokens.span.Span

In [59]:
# Display the Span covering tokens 2, 3 and 4 (the slice end is exclusive).
doc[2:5]

founded Bloomberg L.P

In [60]:
# Same acquisition sentence without the "Inc" suffixes. In the recorded output
# "Twitter" is now tagged PRODUCT instead of ORG.
doc = nlp("Tesla is going to acquire Twitter for $45 billion ")


# Print each entity with its label and spaCy's description of that label.
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, " | ", spacy.explain(ent.label_))

Tesla  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  PRODUCT  |  Objects, vehicles, foods, etc. (not services)
$45 billion  |  MONEY  |  Monetary values, including unit


In [61]:
from spacy.tokens import Span

# Manually label two single-token spans of the current `doc` as ORG:
# token 0 ("Tesla") and token 5 ("Twitter"). Each span is [start, start + 1).
t1, t2 = (Span(doc, start, start + 1, label="ORG") for start in (0, 5))

# default="unmodified" leaves every entity not covered by the new spans as the
# model predicted it (the MONEY entity is still present in the next cell).
doc.set_ents([t1, t2], default="unmodified")

In [62]:
# Print the entities again after the manual override; the recorded output now
# shows "Twitter" as ORG.
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, " | ", spacy.explain(ent.label_))

Tesla  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [63]:
# Print every label of the 'ner' component together with spaCy's built-in
# description of it.
for label in nlp.pipe_labels['ner']:
    print(label, ' | ', spacy.explain(label))

CARDINAL  |  Numerals that do not fall under another type
DATE  |  Absolute or relative dates or periods
EVENT  |  Named hurricanes, battles, wars, sports events, etc.
FAC  |  Buildings, airports, highways, bridges, etc.
GPE  |  Countries, cities, states
LANGUAGE  |  Any named language
LAW  |  Named documents made into laws.
LOC  |  Non-GPE locations, mountain ranges, bodies of water
MONEY  |  Monetary values, including unit
NORP  |  Nationalities or religious or political groups
ORDINAL  |  "first", "second", etc.
ORG  |  Companies, agencies, institutions, etc.
PERCENT  |  Percentage, including "%"
PERSON  |  People, including fictional
PRODUCT  |  Objects, vehicles, foods, etc. (not services)
QUANTITY  |  Measurements, as of weight or distance
TIME  |  Times smaller than a day
WORK_OF_ART  |  Titles of books, songs, etc.


In [64]:
# Sentence containing a phone number. The label list printed earlier has no
# phone-number type, and the recorded output only picks up the "555" fragments
# as CARDINAL.
doc = nlp(
    "New York is a City of Joy, I am feeling happy ,my phone number (555) 555-5555")
for ent in doc.ents:
    print(ent.text, " | ", ent.label_)

New York  |  GPE
a City of Joy  |  GPE
555  |  CARDINAL
555  |  CARDINAL


In [65]:
# Build our own NER model

In [66]:
# Check which components the pipeline currently contains before modifying it.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [74]:
# Remove the 'entity_ruler' component if the pipeline has one.
# Language.remove_pipe raises ValueError when the name is not registered, and
# the pipe_names output above shows no 'entity_ruler', so an unguarded call
# only works when an out-of-order cell has added the component first.
# Kept as a single expression so the removed (name, component) tuple is still
# displayed as the cell output when something was actually removed.
nlp.remove_pipe('entity_ruler') if 'entity_ruler' in nlp.component_names else None

('entity_ruler', <spacy.pipeline.entityruler.EntityRuler at 0x754a23edc050>)

In [81]:
# Inspect the pipeline again.
# NOTE(review): the recorded output lists 'entity_ruler' after 'ner', so a cell
# that is not shown here (execution counts jump from 74 to 81) must have added
# it back — confirm and restore that cell so the notebook runs top to bottom.
nlp.pipe_names

['tok2vec',
 'tagger',
 'parser',
 'attribute_ruler',
 'lemmatizer',
 'ner',
 'entity_ruler']