## Named Entity Recognition (NER)

### Use Cases 

In [6]:
import spacy

In [7]:
# Load the installed "en_core_web_sm" pipeline package; every later cell
# reuses this `nlp` object to turn raw text into a Doc.
nlp = spacy.load("en_core_web_sm")

In [3]:
# List the names of the pipeline components; the "ner" component is the one
# this notebook relies on for doc.ents.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer']

In [4]:
# Run the pipeline on a sample sentence; the entities found are read from
# doc.ents in the next cell.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

In [5]:
# Show the text of every entity span found in the document.
for ent in doc.ents:
    print(ent.text)

Tesla Inc
$45 billion


In [9]:
doc = nlp("Tesla Inc. is going to acquire twitter for $45 billion")

# doc.ents holds all detected entities; show each one's text next to its label.
for ent in doc.ents:
    print(f"{ent.text} | {ent.label_}")

Tesla Inc. | ORG
$45 billion | MONEY


In [20]:
doc = nlp("Tesla Inc. is going to acquire Twitter for $45 billion")

# spacy.explain() turns a label code such as "ORG" into a readable description.
for ent in doc.ents:
    print(f"{ent.text} | {ent.label_} {spacy.explain(ent.label_)}")

Tesla Inc. | ORG Companies, agencies, institutions, etc.
Twitter | PRODUCT Objects, vehicles, foods, etc. (not services)
$45 billion | MONEY Monetary values, including unit


In [21]:
# displaCy is spaCy's visualizer module.
from spacy import displacy

# style="ent" selects the entity view: the text of `doc` with its entity
# spans highlighted and labelled.
displacy.render(doc, style="ent")

### Entities covered by spaCy

In [24]:
# Labels registered for the pipeline's "ner" component, i.e. the entity types
# it can assign.
nlp.pipe_labels["ner"]

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [27]:
doc = nlp("Michael Bloomberg founded Bloomberg Inc. in 1982")

# Print one "text | label" line per detected entity.
for ent in doc.ents:
    print(f"{ent.text} | {ent.label_}")

Michael Bloomberg | PERSON
Bloomberg Inc. | ORG
1982 | DATE


### Setting new entities

In [35]:
doc = nlp("tesla is going to acquire twitter for $45 billion")

# Same sentence with the company names in lowercase; list whatever entities
# the model still finds.
for ent in doc.ents:
    print(f"{ent.text} | {ent.label_}")

$45 billion | MONEY


In [45]:
doc[0] # a single integer index returns one Token (not a Span)

tesla

In [46]:
# Confirm the type returned by integer indexing into a Doc.
type(doc[0])

spacy.tokens.token.Token

In [47]:
doc[2:5] # slicing returns a Span; here tokens 2, 3 and 4 (end index is exclusive)

going to acquire

In [48]:
# Confirm the type returned by slicing a Doc.
type(doc[2:5])

spacy.tokens.span.Span

In [50]:
from spacy.tokens import Span

In [53]:
# Token 0 is "tesla" and token 5 is "twitter". Wrap each in a one-token Span
# labelled ORG (the Span end index is exclusive, hence start + 1).
org_spans = [Span(doc, start, start + 1, label="ORG") for start in (0, 5)]
s1, s2 = org_spans

# default="unmodified" leaves tokens outside the new spans untouched, so the
# entities the model already predicted are kept alongside the two added here.
doc.set_ents(org_spans, default="unmodified")

In [54]:
# List the entities again to confirm the manually added spans are present.
for ent in doc.ents:
    print(f"{ent.text} | {ent.label_}")

tesla | ORG
twitter | ORG
$45 billion | MONEY
