# NLP Tutorial: Named Entity Recognition (NER)

In [1]:
import spacy

In [3]:
# Load the pretrained `en_core_web_sm` English pipeline once; every later
# cell reuses this `nlp` object.
nlp = spacy.load("en_core_web_sm")
# Show the names of the components in the loaded pipeline ('ner' is the one
# this tutorial focuses on).
nlp.pipe_names


['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [4]:
# Print every attribute/method name exposed by the pipeline object, as a
# quick index of what can be inspected in the following cells.
print(dir(nlp))

['Defaults', '__annotations__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_components', '_config', '_disabled', '_ensure_doc', '_ensure_doc_with_context', '_factory_meta', '_get_pipe_index', '_has_gpu_model', '_link_components', '_meta', '_multiprocessing_pipe', '_optimizer', '_path', '_pipe_configs', '_pipe_meta', '_resolve_component_status', 'add_pipe', 'analyze_pipes', 'batch_size', 'begin_training', 'component', 'component_names', 'components', 'config', 'create_optimizer', 'create_pipe', 'create_pipe_from_source', 'default_config', 'default_error_handler', 'disable_pipe', 'disable_pipes', 'disabled', 'enable_pipe', 'evaluate', 'factories', 'factory', 'factory_names', 'fro

In [8]:
# Inspect (without calling) the method that runs when we write `nlp(text)`;
# the repr shows it is `Language.__call__` bound to the English pipeline.
nlp.__call__

<bound method Language.__call__ of <spacy.lang.en.English object at 0x000001F01069FEE0>>

In [5]:
# List the pipeline as (name, component object) pairs, i.e. the objects
# behind the names returned by `nlp.pipe_names`.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x1f02628d900>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x1f02628d4e0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x1f02568a110>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1f0261eaa40>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x1f0261b9480>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x1f02568a1f0>)]

In [6]:
# Show the language-defaults class attached to this pipeline
# (`EnglishDefaults` for the English model).
nlp.Defaults

spacy.lang.en.EnglishDefaults

In [7]:
# Show the type annotations declared on the pipeline's class attributes.
nlp.__annotations__

{'lang': typing.Optional[str],
 '_factory_meta': typing.Dict[str, ForwardRef('FactoryMeta')]}

In [9]:
# NOTE: this references `__dir__` without calling it, so the cell displays
# the method object itself, not a list of attribute names (use `dir(nlp)`
# for the list).
nlp.__dir__

<function English.__dir__()>

In [10]:
# Run the pipeline on a sample sentence, then print each detected entity as
# "text | label | human-readable description of the label".
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")
for ent in doc.ents:
    label = ent.label_  # read once; used for both the label and its explanation
    print(ent.text, " | ", label, " | ", spacy.explain(label))

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [12]:
from spacy import displacy

In [13]:
# Visualise the entities found in `doc` using displaCy's entity style.
displacy.render(doc, style="ent")

# List all the entity labels the model supports

In [14]:
# Entity labels that the pipeline's "ner" component can assign.
nlp.pipe_labels["ner"]

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [17]:
# Re-run NER on a variant of the sentence ("Twitter Inc") and print each
# entity together with its start/end character offsets in the text.
doc = nlp("Tesla Inc is going to acquire Twitter Inc for $45 billion")
for ent in doc.ents:
    char_range = f"{ent.start_char} | {ent.end_char}"
    print(ent.text, " | ", ent.label_, " | ", char_range)

Tesla Inc  |  ORG  |  0 | 9
Twitter Inc  |  ORG  |  30 | 41
$45 billion  |  MONEY  |  46 | 57


# Setting custom entities

In [18]:
# Without the "Inc" suffix the model labels "Twitter" as PRODUCT rather than
# ORG; the following cells correct that by setting entities manually.
doc = nlp("Tesla is going to acquire Twitter for $45 billion")
for ent in doc.ents:
    # sep reproduces the original "text  |  label" spacing exactly.
    print(ent.text, ent.label_, sep="  |  ")

Tesla  |  ORG
Twitter  |  PRODUCT
$45 billion  |  MONEY


In [20]:
# Slicing a Doc by token index gives a Span; tokens 2-4 here are
# "going to acquire".
s = doc[2:5]
s

going to acquire

In [21]:
# Confirm that a Doc slice is a `spacy.tokens.span.Span`, the same type
# used below to define custom entities.
type(s)

spacy.tokens.span.Span

In [22]:
from spacy.tokens import Span

# Build labelled spans by token index (end index is exclusive):
# token 0 is "Tesla" and token 5 is "Twitter" in the current `doc`.
s1 = Span(doc, 0, 1, label="ORG")
s2 = Span(doc, 5, 6, label="ORG")

# Overwrite the entities for those spans in place. default="unmodified"
# leaves every other existing entity (e.g. "$45 billion" / MONEY) as it was.
# NOTE: this mutates `doc`, and the indices above are only valid for the
# sentence parsed in the preceding "Setting custom entities" cell.
doc.set_ents([s1, s2], default="unmodified")

In [23]:
# Re-list the entities of `doc` to verify the manual override:
# "Twitter" should now be reported as ORG.
for ent in doc.ents:
    print(ent.text, ent.label_, sep="  |  ")


Tesla  |  ORG
Twitter  |  ORG
$45 billion  |  MONEY
