In [1]:
import spacy

In [2]:
# Load a pretrained pipeline. It has to be downloaded once beforehand with:
#     python -m spacy download en_core_web_sm
# The package name starts with the language code (e.g. `en` for English,
# `hi` for Hindi) -- check spaCy's model listing to see which languages
# actually ship a trained pipeline.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Show the loaded pipeline as (name, component object) pairs, in the order
# they are listed by the pipeline.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x2a9a8feff40>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x2a9a8feefe0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x2a9a8ea7060>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x2a9a9112880>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x2a9a9207400>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x2a9a8ea6ff0>)]

In [4]:
# Same pipeline, names only -- easier to read than the full component reprs.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [6]:
# Run the full pipeline on a sample text; the trailing made-up word shows
# what the tagger and lemmatizer report for a token they have never seen.
doc = nlp("Captain america ate 100$ of samosa. Then he said I can do this all day. wowhtroirg")

# One row per token: text | human-readable part-of-speech | lemma.
for token in doc:
    print(token, spacy.explain(token.pos_), token.lemma_, sep="  |  ")

Captain  |  proper noun  |  Captain
america  |  proper noun  |  america
ate  |  verb  |  eat
100  |  numeral  |  100
$  |  numeral  |  $
of  |  adposition  |  of
samosa  |  proper noun  |  samosa
.  |  punctuation  |  .
Then  |  adverb  |  then
he  |  pronoun  |  he
said  |  verb  |  say
I  |  pronoun  |  I
can  |  auxiliary  |  can
do  |  verb  |  do
this  |  pronoun  |  this
all  |  determiner  |  all
day  |  noun  |  day
.  |  punctuation  |  .
wowhtroirg  |  noun  |  wowhtroirg


# Named Entity Recognition

In [10]:
# Process a sentence and list every named entity the pipeline finds.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# One row per entity: text, label code, human-readable label description.
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="   ")

Tesla Inc   ORG   Companies, agencies, institutions, etc.
$45 billion   MONEY   Monetary values, including unit


In [11]:
# Visualize the named entities of `doc` with displaCy's entity ("ent") style
# instead of printing them as plain text.
# NOTE: `doc` is whatever the most recently executed cell assigned to it.
from spacy import displacy
displacy.render(doc, style="ent")

In [12]:
# Adding a component to a blank pipeline.
# Load the pretrained pipeline that will act as the source of the component.
source_nlp = spacy.load("en_core_web_sm")

# Start from an empty English pipeline (no components yet) ...
nl = spacy.blank("en")
# ... and add the "ner" component taken from the pretrained source pipeline.
nl.add_pipe("ner", source=source_nlp)
# Display the component names to confirm that "ner" is the only one present.
nl.pipe_names

['ner']

In [13]:
# Exercise the NER-only pipeline `nl` built from the blank pipeline, not the
# full pretrained `nlp` pipeline, so this cell actually demonstrates that the
# sourced "ner" component works on its own.
# NOTE(review): the recorded output below was produced with `nlp`; re-run the
# cell to confirm the entities found by `nl` match.
doc = nl("Tesla Inc is going to acquire twitter for $45 billion")

# Print each detected entity together with its label code.
for ent in doc.ents:
    print(ent.text, ent.label_)

Tesla Inc ORG
$45 billion MONEY


In the image below you can see the sentencizer component in the pipeline.
![image.png](attachment:image.png)