In [1]:
import spacy
from spacy import displacy

In [3]:
# Start from an empty English pipeline: it can still split text into tokens.
nlp = spacy.blank("en")
doc = nlp("Dr. Strange loves pav bhaji of Mumbai. Hulk loves chaat of delhi")

# Show one token per line.
print(*doc, sep="\n")

Dr.
Strange
loves
pav
bhaji
of
Mumbai
.
Hulk
loves
chaat
of
delhi


In [4]:
# The blank pipeline has no components, so this list is empty.
nlp.pipe_names

[]

In [8]:
# Rebind `nlp` to the pretrained "en_core_web_sm" pipeline (replaces the blank one).
nlp = spacy.load("en_core_web_sm")

In [10]:
# (name, component) pairs that make up the loaded pipeline.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x244001d1180>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x244001d30a0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x2447ea15620>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x24400f13d40>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x24400f1edc0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x2447ea15700>)]

In [13]:
# Component names only, for the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [12]:
# The tagger gives the part-of-speech tag and the lemmatizer gives the lemma.
doc = nlp("Captain america ate 100$ samosa. Then he said I can do this all day.")
for token in doc:
    # One row per token: text | POS | lemma
    print(token.text, token.pos_, token.lemma_, sep="  |  ")

Captain  |  PROPN  |  Captain
america  |  PROPN  |  america
ate  |  VERB  |  eat
100  |  NUM  |  100
$  |  SYM  |  $
samosa  |  NOUN  |  samosa
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
day  |  NOUN  |  day
.  |  PUNCT  |  .


In [21]:
# The ner component recognizes named entities in the text.
doc = nlp("Tesla Inc is going to acquire Twitter for $45 billion")
for ent in doc.ents:
    # One row per entity: text | label | human-readable description of the label
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  PRODUCT  |  Objects, vehicles, foods, etc. (not services)
$45 billion  |  MONEY  |  Monetary values, including unit


In [22]:
# Visual display: highlight the entities of `doc` inline in the notebook.
# `displacy` is imported in the notebook's top import cell.
displacy.render(doc, style="ent")

In [24]:
# A blank pipeline has no components, so doc.ents is empty and nothing is printed.
nlp = spacy.blank("en")
doc = nlp("Tesla Inc is going to acquire Twitter for $45 billion")
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

In [25]:
# We will add ner to the blank pipeline so it can detect entities.
# Load the pretrained pipeline to copy the ner component from.
source_nlp = spacy.load("en_core_web_sm")
# add_pipe raises if a component named "ner" is already present, so only add it
# when it is missing; this keeps the cell safe to run more than once.
if "ner" not in nlp.pipe_names:
    nlp.add_pipe("ner", source=source_nlp)
nlp.pipe_names


['ner']

In [26]:
# Run the same sentence again now that the pipeline contains ner.
doc = nlp("Tesla Inc is going to acquire Twitter for $45 billion")
for ent in doc.ents:
    # One row per entity: text | label | human-readable description of the label
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  PRODUCT  |  Objects, vehicles, foods, etc. (not services)
$45 billion  |  MONEY  |  Monetary values, including unit
