In [1]:
import spacy

In [2]:
# Create a blank English pipeline: it tokenizes text but has no trained components.
nlp = spacy.blank("en")

In [3]:
# Inspect the concrete class of the pipeline object returned by spacy.blank("en").
print(type(nlp))

<class 'spacy.lang.en.English'>


In [4]:
# Run the sample sentence through the pipeline; tokenization alone needs no components.
doc = nlp("Captain america ate 5$ somosa. Then he said i could do this all the day.")

# Show one token per line.
for token in doc:
    print(token.text)

Captain
america
ate
5
$
somosa
.
Then
he
said
i
could
do
this
all
the
day
.


In [5]:
# List the component names of the blank pipeline created with spacy.blank("en").
nlp.pipe_names


[]

In [6]:
# Process the same sentence again with the current (blank) pipeline.
doc = nlp("Captain america ate 5$ somosa. Then he said i could do this all the day.")

# Print each token next to its coarse part-of-speech tag and its lemma.
for token in doc:
    print(token.text, token.pos_, token.lemma_, sep="  |  ")

Captain  |    |  
america  |    |  
ate  |    |  
5  |    |  
$  |    |  
somosa  |    |  
.  |    |  
Then  |    |  
he  |    |  
said  |    |  
i  |    |  
could  |    |  
do  |    |  
this  |    |  
all  |    |  
the  |    |  
day  |    |  
.  |    |  


By default, **nlp = spacy.blank("en")** creates a pipeline with no components (only a tokenizer), so **token.pos_** and **token.lemma_** are empty.

In [7]:
# spacy was already imported in the first cell; the repeated import is harmless.
import spacy
# Replace the blank pipeline with the small trained English pipeline.
nlp = spacy.load("en_core_web_sm")

In [8]:
# List the component names of the pipeline loaded with spacy.load("en_core_web_sm").
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [9]:
# Process the sample sentence with the trained pipeline this time.
doc = nlp("Captain america ate 5$ somosa. Then he said i could do this all the day.")

# Print each token next to its coarse part-of-speech tag and its lemma.
for token in doc:
    print(token.text, token.pos_, token.lemma_, sep="  |  ")

Captain  |  PROPN  |  Captain
america  |  PROPN  |  america
ate  |  VERB  |  eat
5  |  NUM  |  5
$  |  SYM  |  $
somosa  |  NOUN  |  somosa
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
i  |  PRON  |  I
could  |  AUX  |  could
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
the  |  DET  |  the
day  |  NOUN  |  day
.  |  PUNCT  |  .


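With **nlp = spacy.load("en_core_web_sm")**, **token.pos_** comes from the **tagger** component (whose fine-grained tags are mapped to coarse-grained POS labels by the **attribute_ruler**), and **token.lemma_** comes from the **lemmatizer** component.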

In [10]:
# Process a sentence that mentions a company, a product and an amount of money.
doc = nlp("Tesla Inc is acquired Twitter for 45$ Billon.")

# Print each recognized entity with its label and a human-readable description of that label.
for entity in doc.ents:
    print(entity.text, entity.label_, spacy.explain(entity.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  PRODUCT  |  Objects, vehicles, foods, etc. (not services)
45$ Billon  |  MONEY  |  Monetary values, including unit


Here, **entity.label_** comes from the **ner** pipeline component.

In [11]:
# Visualize the entities found in `doc` using displaCy's entity ("ent") style.
from spacy import displacy
displacy.render(doc, style="ent")

In [12]:
# Switch back to a blank English pipeline, which has no "ner" component.
nlp = spacy.blank("en")

doc = nlp("Tesla Inc is acquired Twitter for 45$ Billon.")

# With no entity recognizer in the pipeline, doc.ents is empty and nothing is printed.
for entity in doc.ents:
    print(entity.text, entity.label_, spacy.explain(entity.label_), sep="  |  ")

Adding the **"ner"** component from **spacy.load("en_core_web_sm")** to a **spacy.blank("en")** pipeline:

In [13]:
# Load the trained pipeline only to borrow its "ner" component.
source_nlp = spacy.load("en_core_web_sm")

# Start again from a blank English pipeline with no components.
nlp = spacy.blank("en")

# Add the trained "ner" component from the source pipeline to the blank one.
nlp.add_pipe("ner", source=source_nlp)

# Confirm that "ner" is now the only component in the pipeline.
nlp.pipe_names

['ner']

In [14]:
# Re-run the sentence through the blank pipeline that now contains the sourced "ner" component.
doc = nlp("Tesla Inc is acquired Twitter for 45$ Billon.")

# Print each recognized entity with its label and a human-readable description of that label.
for entity in doc.ents:
    print(entity.text, entity.label_, spacy.explain(entity.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  PRODUCT  |  Objects, vehicles, foods, etc. (not services)
45$ Billon  |  MONEY  |  Monetary values, including unit
