In [3]:
import spacy

In [4]:
# Start from a blank English pipeline created with spacy.blank().
nlp = spacy.blank("en")
doc = nlp("Captain america ate 100$ of samosa. Then he said I can do this all day. he has some money")

# One row per token: text, part-of-speech explanation, lemma.
# (In the recorded output the explanation is None and the lemma is empty.)
for tok in doc:
    print(tok, spacy.explain(tok.pos_), tok.lemma_, sep="  |  ")

Captain  |  None  |  
america  |  None  |  
ate  |  None  |  
100  |  None  |  
$  |  None  |  
of  |  None  |  
samosa  |  None  |  
.  |  None  |  
Then  |  None  |  
he  |  None  |  
said  |  None  |  
I  |  None  |  
can  |  None  |  
do  |  None  |  
this  |  None  |  
all  |  None  |  
day  |  None  |  
.  |  None  |  
he  |  None  |  
has  |  None  |  
some  |  None  |  
money  |  None  |  




#### Download trained pipeline
To download a trained pipeline, use a command such as:

python -m spacy download en_core_web_sm

This downloads the small (`sm`) pipeline for the English language.

Further instructions: https://spacy.io/usage/models#quickstart

In [5]:
# Load the trained small English pipeline (it must already be installed,
# e.g. via `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [6]:
# Display the (name, component object) pairs that make up the loaded pipeline.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x2ce4c5efdd0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x2ce4c5efef0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x2ce4c5e90e0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x2ce4c7bb210>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x2ce4c686010>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x2ce4b37fa00>)]

In [7]:
# Re-process the same text, this time with the trained pipeline bound to `nlp`.
doc = nlp("Captain america ate 100$ of samosa. Then he said I can do this all day. he has some money")

# One row per token: text, part-of-speech explanation, lemma.
for tok in doc:
    print(tok, spacy.explain(tok.pos_), tok.lemma_, sep="  |  ")

Captain  |  proper noun  |  Captain
america  |  proper noun  |  america
ate  |  verb  |  eat
100  |  numeral  |  100
$  |  numeral  |  $
of  |  adposition  |  of
samosa  |  proper noun  |  samosa
.  |  punctuation  |  .
Then  |  adverb  |  then
he  |  pronoun  |  he
said  |  verb  |  say
I  |  pronoun  |  I
can  |  auxiliary  |  can
do  |  verb  |  do
this  |  pronoun  |  this
all  |  determiner  |  all
day  |  noun  |  day
.  |  punctuation  |  .
he  |  pronoun  |  he
has  |  verb  |  have
some  |  determiner  |  some
money  |  noun  |  money


### Named Entity Recognition

In [8]:
# Run the pipeline and list every recognised entity span with its label.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")
for span in doc.ents:
    print(f"{span.text} {span.label_}")

Tesla Inc ORG
$45 billion MONEY


In [9]:
from spacy import displacy

# Visualise the entity spans of `doc` with displaCy's "ent" style.
displacy.render(doc, style="ent")

### Pipeline in French

In [13]:
# Download the small French pipeline only when it is not installed yet, so
# re-running the notebook (Restart & Run All) does not hit the network and
# reinstall the package every time.
if not spacy.util.is_package("fr_core_news_sm"):
    spacy.cli.download("fr_core_news_sm")

✔ Download and installation successful
You can now load the package via spacy.load('fr_core_news_sm')


In [14]:
# Load the small French pipeline. Note: this rebinds `nlp`, so the cells that
# follow use the French pipeline instead of the English one.
nlp = spacy.load("fr_core_news_sm")

In [17]:
# Process a French sentence with the pipeline currently bound to `nlp`.
doc = nlp("Tesla Inc va racheter Twitter pour $45 milliards de dollars")

# One row per entity: text, label, and spaCy's explanation of that label.
for span in doc.ents:
    label = span.label_
    print(span.text, label, spacy.explain(label), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  MISC  |  Miscellaneous entities, e.g. events, nationalities, products or works of art


In [16]:
# Visualise the entity spans of the current `doc` with displaCy's "ent" style.
displacy.render(doc, style="ent")

In [19]:
# One row per token of the current `doc`: text, part-of-speech explanation, lemma.
for tok in doc:
    print(tok, spacy.explain(tok.pos_), tok.lemma_, sep="  |  ")

Tesla  |  proper noun  |  Tesla
Inc  |  proper noun  |  Inc
va  |  verb  |  aller
racheter  |  verb  |  racheter
Twitter  |  verb  |  twitter
pour  |  adposition  |  pour
$  |  noun  |  dollar
45  |  numeral  |  45
milliards  |  noun  |  milliard
de  |  adposition  |  de
dollars  |  noun  |  dollar


### Adding a component to a blank pipeline

In [20]:
# Download the small English pipeline only when it is not installed yet.
# An earlier cell already loads "en_core_web_sm", so an unconditional download
# here would just reinstall a package that is normally present.
if not spacy.util.is_package("en_core_web_sm"):
    spacy.cli.download("en_core_web_sm")

✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')


In [21]:
# Target: a blank English pipeline that will receive a single component.
nlp = spacy.blank("en")

# Donor: the trained English pipeline from which the component is taken.
source_nlp = spacy.load("en_core_web_sm")

# Copy the trained "ner" component from the donor into the blank pipeline,
# then display the resulting component names.
nlp.add_pipe("ner", source=source_nlp)
nlp.pipe_names

['ner']

In [22]:

# Run the pipeline currently bound to `nlp` and list each entity with its label.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")
for span in doc.ents:
    print(f"{span.text} {span.label_}")

Tesla Inc ORG
$45 billion MONEY


In [23]:
# Display the (name, component object) pairs of the current pipeline; the
# recorded output shows only the 'ner' component.
nlp.pipeline

[('ner', <spacy.pipeline.ner.EntityRecognizer at 0x2ce4c5eaea0>)]