In [1]:
import spacy
from spacy import displacy

In [2]:
# Start from a blank English pipeline: it tokenizes text but, as the
# `nlp.pipe_names` cell further down shows, has no processing components.
nlp = spacy.blank("en")

sample_text = "Captain America ate 100$ of watermelon. Then he said he doesn't want anymore."
doc = nlp(sample_text)

In [3]:
# Show the tokenizer's output, one token per line.
for tok in doc:
    print(tok)

Captain
America
ate
100
$
of
watermelon
.
Then
he
said
he
does
n't
want
anymore
.


In [4]:
# List the component names of the current pipeline; for the blank pipeline
# created above this displays an empty list.
nlp.pipe_names

[]

In [5]:
# Rebind `nlp` to the packaged `en_core_web_sm` pipeline, replacing the blank
# pipeline used so far. The package must already be installed.
nlp = spacy.load("en_core_web_sm")

In [6]:
# Component names of the loaded pipeline, in pipeline order.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [7]:
# Same components as `pipe_names`, shown as (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7e43d8cd3c40>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x7e43d89d15a0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x7e43d8cf66c0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x7e43d88484c0>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7e43d8a23780>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7e43d8cf60a0>)]

In [9]:
# Re-process the sample sentence with the current `nlp` pipeline so each
# token's part-of-speech tag and lemma can be printed.
doc = nlp("Captain America ate 100$ of watermelon. Then he said he doesn't want anymore.")

# One row per token: text | coarse part-of-speech tag | lemma.
for token in doc:
    print(f"{token} | {token.pos_} | {token.lemma_}")

Captain | PROPN | Captain
America | PROPN | America
ate | VERB | eat
100 | NUM | 100
$ | NUM | $
of | ADP | of
watermelon | NOUN | watermelon
. | PUNCT | .
Then | ADV | then
he | PRON | he
said | VERB | say
he | PRON | he
does | AUX | do
n't | PART | not
want | VERB | want
anymore | ADV | anymore
. | PUNCT | .


In [18]:
# Two near-identical sentences: the only difference is the "Inc" suffix, which
# changes how the recognizer labels "Twitter" (see the printed output below).
doc = nlp("Twitter is going to buy chips for $5")
doc2 = nlp("Twitter Inc is going to buy chips for $5")

# One loop over both docs replaces the two copy-pasted loops; the printed rows
# and their order (all of `doc`, then all of `doc2`) are unchanged.
for parsed in (doc, doc2):
    for ent in parsed.ents:
        # Row format: entity text | label | human-readable label description.
        print(ent.text, "|", ent.label_, "|", spacy.explain(ent.label_))

Twitter | PERSON | People, including fictional
$5 | MONEY | Monetary values, including unit
Twitter Inc | ORG | Companies, agencies, institutions, etc.
$5 | MONEY | Monetary values, including unit


In [19]:
# `displacy` is imported with the notebook's other imports in the first cell,
# so a Restart & Run All does not depend on a mid-notebook import.
# Render the entities of `doc` (the sentence without "Inc") inline.
displacy.render(doc, style="ent")

In [20]:
# Render the entities of `doc2` (the "Twitter Inc" sentence) for comparison.
displacy.render(doc2, style="ent")

In [25]:
# Rebind `nlp` to the packaged `fr_core_news_sm` pipeline; from here on `nlp`
# no longer refers to the English pipeline. The package must be installed.
nlp = spacy.load("fr_core_news_sm")

In [26]:
# Run the French translation of the sentence through the current `nlp`
# pipeline; this rebinds `doc`.
doc = nlp("Twitter Inc va acheter des chips pour $5")

# Row format: entity text | label | human-readable label description.
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep=" | ")

Twitter Inc | ORG | Companies, agencies, institutions, etc.


In [27]:
# Render the entities of the most recently processed `doc` inline.
displacy.render(doc, style="ent")

In [28]:
# Start over with a blank English pipeline (rebinds `nlp` again) so that a
# single component can be added to it by hand.
nlp = spacy.blank("en")

In [29]:
# Load `en_core_web_sm` under a separate name; it serves as the source that a
# component is copied from in the next cell.
source_nlp = spacy.load("en_core_web_sm")

In [30]:
# Add the "ner" component to the blank pipeline, taking it from `source_nlp`
# instead of creating a new one. The call's return value (the component) is
# what the cell displays.
nlp.add_pipe("ner", source=source_nlp)

<spacy.pipeline.ner.EntityRecognizer at 0x7e43d60fccf0>

In [31]:
# Confirm that the pipeline now holds only the sourced "ner" component.
nlp.pipe_names

['ner']

In [32]:
# Process the sentence with the NER-only pipeline built above (rebinds `doc`).
doc = nlp("Twitter is going to buy chips for $5")

# Row format: entity text | label | human-readable label description.
for ent in doc.ents:
    print(f"{ent.text} | {ent.label_} | {spacy.explain(ent.label_)}")

Twitter | PERSON | People, including fictional
$5 | MONEY | Monetary values, including unit
