In [1]:
import spacy
from spacy import displacy

In [2]:
# A blank English pipeline: no components are registered, yet calling it
# still splits the text into tokens.
nlp = spacy.blank("en")
doc = nlp("Daniel ate 100$ of kebab. Then he said give me more kebab.")

# One token per line; print() renders each token through str().
print(*doc, sep="\n")

Daniel
ate
100
$
of
kebab
.
Then
he
said
give
me
more
kebab
.


In [3]:
# Names of the components registered on the current pipeline.
nlp.pipe_names

[]

In [4]:
# Rebind `nlp` to the packaged "en_core_web_sm" pipeline.
nlp = spacy.load("en_core_web_sm")
# Show its component names for comparison with the blank pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [5]:
# (name, component object) pairs, listed in the same order as pipe_names.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7fea5bf2cef0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x7fe95d48d1f0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x7fe95d4809e0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x7fe95d13dd50>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7fe95d3ecf10>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7fe95d480900>)]

In [7]:
doc = nlp("Daniel ate 100$ of kebab. Then he said give me more kebab.")

# Columns: token | readable part-of-speech label | lemma (base form of the word).
for tok in doc:
    pos_description = spacy.explain(tok.pos_)
    print(tok, pos_description, tok.lemma_, sep="  |  ")

Daniel  |  proper noun  |  Daniel
ate  |  verb  |  eat
100  |  numeral  |  100
$  |  numeral  |  $
of  |  adposition  |  of
kebab  |  proper noun  |  kebab
.  |  punctuation  |  .
Then  |  adverb  |  then
he  |  pronoun  |  he
said  |  verb  |  say
give  |  verb  |  give
me  |  pronoun  |  I
more  |  adverb  |  more
kebab  |  adjective  |  kebab
.  |  punctuation  |  .


Named Entity Recognition

In [9]:
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# Columns: entity text | entity label | readable description of the label.
for entity in doc.ents:
    label = entity.label_
    print(entity.text, label, spacy.explain(label), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [10]:
# Render the entities found in `doc` as highlighted text.
# `displacy` is imported in the notebook's top import cell so that every
# dependency is visible in one place.
displacy.render(doc, style="ent")

Adding a component to a blank pipeline

In [11]:
# Start from an empty English pipeline, then reuse only the `ner` component
# from a loaded "en_core_web_sm" pipeline via `source=`.
nlp = spacy.blank("en")
source_nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("ner", source=source_nlp)

# The new pipeline should list exactly the one sourced component.
nlp.pipe_names

['ner']

In [14]:
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# The sourced `ner` component on its own populates doc.ents.
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Tesla Inc ORG
$45 billion MONEY
