In [1]:
!pip install spacy




In [2]:
import spacy


### Blank pipeline

In [3]:
# Create an English pipeline via spacy.blank; its component list is shown further below.
nlp = spacy.blank('en')

In [4]:
# Run the sample text through the blank pipeline to get a Doc.
# NOTE(review): "recieved" and "Scientinst" are misspelled in the sample text —
# confirm whether that is intentional; the recorded output reflects these spellings.
doc = nlp('I recieved an amount of $100 per hour. I work as a Data Scientinst')

In [5]:
# Print the text of every token in the Doc, one per line.
for word in doc:
    print(word.text)

I
recieved
an
amount
of
$
100
per
hour
.
I
work
as
a
Data
Scientinst


In [6]:
# List the names of the components in the current pipeline.
nlp.pipe_names

[]

### Trained pipelines

In [7]:
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m869.1 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [8]:
# Load the en_core_web_sm package installed by the download step.
# This rebinds `nlp`, replacing the blank pipeline created earlier.
nlp = spacy.load("en_core_web_sm")


In [9]:
# Show the (name, component) pairs that make up the loaded pipeline.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x13737e570>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x13762e030>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x1374ae0a0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x13763f650>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x13763af10>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x1374ae180>)]

In [10]:
# Process the sample sentence (here ending with a period) with the loaded pipeline.
# NOTE(review): "recieved" and "Scientinst" are misspelled in the sample text —
# confirm whether that is intentional; the recorded output reflects these spellings.
doc = nlp('I recieved an amount of $100 per hour. I work as a Data Scientinst.')

In [11]:
# One row per token: token | explanation of its part-of-speech tag | lemma
for tok in doc:
    pos_description = spacy.explain(tok.pos_)
    print(f"{tok} | {pos_description} | {tok.lemma_}")

I | pronoun | I
recieved | verb | recieve
an | determiner | an
amount | noun | amount
of | adposition | of
$ | symbol | $
100 | numeral | 100
per | adposition | per
hour | noun | hour
. | punctuation | .
I | pronoun | I
work | verb | work
as | adposition | as
a | determiner | a
Data | proper noun | Data
Scientinst | proper noun | Scientinst
. | punctuation | .


### Named Entity Recognition (NER)

In [12]:
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# Print each entity found in the Doc followed by its label.
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Tesla Inc ORG
$45 billion MONEY


In [13]:
from spacy import displacy

# Render the entities of `doc` using displaCy's "ent" style.
displacy.render(doc, style="ent")

### Trained pipeline (French)

In [16]:
!python -m spacy download fr_core_news_sm





Collecting fr-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.7.0/fr_core_news_sm-3.7.0-py3-none-any.whl (16.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
Installing collected packages: fr-core-news-sm
Successfully installed fr-core-news-sm-3.7.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('fr_core_news_sm')


In [17]:
# Load the fr_core_news_sm package; this rebinds `nlp`, so later cells use this pipeline.
nlp = spacy.load("fr_core_news_sm")


In [18]:
doc = nlp("Tesla Inc va racheter Twitter pour $45 milliards de dollars")

# One row per entity: text | label | explanation of the label
for entity in doc.ents:
    label = entity.label_
    print(f"{entity.text}  |  {label}  |  {spacy.explain(label)}")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  MISC  |  Miscellaneous entities, e.g. events, nationalities, products or works of art


In [19]:
# One row per token: token | part-of-speech tag | lemma
for tok in doc:
    print(f"{tok}  |  {tok.pos_}  |  {tok.lemma_}")

Tesla  |  PROPN  |  Tesla
Inc  |  PROPN  |  Inc
va  |  VERB  |  aller
racheter  |  VERB  |  racheter
Twitter  |  VERB  |  twitter
pour  |  ADP  |  pour
$  |  NOUN  |  dollar
45  |  NUM  |  45
milliards  |  NOUN  |  milliard
de  |  ADP  |  de
dollars  |  NOUN  |  dollar


### Adding a component to a blank pipeline

In [20]:
# Load en_core_web_sm to serve as the source of the component added below.
source_nlp = spacy.load("en_core_web_sm")

# Start from a blank English pipeline and add only the "ner" component,
# taken from source_nlp; then list the resulting component names.
nlp = spacy.blank("en")
nlp.add_pipe("ner", source=source_nlp)
nlp.pipe_names

['ner']

In [21]:
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# Print each entity found by the pipeline followed by its label.
for entity in doc.ents:
    print(" ".join((entity.text, entity.label_)))

Tesla Inc ORG
$45 billion MONEY
