# spaCy Language Processing Pipelines: Exercises

In [None]:
# Import spaCy, the NLP library used by every cell in this notebook.
import spacy

nlp = spacy.load("en_core_web_sm")  # load the pre-trained English pipeline "en_core_web_sm" and bind it to `nlp`

In [None]:
# Display the components of the current `nlp` pipeline as (name, component) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x79894334e620>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x79894334e7a0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x798943441770>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x798943106280>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7989432c6a80>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7989434417e0>)]

In [None]:
# Build an English pipeline from spacy.blank, i.e. without loading a trained model.
nlp = spacy.blank("en")

doc = nlp("Captain america ate 100$ of samosa. Then he said I can do this all day")

# One row per token: token text, part-of-speech tag, lemma.
for token in doc:
  print(token, token.pos_, token.lemma_, sep="  |  ")

Captain  |    |  
america  |    |  
ate  |    |  
100  |    |  
$  |    |  
of  |    |  
samosa  |    |  
.  |    |  
Then  |    |  
he  |    |  
said  |    |  
I  |    |  
can  |    |  
do  |    |  
this  |    |  
all  |    |  
day  |    |  


In [None]:
# Entity recognition needs a trained "ner" component. A pipeline created with
# spacy.blank("en") has none, which would leave doc.ents empty, so load the
# pre-trained English model in this cell instead of relying on whatever `nlp`
# happens to be bound to in the running kernel.
nlp = spacy.load("en_core_web_sm")

doc = nlp("Tesla Inc is goin to acquire twitter for $45 billion")

# One row per detected entity: entity text, label, and spaCy's description of the label.
for ent in doc.ents:
  print(ent.text, " | ", ent.label_, " | ", spacy.explain(ent.label_))

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [None]:
from spacy import displacy

# Highlight the entities of `doc` inline. jupyter=True forces displaCy to render
# the markup as rich HTML in the notebook; with auto-detection alone this cell
# can end up showing the raw HTML string as its output instead of the picture.
displacy.render(doc, style="ent", jupyter=True)

'<div class="entities" style="line-height: 2.5; direction: ltr">\n<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    Tesla Inc\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>\n</mark>\n is goin to acquire twitter for \n<mark class="entity" style="background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    $45 billion\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">MONEY</span>\n</mark>\n</div>'

In [None]:
# Download the French pipeline package "fr_core_news_sm" through the spaCy CLI.
# NOTE(review): `!python` runs whichever `python` the shell resolves — confirm it
# is the same interpreter/environment as this notebook's kernel.
!python -m spacy download fr_core_news_sm

2023-10-30 10:32:34.070681: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-30 10:32:34.070765: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-30 10:32:34.070810: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Collecting fr-core-news-sm==3.6.0
  Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.6.0/fr_core_news_sm-3.6.0-py3-none-any.whl (16.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m45.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: fr-core-news-sm
Successfully in

In [None]:
# Rebind `nlp` to the French pipeline "fr_core_news_sm".
nlp = spacy.load("fr_core_news_sm")

doc = nlp("Tesla Inc is vs tacheter Twitter pour $45 milliards de dollars")

# One row per detected entity: entity text, label, and spaCy's description of the label.
for ent in doc.ents:
  print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")



Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter  |  MISC  |  Miscellaneous entities, e.g. events, nationalities, products or works of art


In [None]:
# Target: an English pipeline created without any trained components.
nlp = spacy.blank("en")

# Donor: the pre-trained English pipeline whose "ner" component is copied over.
source_nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("ner", source=source_nlp)

# Show the component names now present in the target pipeline.
nlp.pipe_names

['ner']

In [None]:
# Run the current `nlp` pipeline on an English sentence.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

# One row per detected entity: entity text, label, and spaCy's description of the label.
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit
