<a href="https://colab.research.google.com/github/AbdulWahabRaza123/NLP/blob/main/SpacyPipelinesNLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import spacy



In [4]:
nlp=spacy.blank("en")
doc=nlp("captain america ate 50$ of samosas then said I can do this all the daty")
for token in doc:
  print(token)

captain
america
ate
50
$
of
samosas
then
said
I
can
do
this
all
the
daty


In [5]:
#at this time our nlp pipeline is empty
nlp.pipe_names

[]

In [6]:
#lets add a pipeline
nlp=spacy.load("en_core_web_sm")

In [7]:
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7f938cff5460>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x7f938cff5ee0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x7f938d16b270>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x7f938cf24380>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7f938cf2e2c0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7f938d16bc80>)]

In [15]:
#here we also have name entity recognization
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [9]:
#lets play with langauge now showing parts o speech | 1st form
doc=nlp("captain america ate 50$ of samosas then said I can do this all the daty")
for token in doc:
  print(token," | ",token.pos_," | ",token.lemma_)

captain  |  PROPN  |  captain
america  |  PROPN  |  america
ate  |  VERB  |  eat
50  |  NUM  |  50
$  |  NUM  |  $
of  |  ADP  |  of
samosas  |  NOUN  |  samosa
then  |  ADV  |  then
said  |  VERB  |  say
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
the  |  DET  |  the
daty  |  NOUN  |  daty


In [11]:
doc=nlp("Tesla Inc is going to acquire twitter for $45 billions")
#here this pipeline can also tell you the label and explaination of any word
for ent in doc.ents:
  print(ent.text," | ",ent.label_," | ",spacy.explain(ent.label_))

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billions  |  MONEY  |  Monetary values, including unit


In [13]:
# to get good display of output we can use displacy
from spacy import displacy

displacy.render(doc,style="ent")

'<div class="entities" style="line-height: 2.5; direction: ltr">\n<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    Tesla Inc\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>\n</mark>\n is going to acquire twitter for \n<mark class="entity" style="background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    $45 billions\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">MONEY</span>\n</mark>\n</div>'

###But we can't use adnavce functionalities with blank pipelines

In [17]:
nlp=spacy.blank("en")
#lets try the same fun with the blank pipeline
doc=nlp("captain america ate 50$ of samosas then said I can do this all the daty")
for token in doc:
  print(token," | ",token.pos_," | ",token.lemma_)

captain  |    |  
america  |    |  
ate  |    |  
50  |    |  
$  |    |  
of  |    |  
samosas  |    |  
then  |    |  
said  |    |  
I  |    |  
can  |    |  
do  |    |  
this  |    |  
all  |    |  
the  |    |  
daty  |    |  


###To select the specfic pipeline from english pipeline

In [20]:
source_nlp=spacy.load("en_core_web_sm")
nlp=spacy.blank("en")
nlp.add_pipe("ner",source=source_nlp)
nlp.pipe_names

['ner']

In [22]:
doc=nlp("Tesla Inc is going to acquire twitter for $45 billions")
#here this pipeline can also tell you the label and explaination of any word
for ent in doc.ents:
  print(ent.text," | ",ent.label_," | ",spacy.explain(ent.label_))

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billions  |  MONEY  |  Monetary values, including unit


In [23]:
#but we can't get these things now because we has limited pipelines
doc=nlp("captain america ate 50$ of samosas then said I can do this all the daty")
for token in doc:
  print(token," | ",token.pos_," | ",token.lemma_)

captain  |    |  
america  |    |  
ate  |    |  
50  |    |  
$  |    |  
of  |    |  
samosas  |    |  
then  |    |  
said  |    |  
I  |    |  
can  |    |  
do  |    |  
this  |    |  
all  |    |  
the  |    |  
daty  |    |  
