In [1]:
!pip install spacy



In [2]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m77.1 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")

doc = nlp("SpaCy is an advanced NLP library.")
print(doc)


SpaCy is an advanced NLP library.


In [3]:
for token in doc:
    print(f"Token: {token.text}, Is Alpha: {token.is_alpha}, Is Stop Word: {token.is_stop}")


Token: SpaCy, Is Alpha: True, Is Stop Word: False
Token: is, Is Alpha: True, Is Stop Word: True
Token: an, Is Alpha: True, Is Stop Word: True
Token: advanced, Is Alpha: True, Is Stop Word: False
Token: NLP, Is Alpha: True, Is Stop Word: False
Token: library, Is Alpha: True, Is Stop Word: False
Token: ., Is Alpha: False, Is Stop Word: False


In [5]:
for token in doc:
    print(f"Token: {token.text}, POS: {token.pos_}, Detailed Tag: {token.tag_}")


Token: SpaCy, POS: PROPN, Detailed Tag: NNP
Token: is, POS: AUX, Detailed Tag: VBZ
Token: an, POS: DET, Detailed Tag: DT
Token: advanced, POS: ADJ, Detailed Tag: JJ
Token: NLP, POS: PROPN, Detailed Tag: NNP
Token: library, POS: NOUN, Detailed Tag: NN
Token: ., POS: PUNCT, Detailed Tag: .


In [7]:
for ent in doc.ents:
    print(ent)
    print(f"Entity: {ent.text}, Label: {ent.label_}, Explanation: {spacy.explain(ent.label_)}")


NLP
Entity: NLP, Label: ORG, Explanation: Companies, agencies, institutions, etc.


In [9]:
for token in doc:
    print(f"Token:{token.text},Lemma :{token.lemma_}")

Token:SpaCy,Lemma :15763240395850944880
Token:is,Lemma :10382539506755952630
Token:an,Lemma :15099054000809333061
Token:advanced,Lemma :3943929226210916060
Token:NLP,Lemma :15832915187156881108
Token:library,Lemma :1785747669126016609
Token:.,Lemma :12646065887601541794


In [10]:
for token in doc:
    print(f"Token: {token.text},Dependency:{token.dep_},Head : {token.head.text}")


Token: SpaCy,Dependency:nsubj,Head : is
Token: is,Dependency:ROOT,Head : is
Token: an,Dependency:det,Head : library
Token: advanced,Dependency:amod,Head : library
Token: NLP,Dependency:compound,Head : library
Token: library,Dependency:attr,Head : is
Token: .,Dependency:punct,Head : is


In [11]:
from spacy import displacy
displacy.serve(doc,style="dep")




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [15]:
from spacy.language import Language
import spacy

@Language.component("custom_component")
def custom_component(doc):
    print("Custom pipeline component executed!")
    return doc

nlp = spacy.blank("en")

nlp.add_pipe("custom_component", last=True)

doc = nlp("Adding custom components in SpaCy.")
doc

Custom pipeline component executed!


Adding custom components in SpaCy.

In [17]:
import spacy
from spacy.tokens import DocBin
from spacy.training.example import Example

nlp = spacy.blank('en')
ner = nlp.add_pipe("ner")
ner.add_label("TECH")
Train_Data = [ ("SpaCy is an NLP library.", {"entities": [(0, 5, "TECH")]}),
    ("Transformers is popular in NLP.", {"entities": [(0, 12, "TECH")]}),
]
doc_bin = DocBin()
for text, annotations in Train_Data:
    doc = nlp.make_doc(text)
    example = Example.from_dict(doc, annotations)
    doc_bin.add(example.reference)
doc_bin.to_disk("./train.spacy")

In [26]:
import spacy

nlp = spacy.load("en_core_web_sm")

doc1 = nlp("I love programming.")
doc2 = nlp("I love coding.")

print(doc1.similarity(doc2))

0.9062661090812003


  print(doc1.similarity(doc2))


In [24]:
import spacy

nlp = spacy.blank("en")

nlp.add_pipe("sentencizer")

doc = nlp("This is the first sentence. This is the second sentence.")

for sent in doc.sents:
    print(sent)


This is the first sentence.
This is the second sentence.
