In [1]:
# Setup: spaCy and its small English pipeline must be installed in the
# environment the kernel runs in. If either is missing, run the following once
# in a separate cell (%pip targets the running kernel's environment, which
# !pip does not guarantee):
# %pip install spacy
# import sys
# !{sys.executable} -m spacy download en_core_web_sm

import spacy

# Load the spaCy English model. spacy.load raises OSError when the model
# package cannot be found, so re-raise the same exception type with the
# command that fixes it (the original error stays attached as the cause).
try:
    nlp = spacy.load("en_core_web_sm")
except OSError as err:
    raise OSError(
        "Could not load spaCy model 'en_core_web_sm'. If it is not installed, "
        "run `python -m spacy download en_core_web_sm` in the kernel's environment."
    ) from err

# Example text
text = "spaCy is an amazing NLP library for Python! It can perform tokenization, POS tagging, and named entity recognition."

# Process the text once; the resulting `doc` is reused by every later cell.
doc = nlp(text)







In [2]:
# 1. Tokenization with Part-of-speech and Dependency Parsing
# One row per token: surface text, coarse POS tag, dependency label.
print("=== Tokenization, POS, and Dependency Parsing ===")
for token in doc:
    word, pos_tag, dep_label = token.text, token.pos_, token.dep_
    print(f"{word:15} POS: {pos_tag:10} Dep: {dep_label}")
# Trailing spacer between this section's output and the next.
print("\n")

=== Tokenization, POS, and Dependency Parsing ===
spaCy           POS: NUM        Dep: nsubj
is              POS: AUX        Dep: ROOT
an              POS: DET        Dep: det
amazing         POS: ADJ        Dep: amod
NLP             POS: PROPN      Dep: compound
library         POS: NOUN       Dep: attr
for             POS: ADP        Dep: prep
Python          POS: PROPN      Dep: pobj
!               POS: PUNCT      Dep: punct
It              POS: PRON       Dep: nsubj
can             POS: AUX        Dep: aux
perform         POS: VERB       Dep: ROOT
tokenization    POS: NOUN       Dep: dobj
,               POS: PUNCT      Dep: punct
POS             POS: PROPN      Dep: compound
tagging         POS: NOUN       Dep: conj
,               POS: PUNCT      Dep: punct
and             POS: CCONJ      Dep: cc
named           POS: VERB       Dep: conj
entity          POS: NOUN       Dep: compound
recognition     POS: NOUN       Dep: oprd
.               POS: PUNCT      Dep: punct




In [3]:
# 2. Named Entity Recognition (NER)
# List every entity span found in `doc` together with its label.
print("=== Named Entities ===")
for ent in doc.ents:
    span_text, span_label = ent.text, ent.label_
    print(f"Entity: {span_text:20} Label: {span_label}")
# Trailing spacer between this section's output and the next.
print("\n")

=== Named Entities ===
Entity: NLP                  Label: ORG




In [4]:
# 3. Lemmatization
# Show each token next to the lemma assigned to it.
print("=== Lemmatization ===")
for token in doc:
    surface, lemma = token.text, token.lemma_
    print(f"{surface:15} Lemma: {lemma}")
# Trailing spacer between this section's output and the next.
print("\n")

=== Lemmatization ===
spaCy           Lemma: spacy
is              Lemma: be
an              Lemma: an
amazing         Lemma: amazing
NLP             Lemma: NLP
library         Lemma: library
for             Lemma: for
Python          Lemma: Python
!               Lemma: !
It              Lemma: it
can             Lemma: can
perform         Lemma: perform
tokenization    Lemma: tokenization
,               Lemma: ,
POS             Lemma: POS
tagging         Lemma: tagging
,               Lemma: ,
and             Lemma: and
named           Lemma: name
entity          Lemma: entity
recognition     Lemma: recognition
.               Lemma: .




In [5]:
# 4. Sentence Segmentation
# Print each sentence of `doc` on its own line.
print("=== Sentences ===")
for sentence in doc.sents:
    sentence_text = sentence.text
    print(sentence_text)
# Trailing spacer between this section's output and the next.
print("\n")

=== Sentences ===
spaCy is an amazing NLP library for Python!
It can perform tokenization, POS tagging, and named entity recognition.




In [6]:
# 5. Stop Word Removal
# Keep the text of every token not flagged as a stop word. Only `is_stop` is
# checked, so punctuation tokens are kept as well.
print("=== Stop Word Removal ===")
filtered_tokens = []
for token in doc:
    if token.is_stop:
        continue
    filtered_tokens.append(token.text)
print("Filtered Tokens:", filtered_tokens)
# Trailing spacer between this section's output and the next.
print("\n")

=== Stop Word Removal ===
Filtered Tokens: ['spaCy', 'amazing', 'NLP', 'library', 'Python', '!', 'perform', 'tokenization', ',', 'POS', 'tagging', ',', 'named', 'entity', 'recognition', '.']




In [7]:
# 6. Parts of Speech Tagging with Explanation
# Three-column table: token text, coarse POS tag, and the description that
# spacy.explain gives for that tag.
print("=== Parts of Speech (POS) Tagging ===")
print(f"{'Token':15} {'POS':10} Explanation")
separator = "-" * 40
print(separator)
for token in doc:
    pos_tag = token.pos_
    description = spacy.explain(pos_tag)
    print(f"{token.text:15} {pos_tag:10} {description}")

=== Parts of Speech (POS) Tagging ===
Token           POS        Explanation
----------------------------------------
spaCy           NUM        numeral
is              AUX        auxiliary
an              DET        determiner
amazing         ADJ        adjective
NLP             PROPN      proper noun
library         NOUN       noun
for             ADP        adposition
Python          PROPN      proper noun
!               PUNCT      punctuation
It              PRON       pronoun
can             AUX        auxiliary
perform         VERB       verb
tokenization    NOUN       noun
,               PUNCT      punctuation
POS             PROPN      proper noun
tagging         NOUN       noun
,               PUNCT      punctuation
and             CCONJ      coordinating conjunction
named           VERB       verb
entity          NOUN       noun
recognition     NOUN       noun
.               PUNCT      punctuation
