In [1]:
# Setup: spaCy and its English model must be installed before running this notebook.
# On a fresh environment, uncomment the two lines below and run them once:
# %pip install spacy
# !python -m spacy download en_core_web_sm

import spacy

# Load the spaCy English model; `nlp` is the callable used to process text.
nlp = spacy.load("en_core_web_sm")

# Sample product review used as input for every section below.
# Adjacent string literals are joined into a single string.
text = (
    "I bought this wireless keyboard last week, and it works better than expected. "
    "The battery lasts a long time, and the keys feel comfortable. "
    "My only issue is that the Bluetooth connection occasionally drops."
)

# Process the review once; the resulting `doc` is read by the later cells.
doc = nlp(text)

print("\n")







In [3]:
# 1. Tokenization with Part-of-speech and Dependency Parsing
# Prints one row per token in `doc`: its text, POS tag and dependency label.
section_title = "=== Tokenization, POS, and Dependency Parsing ==="
print(section_title)
for token in doc:
    # Text is padded to 15 characters and the POS tag to 10 so columns line up.
    token_row = f"{token.text:15} POS: {token.pos_:10} Dep: {token.dep_}"
    print(token_row)
print("\n")

=== Tokenization, POS, and Dependency Parsing ===
I               POS: PRON       Dep: nsubj
bought          POS: VERB       Dep: ROOT
this            POS: DET        Dep: det
wireless        POS: ADJ        Dep: amod
keyboard        POS: NOUN       Dep: dobj
last            POS: ADJ        Dep: amod
week            POS: NOUN       Dep: npadvmod
,               POS: PUNCT      Dep: punct
and             POS: CCONJ      Dep: cc
it              POS: PRON       Dep: nsubj
works           POS: VERB       Dep: conj
better          POS: ADV        Dep: advmod
than            POS: SCONJ      Dep: mark
expected        POS: VERB       Dep: advcl
.               POS: PUNCT      Dep: punct
The             POS: DET        Dep: det
battery         POS: NOUN       Dep: nsubj
lasts           POS: VERB       Dep: ROOT
a               POS: DET        Dep: det
long            POS: ADJ        Dep: amod
time            POS: NOUN       Dep: dobj
,               POS: PUNCT      Dep: punct
and             PO

In [5]:
# 2. Named Entity Recognition (NER)
# Lists every entity span found in `doc` together with its label.
section_title = "=== Named Entities ==="
print(section_title)
entities = doc.ents
for ent in entities:
    # Entity text is padded to 20 characters to keep the labels aligned.
    entity_row = f"Entity: {ent.text:20} Label: {ent.label_}"
    print(entity_row)
print("\n")

=== Named Entities ===
Entity: last week            Label: DATE




In [7]:
# 3. Lemmatization
# Shows each token of `doc` next to its lemma.
section_title = "=== Lemmatization ==="
print(section_title)
for token in doc:
    # Token text is padded to 15 characters so the lemma column lines up.
    lemma_row = f"{token.text:15} Lemma: {token.lemma_}"
    print(lemma_row)
print("\n")

=== Lemmatization ===
I               Lemma: I
bought          Lemma: buy
this            Lemma: this
wireless        Lemma: wireless
keyboard        Lemma: keyboard
last            Lemma: last
week            Lemma: week
,               Lemma: ,
and             Lemma: and
it              Lemma: it
works           Lemma: work
better          Lemma: well
than            Lemma: than
expected        Lemma: expect
.               Lemma: .
The             Lemma: the
battery         Lemma: battery
lasts           Lemma: last
a               Lemma: a
long            Lemma: long
time            Lemma: time
,               Lemma: ,
and             Lemma: and
the             Lemma: the
keys            Lemma: key
feel            Lemma: feel
comfortable     Lemma: comfortable
.               Lemma: .
My              Lemma: my
only            Lemma: only
issue           Lemma: issue
is              Lemma: be
that            Lemma: that
the             Lemma: the
Bluetooth       Lemma: Bluetooth
con

In [9]:
# 4. Sentence Segmentation
# Prints the text of each sentence in `doc`, one per line.
section_title = "=== Sentences ==="
print(section_title)
for sent in doc.sents:
    sentence_text = sent.text
    print(sentence_text)
print("\n")

=== Sentences ===
I bought this wireless keyboard last week, and it works better than expected.
The battery lasts a long time, and the keys feel comfortable.
My only issue is that the Bluetooth connection occasionally drops.




In [11]:
# 5. Stop Word Removal
# Collects the text of every token that is not flagged as a stop word.
# Only `is_stop` is checked, so punctuation tokens remain in the result.
print("=== Stop Word Removal ===")
filtered_tokens = []
for token in doc:
    if token.is_stop:
        continue  # drop stop words, keep everything else
    filtered_tokens.append(token.text)
print("Filtered Tokens:", filtered_tokens)
print("\n")

=== Stop Word Removal ===
Filtered Tokens: ['bought', 'wireless', 'keyboard', 'week', ',', 'works', 'better', 'expected', '.', 'battery', 'lasts', 'long', 'time', ',', 'keys', 'feel', 'comfortable', '.', 'issue', 'Bluetooth', 'connection', 'occasionally', 'drops', '.']




In [13]:
# 6. Parts of Speech Tagging with Explanation
# Prints a three-column table: token text, POS tag, and the description
# that spacy.explain() returns for that tag.
table_header = f"{'Token':15} {'POS':10} Explanation"
divider = "-" * 40

print("=== Parts of Speech (POS) Tagging ===")
print(table_header)
print(divider)
for token in doc:
    # Same 15/10 column widths as the header row.
    pos_row = f"{token.text:15} {token.pos_:10} {spacy.explain(token.pos_)}"
    print(pos_row)

=== Parts of Speech (POS) Tagging ===
Token           POS        Explanation
----------------------------------------
I               PRON       pronoun
bought          VERB       verb
this            DET        determiner
wireless        ADJ        adjective
keyboard        NOUN       noun
last            ADJ        adjective
week            NOUN       noun
,               PUNCT      punctuation
and             CCONJ      coordinating conjunction
it              PRON       pronoun
works           VERB       verb
better          ADV        adverb
than            SCONJ      subordinating conjunction
expected        VERB       verb
.               PUNCT      punctuation
The             DET        determiner
battery         NOUN       noun
lasts           VERB       verb
a               DET        determiner
long            ADJ        adjective
time            NOUN       noun
,               PUNCT      punctuation
and             CCONJ      coordinating conjunction
the             DET    