<h3>Stemming in NLTK</h3>

In [2]:
pip install nltk

Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2025.9.18-cp313-cp313-win_amd64.whl.metadata (41 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.5 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.5 MB ? eta -:--:--
   ------------- -------------------------- 0.5/1.5 MB 583.2 kB/s eta 0:00:02
   ------------- -------------------------- 0.5/1.5 MB 583.2 kB/s eta 0:00:02
   -------------------- ------------------- 0.8/1.5 MB 670.3 kB/s eta 0:00:02
   --------------------------- ------------ 1.0/1.5 MB 719.0 kB/s eta 0:00:01
   --------------------------- ------------ 1.0/1.5 MB 719.0 kB/s eta 0:00:01
   -------------------------

In [3]:
# Porter stemmer from NLTK; a single shared instance is reused by the cells below.
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()

In [4]:
# Sample vocabulary: inflected forms of "eat" plus a few suffixed words.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
]

# Show every original word next to the stem the Porter stemmer produces.
for original in words:
    stemmed = stemmer.stem(original)
    print(original, "|", stemmed)

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet


<h3>Lemmatization in spaCy</h3>

In [5]:
import spacy

In [7]:
# Install the spaCy English model if not already installed
%pip install spacy
!python -m spacy download en_core_web_sm

nlp = spacy.load("en_core_web_sm")

doc = nlp("Mando talked for 3 hours although talking isn't his thing")
doc = nlp("eating eats eat ate adjustable rafting ability meeting better")
for token in doc:
    print(token, " | ", token.lemma_)

Note: you may need to restart the kernel to use updated packages.
Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
      --------------------------------------- 0.3/12.8 MB ? eta -:--:--
     -- ------------------------------------- 0.8/12.8 MB 2.1 MB/s eta 0:00:06
     --- ------------------------------------ 1.0/12.8 MB 1.8 MB/s eta 0:00:07
     ---- ----------------------------------- 1.3/12.8 MB 1.7 MB/s eta 0:00:07
     ---- ----------------------------------- 1.6/12.8 MB 1.5 MB/s eta 0:00:08
     ---- ----------------------------------- 1.6/12.8 MB 1.5 MB/s eta 0:00:08
     ----- ---------------------------------- 1.8/12.8 MB 1.4 MB/s eta 0:00:08
     ------ --------------------------------- 2.1/12.8 MB 1.3 MB/s e

<h3>Customizing the lemmatizer</h3>

In [8]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [9]:
# Fetch the pipeline's attribute ruler so a custom lemma rule can be added to it.
ar = nlp.get_pipe('attribute_ruler')

# Two single-token patterns (exact text "Bro" or "Brah") share one lemma override.
slang_patterns = [[{"TEXT":"Bro"}],[{"TEXT":"Brah"}]]
lemma_override = {"LEMMA":"Brother"}
ar.add(slang_patterns, lemma_override)

# Re-run the pipeline and print each token's text next to its lemma.
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")
for tok in doc:
    print(f"{tok.text} | {tok.lemma_}")

Bro | Brother
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [10]:
# Display the token at index 6 of the processed doc.
doc[6]

Brah

In [11]:
# Display the lemma assigned to the token at index 6.
doc[6].lemma_

'Brother'