**Stemming in NLTK**

In [1]:
import spacy
import nltk

In [2]:
from nltk.stem import PorterStemmer

# Single Porter stemmer instance; its .stem() method is called on each word in the next cell.
stemmer = PorterStemmer()

In [3]:
# Sample vocabulary: regular inflections, an irregular past tense ("ate"),
# and a few derived forms.
words = ["eating", "eats", "eat", "ate", "adjustable", "rafting", "ability", "meeting"]

# Stem everything up front, then print each original word beside its stem.
stems = [stemmer.stem(original) for original in words]
for original, stemmed in zip(words, stems):
    print(original, "|", stemmed)

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet


**Lemmatization in spaCy**

In [4]:
# Load the small English pipeline.
nlp = spacy.load("en_core_web_sm")

# Same words as the stemming example, plus "better".
sample_text = "eating eats eat ate adjustable rafting ability meeting better"
doc = nlp(sample_text)

# Print every token next to its lemma string.
for token in doc:
    print(token, "|", token.lemma_)

eating | eat
eats | eat
eat | eat
ate | eat
adjustable | adjustable
rafting | raft
ability | ability
meeting | meeting
better | well


In [15]:
# Load the small English pipeline for this example.
nlp = spacy.load("en_core_web_sm")

sentence = "Mando talked for 3 hours although talking isn't his thing"
doc = nlp(sentence)

# Columns: token | lemma string (`lemma_`) | the integer value of `lemma`.
for token in doc:
    lemma_text = token.lemma_
    lemma_id = token.lemma
    print(token, "|", lemma_text, "|", lemma_id)

Mando | Mando | 7837215228004622142
talked | talk | 13939146775466599234
for | for | 16037325823156266367
3 | 3 | 602994839685422785
hours | hour | 9748623380567160636
although | although | 343236316598008647
talking | talk | 13939146775466599234
is | be | 10382539506755952630
n't | not | 447765159362469301
his | his | 2661093235354845946
thing | thing | 2473243759842082748


**Customizing Lemmatizer**

In [16]:
# Baseline run: lemmas for the slang sentence before any custom rule is added.
slang_text = "Bro, you wanna go? Brah, don't say no! I am exhausted"
doc = nlp(slang_text)

for tok in doc:
    surface, lemma = tok.text, tok.lemma_
    print(surface, "|", lemma)

Bro | bro
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brah
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


Here "Bro" is lemmatized to "bro" and "Brah" stays "Brah", but both are actually slang for "brother", so we can customize the lemmatizer to map them to it.

In [17]:
# Customise a *separate* pipeline instance so the shared `nlp` object stays
# untouched. Adding rules to `nlp` in place would change what the previous
# cell prints if it is run again, and would append the same rule once more
# every time this cell is re-executed.
nlp_custom = spacy.load("en_core_web_sm")
ar = nlp_custom.get_pipe('attribute_ruler')

# Tokens whose text is exactly "Bro" or "Brah" both get the lemma "Brother".
ar.add(patterns=[[{"TEXT":"Bro"}],[{"TEXT":"Brah"}]], attrs={"LEMMA":"Brother"})

# Re-process the same sentence with the customised pipeline and show the lemmas.
doc = nlp_custom("Bro, you wanna go? Brah, don't say no! I am exhausted")
for token in doc:
    print(token.text, "|", token.lemma_)

Bro | Brother
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [20]:
import spacy

# Start from a freshly loaded pipeline and grab its attribute ruler.
nlp = spacy.load("en_core_web_sm")
ar = nlp.get_pipe('attribute_ruler')

# Slang surface form -> lemma it should be given.
slang_to_lemma = {"Bro": "Brother", "Brah": "Brother", "wanna": "want"}

# One single-token pattern and one attribute dict per slang entry, in the same order.
patterns = [[{"TEXT": surface}] for surface in slang_to_lemma]
attrs = [{"LEMMA": lemma} for lemma in slang_to_lemma.values()]

# Register each pattern with its matching attributes as a separate rule.
for token_pattern, lemma_attrs in zip(patterns, attrs):
    ar.add(patterns=[token_pattern], attrs=lemma_attrs)

# Run the sentence through the customised pipeline.
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

# Show each token beside its (possibly overridden) lemma.
for tok in doc:
    print(tok.text, "|", tok.lemma_)


Bro | Brother
, | ,
you | you
wanna | want
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust
