In [30]:
import nltk
import spacy

from IPython.display import display
from nltk.stem import PorterStemmer, SnowballStemmer
from spacy.lang.en import English
from spacy.pipeline.attributeruler import AttributeRuler
from spacy.tokens.doc import Doc

## Stemming

In [16]:
# Stem a small vocabulary with NLTK's Porter stemmer.
# SnowballStemmer("english") can be swapped in here to compare algorithms.
stemmer: PorterStemmer = PorterStemmer()

# Inflected and derived forms; the same list is fed to the lemmatizer later on.
words: list[str] = [
	"eating", "eats", "eat", "eaten", "ate",
	"adjustable", "rafting", "ability", "meeting", "better",
]

# Show every word next to the stem produced for it, one pair per line.
for word in words:
	stem: str = stemmer.stem(word)
	print(f"{word} -> {stem}")

eating -> eat
eats -> eat
eat -> eat
eaten -> eaten
ate -> ate
adjustable -> adjust
rafting -> raft
ability -> abil
meeting -> meet
better -> better


## Lemmatization

In [21]:
# Run the stemming vocabulary through the small English spaCy pipeline,
# treating the words as one space-separated text.
nlp: English = spacy.load("en_core_web_sm")
joined_words: str = " ".join(words)
doc: Doc = nlp(joined_words)

# For each token show the lemma as text (`lemma_`) and as its integer ID (`lemma`).
for token in doc:
	lemma_text, lemma_id = token.lemma_, token.lemma
	print(f"{token.text} -> {lemma_text} ({lemma_id})")

eating -> eat (9837207709914848172)
eats -> eat (9837207709914848172)
eat -> eat (9837207709914848172)
eaten -> eat (9837207709914848172)
ate -> eat (9837207709914848172)
adjustable -> adjustable (6033511944150694480)
rafting -> raft (7154368781129989833)
ability -> ability (11565809527369121409)
meeting -> meeting (14798207169164081740)
better -> well (4525988469032889948)


In [27]:
# Rebind `nlp` to the "en_core_web_md" pipeline and lemmatize a full sentence,
# so each token is analysed in context rather than as an isolated word.
sample_text: str = "Mando talked for 3 hours although talking isn't his thing. He became talkative."
nlp = spacy.load("en_core_web_md")
doc = nlp(sample_text)

# One "token -> lemma" pair per line.
for token in doc:
	surface, lemma = token.text, token.lemma_
	print(f"{surface} -> {lemma}")

Mando -> Mando
talked -> talk
for -> for
3 -> 3
hours -> hour
although -> although
talking -> talk
is -> be
n't -> not
his -> his
thing -> thing
. -> .
He -> he
became -> become
talkative -> talkative
. -> .


In [26]:
# List the names of the components in the currently loaded pipeline, in order.
pipeline_components = nlp.pipe_names
display(pipeline_components)

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']


In [32]:
# Customise lemmatization: map the slang tokens "Bro" and "Brah" (exact token
# text) to the lemma "Brother" through the pipeline's attribute ruler.
ar: AttributeRuler = nlp.get_pipe("attribute_ruler")

# Rule in the dict format used by `AttributeRuler.patterns` / `add_patterns`;
# "index": 0 is the default that `ar.add(patterns, attrs)` would use.
bro_rule: dict = {
	"patterns": [[{"TEXT": "Bro"}], [{"TEXT": "Brah"}]],
	"attrs": {"LEMMA": "Brother"},
	"index": 0,
}

# The ruler belongs to the shared `nlp` object, so adding unconditionally would
# append a duplicate rule every time this cell is re-run. Register it only once
# to keep the cell idempotent.
if bro_rule not in ar.patterns:
	ar.add_patterns([bro_rule])

doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted.")

# One "token -> lemma" pair per line; "Bro" and "Brah" should now show "Brother".
for token in doc:
	print(f"{token.text} -> {token.lemma_}")

Bro -> Brother
, -> ,
you -> you
wanna -> wanna
go -> go
? -> ?
Brah -> Brother
, -> ,
do -> do
n't -> not
say -> say
no -> no
! -> !
I -> I
am -> be
exhausted -> exhausted
. -> .
