<h3>Stemming in NLTK</h3>

In [1]:
import nltk

In [2]:
from nltk.stem import PorterStemmer
# Create a single Porter stemmer instance; it is reused for every word stemmed below.
stemmer = PorterStemmer()

In [3]:
# Sample vocabulary: several inflections of "eat" plus words whose stems are less obvious.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting", "better",
]

# Pair every word with its Porter stem and print one "word | stem" row per entry.
for original, stemmed in zip(words, map(stemmer.stem, words)):
    print(original, "|", stemmed)

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet
better | better


<h3>Lemmatization in spaCy</h3>

In [4]:
import spacy

In [5]:
# Load the small English pipeline and run it over the same words used in the stemming demo.
nlp = spacy.load("en_core_web_sm")
doc = nlp("eating eats eat ate adjustable rafting ability meeting better")

# Columns: token | lemma as a string (`lemma_`) | lemma as an integer ID (`lemma`).
for tok in doc:
    columns = (tok, " | ", tok.lemma_, "|", tok.lemma)
    print(*columns)

eating  |  eat | 9837207709914848172
eats  |  eat | 9837207709914848172
eat  |  eat | 9837207709914848172
ate  |  eat | 9837207709914848172
adjustable  |  adjustable | 6033511944150694480
rafting  |  raft | 7154368781129989833
ability  |  ability | 11565809527369121409
meeting  |  meeting | 14798207169164081740
better  |  well | 4525988469032889948


<h3>Customizing the lemmatizer</h3>

In [6]:
# Inspect which components the loaded pipeline contains before customizing any of them.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [7]:
# Informal sentence with slang ("Bro", "Brah", "wanna") to see what the default lemmatizer produces.
doc1 = nlp("Bro you wanna go? Brah don't say no I am exhausted")

# Print each token's surface text next to its lemma.
for text, lemma in ((tok.text, tok.lemma_) for tok in doc1):
    print(text, "|", lemma)

Bro | bro
you | you
wanna | wanna
go | go
? | ?
Brah | Brah
do | do
n't | not
say | say
no | no
I | I
am | be
exhausted | exhaust


In [8]:
# Grab the pipeline's attribute_ruler component so a custom lemma rule can be registered on it.
ar = nlp.get_pipe('attribute_ruler')

# Two single-token patterns keyed on the token's TEXT; both are assigned the same lemma.
# NOTE(review): re-running this cell presumably registers the same patterns again —
# restart the kernel if a clean pipeline state is needed.
slang_patterns = [[{"TEXT": "Bro"}], [{"TEXT": "Brah"}]]
slang_attrs = {"LEMMA": "Brother"}
ar.add(slang_patterns, slang_attrs)

# Re-process the same sentence so the new rule is applied to fresh tokens.
doc2 = nlp("Bro you wanna go? Brah don't say no I am exhausted")

lemma_rows = [(tok.text, tok.lemma_) for tok in doc2]
for text, lemma in lemma_rows:
    print(text, "|", lemma)

Bro | Brother
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
do | do
n't | not
say | say
no | no
I | I
am | be
exhausted | exhaust
