In [None]:
import spacy
import nltk

`Note:` spaCy doesn't support `stemming`, so NLTK is used for stemming below.

## Stemming


In [None]:
# NLTK supplies the stemmer used in this section.
from nltk.stem import PorterStemmer

# One stemmer instance, reused by the cells that follow.
stemmer = PorterStemmer()

In [None]:
# Sample inflected forms to run through the stemmer.
words = [
    "eating",
    "eats",
    "eat",
    "ate",
    "adjustable",
    "rafting",
    "ability",
    "meeting",
]

# Show each word next to its stem. Stemming works by rule-based suffix
# stripping, so the result is not always a dictionary word (e.g. "abil"),
# and irregular forms such as "ate" are left untouched.
for word in words:
    print(f"{word}  |  {stemmer.stem(word)}")

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  ate
adjustable  |  adjust
rafting  |  raft
ability  |  abil
meeting  |  meet


## Lemmatization

In [None]:
# Load spaCy's small English pipeline.
nlp = spacy.load("en_core_web_sm")

# Same words as in the stemming demo, plus "better", so the two
# approaches can be compared side by side.
sample_text = "eating eats eat ate adjustable rafting ability meeting better"
doc = nlp(sample_text)

# Show each token next to the lemma spaCy assigned to it.
for token in doc:
    print(f"{token.text}  |  {token.lemma_}")

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  eat
adjustable  |  adjustable
rafting  |  raft
ability  |  ability
meeting  |  meeting
better  |  well


In [None]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

`attribute_ruler` assigns attributes to `tokens` based on rules, and we can add our own rules to customize those attributes.

In [None]:
# Demonstration of why "attribute_ruler" is useful.

doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

# In the sentence above, "Bro" and "Brah" are not standard English words;
# they are slang for "brother". The default pipeline does not know this,
# so "lemma_" will not map them to "brother".

# For example, print each token next to its default lemma:
for token in doc:
    print(f"{token.text}  |  {token.lemma_}")

Bro  |  bro
,  |  ,
you  |  you
wanna  |  wanna
go  |  go
?  |  ?
Brah  |  Brah
,  |  ,
do  |  do
n't  |  not
say  |  say
no  |  no
!  |  !
I  |  I
am  |  be
exhausted  |  exhaust


In [None]:
# We can override token attributes by adding our own rule to the
# pipeline's "attribute_ruler" component.

ar = nlp.get_pipe("attribute_ruler")

# One single-token pattern per slang spelling (exact, case-sensitive text
# match); any token matching either pattern gets the lemma "Brother".
slang_patterns = [[{"TEXT": slang}] for slang in ("Bro", "Brah")]
ar.add(patterns=slang_patterns, attrs={"LEMMA": "Brother"})

# Re-process the same sentence so the new rule takes effect.
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

for token in doc:
    print(f"{token.text}  |  {token.lemma_}")

Bro  |  Brother
,  |  ,
you  |  you
wanna  |  wanna
go  |  go
?  |  ?
Brah  |  Brother
,  |  ,
do  |  do
n't  |  not
say  |  say
no  |  no
!  |  !
I  |  I
am  |  be
exhausted  |  exhaust
