In [1]:
import nltk
import spacy

In [5]:
from nltk.stem import PorterStemmer

# Single Porter stemmer instance; reused by the stemming loop below.
stemmer = PorterStemmer()

In [6]:
# Sample words: inflected forms of "eat" plus a few derived/-ing forms.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
]

# Print each word next to its Porter stem as "word  |  stem".
for word in words:
    print(f"{word}  |  {stemmer.stem(word)}")

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  ate
adjustable  |  adjust
rafting  |  raft
ability  |  abil
meeting  |  meet


In [None]:
# Stemming left "ate" unchanged because the stemmer has no knowledge of the language, whereas a lemmatizer does.

In [10]:
# Load spaCy's "en_core_web_sm" pipeline; `nlp` is reused by the cells below.
nlp = spacy.load("en_core_web_sm")

# The stemming sample words plus "better", passed through the pipeline as one text.
sample_text = "eating eats eat ate adjustable rafting ability meeting better"
doc = nlp(sample_text)

# Print each token next to its lemma as "token  |  lemma".
for token in doc:
    print(f"{token!s}  |  {token.lemma_}")

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  eat
adjustable  |  adjustable
rafting  |  raft
ability  |  ability
meeting  |  meeting
better  |  well


In [None]:
# spaCy's lemmatization worked well: "ate" became "eat" and "better" became "well".

In [11]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [12]:
# Informal sentence containing slang ("Bro", "Brah") to inspect the default lemmas.
slang_text = "Bro, you wanna go? Brah, dont't say no! I am exhausted"
doc = nlp(slang_text)

# Print each token's text next to its lemma as "text  |  lemma".
for token in doc:
    print(f"{token.text}  |  {token.lemma_}")

Bro  |  bro
,  |  ,
you  |  you
wanna  |  wanna
go  |  go
?  |  ?
Brah  |  Brah
,  |  ,
dont't  |  dont't
say  |  say
no  |  no
!  |  !
I  |  I
am  |  be
exhausted  |  exhaust


In [13]:
# spaCy didn't map the slang "Bro"/"Brah" to a base form. To customize the pipeline, we can add rules to the attribute ruler like this:

In [15]:
# Fetch the pipeline's attribute ruler component so custom rules can be added.
ar = nlp.get_pipe('attribute_ruler')

# One single-token pattern per slang word; matching tokens get the lemma "Brother".
# NOTE(review): this modifies the shared `nlp` pipeline in place, so cells run
# after this one presumably see the overridden lemmas as well.
slang_patterns = [[{"TEXT": slang}] for slang in ("Bro", "Brah")]
ar.add(slang_patterns, {"LEMMA": "Brother"})

# Re-process the same slang sentence to see the effect of the new rule.
doc = nlp("Bro, you wanna go? Brah, dont't say no! I am exhausted")

for token in doc:
    print(f"{token.text}  |  {token.lemma_}")

Bro  |  Brother
,  |  ,
you  |  you
wanna  |  wanna
go  |  go
?  |  ?
Brah  |  Brother
,  |  ,
dont't  |  dont't
say  |  say
no  |  no
!  |  !
I  |  I
am  |  be
exhausted  |  exhaust
