# Stemming in NLTK

In [1]:
from nltk.stem import PorterStemmer
obj = PorterStemmer()

In [2]:
words = ["eating", 'eats','eat','ate', "adjustable",'rafting', 'ability','meeting']
for word in words:
    print(word, " | ",obj.stem(word))

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  ate
adjustable  |  adjust
rafting  |  raft
ability  |  abil
meeting  |  meet


# Lemmatization in spacy

In [11]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [12]:
doc = nlp("Mando talked for 3 hours although talking isn't his thing")
for token in doc:
    print(token, " | ", token.lemma_)

Mando  |  mando
talked  |  talk
for  |  for
3  |  3
hours  |  hour
although  |  although
talking  |  talking
is  |  be
n't  |  not
his  |  his
thing  |  thing


#### here it convert different words of different tenses into thier base words

In [13]:
doc = nlp("eating eats eat ate adjustable rafting ability meeting better")
for token in doc:
    print(token, " | ", token.lemma_)

eating  |  eating
eats  |  eat
eat  |  eat
ate  |  eat
adjustable  |  adjustable
rafting  |  raft
ability  |  ability
meeting  |  meeting
better  |  well


<h3>Customizing lemmatizer</h3>
sometimes, we have some languages slangs in our text, so in order to convert it into actuall base words we need to customize our lemmatization

In [14]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [15]:
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")
for token in doc:
    print(token ,' | ', token.lemma_)

Bro  |  Bro
,  |  ,
you  |  you
wanna  |  wanna
go  |  go
?  |  ?
Brah  |  Brah
,  |  ,
do  |  do
n't  |  not
say  |  say
no  |  no
!  |  !
I  |  I
am  |  be
exhausted  |  exhaust


# we first need to add our custom cahngings to attribute ruler and than we can generate our desire result

In [17]:
a = nlp.get_pipe('attribute_ruler')

In [18]:
a.add([[{'Text': 'Bro'},{'Text': 'Brah'}]], {"LEMMA": "Brother"})

In [19]:
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

In [20]:
for token in doc:
    print(token ,' | ', token.lemma_)

Bro  |  Bro
,  |  ,
you  |  you
wanna  |  wanna
go  |  go
?  |  ?
Brah  |  Brah
,  |  ,
do  |  do
n't  |  not
say  |  say
no  |  no
!  |  !
I  |  I
am  |  be
exhausted  |  exhaust


In [22]:
doc[6]

Brah

In [23]:
doc[6].lemma_

'Brother'

In [21]:
ar = nlp.get_pipe('attribute_ruler')
ar.add([[{"Text": "Bro"}],[{"TEXT":"Brah"}]], {"LEMMA": "Brother"})
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")
for token in doc:
    print(token ,' | ', token.lemma_)

Bro  |  Brother
,  |  ,
you  |  you
wanna  |  wanna
go  |  go
?  |  ?
Brah  |  Brother
,  |  ,
do  |  do
n't  |  not
say  |  say
no  |  no
!  |  !
I  |  I
am  |  be
exhausted  |  exhaust
