# Stemming in NLTK

In [1]:
from nltk.stem import PorterStemmer

# Porter stemmer instance used by the stemming loop in the next cell.
stemmer = PorterStemmer()

In [2]:
# Sample words: several inflections of "eat" plus a few derived forms.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
]

# Print every word next to the stem the Porter stemmer produces for it.
for word in words:
    stem = stemmer.stem(word)
    print(word, " | ", stem)

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  ate
adjustable  |  adjust
rafting  |  raft
ability  |  abil
meeting  |  meet


# Lemmatization in spaCy

In [3]:
import spacy

In [4]:
# Load the small English pipeline; the lemma of each token is read from it below.
nlp = spacy.load("en_core_web_sm")

# Same words as the NLTK stemming cell plus "better", so the stemmer and
# lemmatizer outputs can be compared word by word.
# Another sentence worth trying:
#   "Mando talked for 3 hours although talking isn't his thing"
# (It used to be processed here and then immediately overwritten, so the
# pipeline ran on it for nothing.)
doc = nlp("eating eats eat ate adjustable rafting ability meeting better")

# Print each token next to its lemma.
for token in doc:
    print(token, " | ", token.lemma_)

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  eat
adjustable  |  adjustable
rafting  |  raft
ability  |  ability
meeting  |  meeting
better  |  well


## Customizing the Lemmatizer

In [5]:
# List the names of the pipeline components, in order, to see which ones
# (attribute_ruler, lemmatizer) are available for customizing lemmas.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [6]:
# Fetch the attribute_ruler component of the loaded pipeline.
ar = nlp.get_pipe('attribute_ruler')

# Two single-token patterns (exact text "Bro" or "Brah") that should both
# receive the lemma "Brother".
slang_patterns = [[{'TEXT':'Bro'}], [{'TEXT':'Brah'}]]
lemma_override = {'LEMMA':'Brother'}
ar.add(slang_patterns, lemma_override)

# Run a sentence containing both slang words through the customized pipeline.
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

for token in doc:
    lemma = token.lemma_
    print(token, " | ", lemma)

Bro  |  Brother
,  |  ,
you  |  you
wanna  |  wanna
go  |  go
?  |  ?
Brah  |  Brother
,  |  ,
do  |  do
n't  |  not
say  |  say
no  |  no
!  |  !
I  |  I
am  |  be
exhausted  |  exhaust


In [7]:
# Token at index 6 (punctuation counts as tokens) is the second slang word, "Brah".
doc[6]

Brah

In [8]:
# Its lemma is the value set by the custom attribute_ruler rule.
doc[6].lemma_

'Brother'

## Exercise: Base Forms of Individual Words

In [9]:
# Exercise, part 1: reduce each word to its stem with NLTK's Porter stemmer.
stemmer = PorterStemmer()

lst_words = [
    'running', 'painting', 'walking', 'dressing', 'likely',
    'children', 'whom', 'good', 'ate', 'fishing',
]

for word in lst_words:
    stem = stemmer.stem(word)
    print(word, " | ", stem)

running  |  run
painting  |  paint
walking  |  walk
dressing  |  dress
likely  |  like
children  |  children
whom  |  whom
good  |  good
ate  |  ate
fishing  |  fish


In [10]:
# Exercise, part 2: reduce each word to its lemma with spaCy.
# The model is loaded again here; presumably this is meant to start from a
# pipeline without the Bro/Brah rule added earlier. TODO confirm.
nlp = spacy.load("en_core_web_sm")

# NOTE(review): the stemming list above contains 'whom', while this sentence
# contains 'who', so the two word sets are not quite identical.
exercise_sentence = "running painting walking dressing likely children who good ate fishing"
doc = nlp(exercise_sentence)

for token in doc:
    lemma = token.lemma_
    print(token, " | ", lemma)

running  |  run
painting  |  paint
walking  |  walk
dressing  |  dress
likely  |  likely
children  |  child
who  |  who
good  |  good
ate  |  eat
fishing  |  fishing


# Exercise: Base Forms of a Paragraph

## Using Stemming in NLTK

In [11]:
# Raw paragraph for the exercise. It is kept exactly as given (spacing and
# line break included) because the tokenizer output depends on it.
text = """Latha is very multi talented girl.She is good at many skills like dancing, running, singing, playing.She also likes eating Pav Bhagi. she has a 
habit of fishing and swimming too.Besides all this, she is a wonderful at cooking too.
"""

In [12]:
# Reuse the `nlp` pipeline that was loaded earlier in the notebook; loading
# the model from disk again here is redundant and slows down "Run All".
doc = nlp(text)

# word tokenization: keep the raw text of every token, which includes
# punctuation and newline tokens
words = [token.text for token in doc]

print(words)

['Latha', 'is', 'very', 'multi', 'talented', 'girl', '.', 'She', 'is', 'good', 'at', 'many', 'skills', 'like', 'dancing', ',', 'running', ',', 'singing', ',', 'playing', '.', 'She', 'also', 'likes', 'eating', 'Pav', 'Bhagi', '.', 'she', 'has', 'a', '\n', 'habit', 'of', 'fishing', 'and', 'swimming', 'too', '.', 'Besides', 'all', 'this', ',', 'she', 'is', 'a', 'wonderful', 'at', 'cooking', 'too', '.', '\n']


In [13]:
# Stem every token; the results come back lower-cased
# (e.g. "Latha" becomes "latha").
base_word = [stemmer.stem(token_text) for token_text in words]

base_word

['latha',
 'is',
 'veri',
 'multi',
 'talent',
 'girl',
 '.',
 'she',
 'is',
 'good',
 'at',
 'mani',
 'skill',
 'like',
 'danc',
 ',',
 'run',
 ',',
 'sing',
 ',',
 'play',
 '.',
 'she',
 'also',
 'like',
 'eat',
 'pav',
 'bhagi',
 '.',
 'she',
 'ha',
 'a',
 '\n',
 'habit',
 'of',
 'fish',
 'and',
 'swim',
 'too',
 '.',
 'besid',
 'all',
 'thi',
 ',',
 'she',
 'is',
 'a',
 'wonder',
 'at',
 'cook',
 'too',
 '.',
 '\n']

In [14]:
# Join the stems back into one space-separated string.
# The result gets its own name instead of rebinding `text`: `text` is the
# input of the tokenization cell, and overwriting it would make that cell
# process already-stemmed text if it were run again.
stemmed_text = " ".join(base_word)
stemmed_text

'latha is veri multi talent girl . she is good at mani skill like danc , run , sing , play . she also like eat pav bhagi . she ha a \n habit of fish and swim too . besid all thi , she is a wonder at cook too . \n'

## Using Lemmatization in spaCy

In [15]:
# Same paragraph as in the stemming part of the exercise, so the stemmed and
# lemmatized results can be compared on identical input.
text = """Latha is very multi talented girl.She is good at many skills like dancing, running, singing, playing.She also likes eating Pav Bhagi. she has a 
habit of fishing and swimming too.Besides all this, she is a wonderful at cooking too.
"""

In [16]:
# Reuse the `nlp` pipeline that was loaded earlier in the notebook; loading
# the model from disk again here is redundant and slows down "Run All".
doc = nlp(text)

# converting in base word: collect the lemma of every token
# (the name `words` is kept because the joining cell below reads it)
words = [token.lemma_ for token in doc]

words

['Latha',
 'be',
 'very',
 'multi',
 'talented',
 'girl',
 '.',
 'she',
 'be',
 'good',
 'at',
 'many',
 'skill',
 'like',
 'dancing',
 ',',
 'running',
 ',',
 'singing',
 ',',
 'play',
 '.',
 'she',
 'also',
 'like',
 'eat',
 'Pav',
 'Bhagi',
 '.',
 'she',
 'have',
 'a',
 '\n',
 'habit',
 'of',
 'fishing',
 'and',
 'swim',
 'too',
 '.',
 'besides',
 'all',
 'this',
 ',',
 'she',
 'be',
 'a',
 'wonderful',
 'at',
 'cook',
 'too',
 '.',
 '\n']

In [17]:
# Join the lemmas back into one space-separated string.
# The result gets its own name instead of rebinding `text`: `text` is the
# input of the lemmatization cell above, and overwriting it would make that
# cell process already-lemmatized text if it were run again.
lemmatized_text = " ".join(words)
lemmatized_text

'Latha be very multi talented girl . she be good at many skill like dancing , running , singing , play . she also like eat Pav Bhagi . she have a \n habit of fishing and swim too . besides all this , she be a wonderful at cook too . \n'