# Stemming In NLTK


In [1]:
from nltk.stem import PorterStemmer
# Single shared stemmer instance, reused by the cells below.
stemmer = PorterStemmer()

In [2]:
# Stemming is a text preprocessing technique used in natural language
# processing (NLP) to reduce words to their root or base form. The result is
# not always a dictionary word (e.g. "ability" -> "abil"), and irregular forms
# such as "ate" come back unchanged -- see the output below.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
]

# Print each word next to its stem.
for word in words:
    print(f"{word} | {stemmer.stem(word)}")

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet


# Lemmatization In spaCy

In [3]:
import spacy
# Lemmatization analyzes the context of a word -- its intended part of speech,
# its meaning, and the surrounding sentence -- to map it to its dictionary
# form (lemma). For example, the word "saw" could be returned as "see" or
# "saw" depending on whether it is used as a verb or a noun in the sentence.


In [5]:
# Load spaCy's small English pipeline.
nlp = spacy.load("en_core_web_sm")

# Lemmatize the same words that were stemmed in the NLTK section, plus
# "better", so the two techniques can be compared side by side.
# To see lemmatization on running text, swap in a full sentence such as:
#   Mando talked for 3 hours although talking isn't his thing
doc = nlp("eating eats eat ate adjustable rafting ability meeting better")

# Print each token's text next to its lemma.
for token in doc:
    print(token.text, "|", token.lemma_)

eating | eat
eats | eat
eat | eat
ate | eat
adjustable | adjustable
rafting | raft
ability | ability
meeting | meeting
better | well


**Customizing lemmatizer**

In [6]:
# Show the names of the components in the loaded pipeline; the
# "attribute_ruler" entry is the one customized in the next cell.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [7]:
# Override lemmas for slang terms through the pipeline's attribute ruler:
# any token whose exact text is "Bro" or "Brah" is assigned the lemma "Brother".
# NOTE(review): re-running this cell calls ar.add again with the same patterns.
ar = nlp.get_pipe("attribute_ruler")
slang_patterns = [
    [{"TEXT": "Bro"}],
    [{"TEXT": "Brah"}],
]
slang_attrs = {"LEMMA": "Brother"}
ar.add(slang_patterns, slang_attrs)

# Re-run the pipeline on a sample sentence and show each token with its lemma.
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")
for token in doc:
    print(f"{token.text} | {token.lemma_}")

Bro | Brother
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [8]:
# Index 6 is the "Brah" token: punctuation marks are tokens too, so
# "Bro , you wanna go ?" occupies indices 0-5.
doc[6]

Brah

In [9]:
# Lemma of the token at index 6 ("Brah"); it shows the custom value set
# through the attribute ruler patterns.
doc[6].lemma_

'Brother'