In [3]:
import nltk
import spacy

#### Stemming in NLTK

In [4]:
from nltk.stem import PorterStemmer

# Porter stemming strips suffixes by rule, so a stem is not guaranteed to be a
# dictionary word and irregular forms are left as they are (compare the
# printed results for "ability" and "ate").
stemmer = PorterStemmer()

words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
]

# Pair every word with its stem and print them as "word | stem".
stems = map(stemmer.stem, words)
for word, stem in zip(words, stems):
    print(word, "|", stem)

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet


#### Lemmatization in spaCy

In [6]:
# Load the small English pipeline and run it over the same words used for the
# stemming demo (plus "better") so the two approaches can be compared.
nlp = spacy.load("en_core_web_sm")

sentence = "eating eats eat ate adjustable rafting ability meeting better"
doc = nlp(sentence)

# Print each token next to the lemma spaCy assigned to it.
for token in doc:
    lemma = token.lemma_
    print(token, "|", lemma)

eating | eat
eats | eat
eat | eat
ate | eat
adjustable | adjustable
rafting | raft
ability | ability
meeting | meeting
better | well


In [7]:
# Show the names of the components in the loaded pipeline, in order.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

### Customize Lemma

In [None]:
# Customize lemmas through the pipeline's attribute ruler: any token whose
# text matches one of the patterns below gets the lemma "Brother".
ar = nlp.get_pipe('attribute_ruler')
ar.add(
    [
        # One pattern per slang spelling; each pattern describes a single token.
        [{"TEXT": "Bro"}],
        [{"TEXT": "Brah"}]
    ],
    {"LEMMA": "Brother"})

# The sentence ends on a real inflected word ("exhausted") so an ordinary
# lemma is printed alongside the two overridden tokens.
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

# Print each token's text next to its (possibly overridden) lemma.
for token in doc:
    print(token.text, "|", token.lemma_)

### Exercise

In [10]:
# Libraries used by the exercise: NLTK for stemming, spaCy for lemmatization.
import nltk
import spacy
from nltk.stem import PorterStemmer

# Objects shared by the exercise cells below.
stemmer = PorterStemmer()
nlp = spacy.load("en_core_web_sm")

In [16]:
# Exercise, part 1: stem each word with NLTK's Porter stemmer.
lst_words = [
    'running', 'painting', 'walking', 'dressing', 'likely',
    'children', 'whom', 'good', 'ate', 'fishing',
]

# Print every word next to its stem as "word | stem".
stems = map(stemmer.stem, lst_words)
for word, stem in zip(lst_words, stems):
    print(word, "|", stem)

running | run
painting | paint
walking | walk
dressing | dress
likely | like
children | children
whom | whom
good | good
ate | ate
fishing | fish


In [15]:
# Exercise, part 2: lemmatize the same words with spaCy.
exercise_sentence = "running painting walking dressing likely children whom good ate fishing"
doc = nlp(exercise_sentence)

# Print every token next to the lemma spaCy assigned to it.
for token in doc:
    lemma = token.lemma_
    print(token, " | ", lemma)

running  |  run
painting  |  paint
walking  |  walk
dressing  |  dress
likely  |  likely
children  |  child
whom  |  whom
good  |  good
ate  |  eat
fishing  |  fishing


In [26]:
# Exercise, part 3: paragraph to lemmatize with spaCy.
# The literal spans several lines, so the string itself contains newline
# characters in addition to the words and punctuation.
text = """Latha is very multi talented girl.She is good at many skills like dancing, running, singing, playing.She also likes eating Pav Bhagi. she has a 
habit of fishing and swimming too.Besides all this, she is a wonderful at cooking too.
"""

# Step 1: run the paragraph through the spaCy pipeline.
doc = nlp(text)

# Step 2: take the base form ('lemma_') of every token the pipeline produced.
all_base_words = [token.lemma_ for token in doc]

# Show the base forms one per line.
for base_word in all_base_words:
    print(base_word)

# Step 3: stitch the base forms back into a single space-separated string.
final_base_text = ' '.join(all_base_words)
print(final_base_text)

Latha
be
very
multi
talented
girl
.
she
be
good
at
many
skill
like
dancing
,
running
,
singing
,
play
.
she
also
like
eat
Pav
Bhagi
.
she
have
a


habit
of
fishing
and
swim
too
.
besides
all
this
,
she
be
a
wonderful
at
cook
too
.


Latha be very multi talented girl . she be good at many skill like dancing , running , singing , play . she also like eat Pav Bhagi . she have a 
 habit of fishing and swim too . besides all this , she be a wonderful at cook too . 

