## NLTK

In [1]:
import nltk

### Stemming

In [2]:
from nltk.stem import PorterStemmer

In [3]:
# Single Porter stemmer instance, reused by the stemming loop below.
stemmer = PorterStemmer()

In [4]:
# Sample vocabulary: inflected verb forms, derived words, and two capitalized
# words, used to show how the stemmer treats each kind of input.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
    "Unfortunately", "Impossible",
]

In [5]:
# Print every sample word next to the stem the Porter stemmer produces for it.
# zip/map are lazy, so each stem is computed right before its line is printed.
for word, stem in zip(words, map(stemmer.stem, words)):
    print(word, '|', stem)

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet
Unfortunately | unfortun
Impossible | imposs


### Lemmatization

In [6]:
from nltk.stem import WordNetLemmatizer

In [7]:
# Single WordNet lemmatizer instance, reused by the lemmatization loop below.
lemmatizer = WordNetLemmatizer()

In [8]:
# Same sample vocabulary as in the stemming section, repeated here so the
# lemmatizer output can be compared word-for-word with the stemmer output.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
    "Unfortunately", "Impossible",
]

In [9]:
# Fetch the WordNet corpus into the local nltk_data directory (needs network
# access the first time). The recorded output shows the call returning True
# when the package is already up to date.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /Users/raihan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [10]:
# Lemmatize every sample word as a verb (pos='v') and print it next to the
# original form.
for word in words:
    verb_lemma = lemmatizer.lemmatize(word, pos='v')
    print(word, '|', verb_lemma)

eating | eat
eats | eat
eat | eat
ate | eat
adjustable | adjustable
rafting | raft
ability | ability
meeting | meet
Unfortunately | Unfortunately
Impossible | Impossible


## spaCy

In [11]:
import spacy

In [12]:
# Load the 'en_core_web_sm' pipeline; every spaCy example below runs through this object.
# NOTE(review): presumably the model package must already be installed in the
# kernel's environment — confirm before a fresh run.
nlp = spacy.load('en_core_web_sm')

In [13]:
# Two inputs for the lemmatizer demo: a natural sentence, and a bare list of
# word forms with no sentence context.
sentence_text = "Raj talked for 3 hours although talking isn't his thing"
word_forms_text = "eating eats eat ate adjustable rafting ability meeting better"

doc1 = nlp(sentence_text)
doc2 = nlp(word_forms_text)

In [14]:
# Show each token of the sentence next to the lemma spaCy assigned to it.
for token in doc1:
    lemma = token.lemma_
    print(token, '|', lemma)

Raj | raj
talked | talk
for | for
3 | 3
hours | hour
although | although
talking | talk
is | be
n't | not
his | his
thing | thing


In [15]:
# Show each token of the word-form list next to its lemma.
token_lemma_pairs = ((tok, tok.lemma_) for tok in doc2)
for token, lemma in token_lemma_pairs:
    print(token, '|', lemma)

eating | eat
eats | eat
eat | eat
ate | eat
adjustable | adjustable
rafting | raft
ability | ability
meeting | meeting
better | well


### Customize lemmatizer

In [16]:
# Display the names of the pipeline components; the recorded output lists
# both 'attribute_ruler' and 'lemmatizer', which the customization below uses.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [17]:
# Baseline parse of a slang-heavy text, taken before any customization of the
# pipeline, so the lemmas of "Bro" and "Bruh" can be compared afterwards.
doc = nlp("Bro, you wanna go? Bruh, don't say no! I am exhausted")

In [18]:
# Print token | lemma for the slang text as parsed by the current pipeline.
for position in range(len(doc)):
    token = doc[position]
    print(token, '|', token.lemma_)

Bro | bro
, | ,
you | you
wanna | wanna
go | go
? | ?
Bruh | bruh
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [24]:
# Teach the pipeline's attribute ruler that the slang tokens "Bro" and "Bruh"
# (exact, case-sensitive text match) should both get the lemma "Brother".
ar = nlp.get_pipe('attribute_ruler')
slang_patterns = [[{"TEXT": "Bro"}], [{"TEXT": "Bruh"}]]
slang_attrs = {"LEMMA": "Brother"}

# ar.add() appends a new rule on every call, so re-running this cell would
# keep stacking identical rules onto the shared `nlp` object. Register the
# rule only when an identical one is not already present, which makes the
# cell safe to execute repeatedly.
rule_already_present = any(
    rule.get("patterns") == slang_patterns and rule.get("attrs") == slang_attrs
    for rule in ar.patterns
)
if not rule_already_present:
    ar.add(slang_patterns, slang_attrs)

# Re-parse the same text so the new rule is applied to fresh tokens.
doc = nlp("Bro, you wanna go? Bruh, don't say no! I am exhausted")


In [25]:
# Print token | lemma for the current `doc`.
for token in doc:
    fields = (token, '|', token.lemma_)
    print(*fields)

Bro | Brother
, | ,
you | you
wanna | wanna
go | go
? | ?
Bruh | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust
