# Stemmer using NLTK

In [1]:
import nltk
from nltk.stem.porter import PorterStemmer

In [2]:
# Porter stemmer instance; a later cell reuses it under this name.
p_stemmer = PorterStemmer()

# Sample vocabulary: forms of "run" plus two "-ly" adverbs.
words = ['run', 'ran', 'runs', 'runner', 'easily', 'fairly']

# Show every word next to the stem the Porter stemmer produces for it.
for word in words:
    print(f'{word}---->{p_stemmer.stem(word)}')

run---->run
ran---->ran
runs---->run
runner---->runner
easily---->easili
fairly---->fairli


In [3]:
from nltk.stem.snowball import SnowballStemmer

# Snowball stemmer configured for English; the following cells call it through `s_stemmer`.
s_stemmer = SnowballStemmer(language='english')

In [4]:
# Show every word in `words` next to its Snowball stem.
for word in words:
    print(f'{word}---->{s_stemmer.stem(word)}')

run---->run
ran---->ran
runs---->run
runner---->runner
easily---->easili
fairly---->fair


In [5]:
# Replace the sample vocabulary with four words that share the "gener" prefix.
words = [
    'generous',
    'generation',
    'generously',
    'generate',
]

In [6]:
# Snowball stems for the current `words` list.
for word in words:
    print(f'{word}---->{s_stemmer.stem(word)}')

generous---->generous
generation---->generat
generously---->generous
generate---->generat


In [7]:
# Porter stems for the current `words` list.
for word in words:
    print(f'{word}---->{p_stemmer.stem(word)}')

generous---->gener
generation---->gener
generously---->gener
generate---->gener


# Lemmatization using spaCy

In [1]:
import spacy

# Load the spaCy package named 'en_core_web_sm'; the returned object is called on raw text in a later cell.
nlp = spacy.load('en_core_web_sm')

In [15]:
# The text is kept verbatim (including "regulary" and "used to ran"):
# the recorded token table was produced from this exact string.
doc = nlp(
    'I am a runner and I run regulary in the running track '
    'where I used to ran in my childhood. '
    'Now Boby is running relentlessly.'
)

In [16]:
def show_lemma(doc):
    """Print one fixed-width row per token in ``doc``.

    Each row shows, in order: ``token.text`` (width 15), ``token.pos_``
    (width 10), ``token.lemma`` (left-aligned, width 25) and
    ``token.lemma_`` (width 15). Values longer than their column width
    are printed in full rather than truncated.

    Args:
        doc: Any iterable whose items expose ``text``, ``pos_``, ``lemma``
            and ``lemma_`` attributes.

    Returns:
        None. The rows are written to standard output.
    """
    # The explicit '<' on the third column forces left alignment for that field.
    row_template = '{:15}{:10}{:<25}{:15}'
    for tok in doc:
        print(row_template.format(tok.text, tok.pos_, tok.lemma, tok.lemma_))

In [17]:
# Print the text / POS / lemma hash / lemma table for the parsed sentence.
show_lemma(doc)

I              PRON      4690420944186131903      I              
am             AUX       10382539506755952630     be             
a              DET       11901859001352538922     a              
runner         NOUN      12640964157389618806     runner         
and            CCONJ     2283656566040971221      and            
I              PRON      4690420944186131903      I              
run            VERB      12767647472892411841     run            
regulary       ADV       4419975782549702806      regulary       
in             ADP       3002984154512732771      in             
the            DET       7425985699627899538      the            
running        NOUN      12212083579121184944     running        
track          NOUN      13353920538491180942     track          
where          SCONJ     16318918034475841628     where          
I              PRON      4690420944186131903      I              
used           VERB      6873750497785110593      use            
to        