# Example of PorterStemmer()

In [1]:
import nltk

In [2]:
from nltk.stem import PorterStemmer
# Build the Porter stemmer once and reuse it for every sample word.
porter = PorterStemmer()

# Sample vocabulary: variants of "insert", "generous"/"generate", "eat" and "put".
words = [
    'Insert', 'Inserting', 'Insertion', 'Inserted', 'Inserts',
    'generous', 'generate', 'generously', 'generation',
    'eating', 'eats', 'eaten',
    'puts', 'putting',
]

# Print each original word next to the stem produced for it.
for word in words:
    stemmed = porter.stem(word)
    print(word, "--->", stemmed)

Insert ---> insert
Inserting ---> insert
Insertion ---> insert
Inserted ---> insert
Inserts ---> insert
generous ---> gener
generate ---> gener
generously ---> gener
generation ---> gener
eating ---> eat
eats ---> eat
eaten ---> eaten
puts ---> put
putting ---> put


# Example of SnowballStemmer()

In [3]:
from nltk.stem import SnowballStemmer
# Snowball stemmer configured for English input.
snowball = SnowballStemmer(language='english')

# Same sample vocabulary as the other stemmer demos, so the outputs can be compared.
words = [
    'Insert', 'Inserting', 'Insertion', 'Inserted', 'Inserts',
    'generous', 'generate', 'generously', 'generation',
    'eating', 'eats', 'eaten',
    'puts', 'putting',
]

# Pair every word with its stem lazily and print one "word ---> stem" line per pair.
for word, stem in zip(words, map(snowball.stem, words)):
    print(word, "--->", stem)

Insert ---> insert
Inserting ---> insert
Insertion ---> insert
Inserted ---> insert
Inserts ---> insert
generous ---> generous
generate ---> generat
generously ---> generous
generation ---> generat
eating ---> eat
eats ---> eat
eaten ---> eaten
puts ---> put
putting ---> put


# Example of LancasterStemmer()

In [4]:
from nltk.stem import LancasterStemmer
# Lancaster stemmer with its default settings.
lancaster = LancasterStemmer()

# Same sample vocabulary as the other stemmer demos, so the outputs can be compared.
words = [
    'Insert', 'Inserting', 'Insertion', 'Inserted', 'Inserts',
    'generous', 'generate', 'generously', 'generation',
    'eating', 'eats', 'eaten',
    'puts', 'putting',
]

# Walk the list by position and print one "word ---> stem" line per entry.
for position in range(len(words)):
    word = words[position]
    print(word, "--->", lancaster.stem(word))

Insert ---> insert
Inserting ---> insert
Insertion ---> insert
Inserted ---> insert
Inserts ---> insert
generous ---> gen
generate ---> gen
generously ---> gen
generation ---> gen
eating ---> eat
eats ---> eat
eaten ---> eat
puts ---> put
putting ---> put


# Comparison

In [7]:
from nltk.stem import PorterStemmer, SnowballStemmer, LancasterStemmer, RegexpStemmer
# One instance of each stemmer under comparison.
porter = PorterStemmer()
lancaster = LancasterStemmer()
snowball = SnowballStemmer(language='english')
# Regex-driven stemmer: the pattern targets a trailing "ing", "s", "able" or "ion".
# NOTE(review): min=4 is presumably the minimum word length to stem -- confirm in the NLTK docs.
regexp = RegexpStemmer('ing$|s$|able$|ion$', min=4)

# Sample vocabulary fed to all four stemmers.
word_list = [
    'Insert', 'Inserting', 'Insertion', 'Inserted', 'Inserts',
    'generous', 'generate', 'generously', 'generation',
    'eating', 'eats', 'eaten',
    'puts', 'putting',
]

# Shared row layout: five columns padded to 20/20/20/30/40 characters,
# used for both the header line and every data row so they stay aligned.
row_layout = "{0:20}{1:20}{2:20}{3:30}{4:40}"

# Header row.
print(row_layout.format("Word", "Porter Stemmer", "Snowball Stemmer", "Lancaster Stemmer", 'Regexp Stemmer'))

# One row per word: the word itself followed by each stemmer's result,
# in the same column order as the header.
for word in word_list:
    stems = (
        porter.stem(word),
        snowball.stem(word),
        lancaster.stem(word),
        regexp.stem(word),
    )
    print(row_layout.format(word, *stems))


Word                Porter Stemmer      Snowball Stemmer    Lancaster Stemmer             Regexp Stemmer                          
Insert              insert              insert              insert                        Insert                                  
Inserting           insert              insert              insert                        Insert                                  
Insertion           insert              insert              insert                        Insert                                  
Inserted            insert              insert              insert                        Inserted                                
Inserts             insert              insert              insert                        Insert                                  
generous            gener               generous            gen                           generou                                 
generate            gener               generat             gen                    