# Stemming 

In [1]:
from nltk.stem import PorterStemmer

In [2]:
# Sample vocabulary for the Porter demo: inflected verb forms plus a few
# upper-case entries.
words = [
    "eating",
    "eats",
    "eaten",
    "writing",
    "writes",
    "programming",
    "PROGRAMS",
    "HISTORY",
    "FINALLY",
    "FINALIZED",
]

### Porter Stemmer

In [3]:
# Build one Porter stemmer (default settings) and reuse it for every word.
port_stem = PorterStemmer()

In [4]:
# Show each input word next to its Porter stem, one pair per line.
for token in words:
    stemmed = port_stem.stem(token)
    print(f"{token} -> {stemmed}")

eating -> eat
eats -> eat
eaten -> eaten
writing -> write
writes -> write
programming -> program
PROGRAMS -> program
HISTORY -> histori
FINALLY -> final
FINALIZED -> final


### PorterStemmer does not always return a real word (e.g. HISTORY -> histori), and it leaves irregular forms untouched (eaten -> eaten)

### Regexp Stemmer

In [5]:
from nltk.stem import RegexpStemmer

In [6]:
# Custom stripping rule, written as one alternation. The adjacent raw-string
# fragments are concatenated into a single pattern string.
regexp = (
    r'ing$'     # trailing "ing"
    r'|s$'      # trailing "s"
    r'|e$'      # trailing "e"
    r'|able$'   # trailing "able"
    r'|^un'     # leading "un"
)

# min=4: words shorter than 4 characters are returned unchanged.
reg_stem = RegexpStemmer(regexp, min=4)

### ing$ → removes "ing" at the end of a word

### s$ → removes "s" at the end

### e$ → removes "e" at the end

## able$ → removes "able" at the end

## ^un → removes "un" at the beginning

## min=4 → only words with at least 4 characters are stemmed; shorter words such as "was" and "bee" are returned unchanged. It does not limit the length of the result ("cars" → "car").

In [7]:
# Probe words for the suffix/prefix rules, including 3-letter words and
# words that merely happen to start with "un" or end in "s".
words = [
    "cars",
    "mass",
    "was",
    "bee",
    "compute",
    "advisable",
    "unhappy",
    "under",
]

In [8]:
# Pair every word with its regex-stemmed form and print one pair per line.
for original, stemmed in zip(words, map(reg_stem.stem, words)):
    print(original, '->', stemmed)

cars -> car
mass -> mas
was -> was
bee -> bee
compute -> comput
advisable -> advis
unhappy -> happy
under -> der


### Snowball Stemmer

In [9]:
from nltk.stem import SnowballStemmer

In [10]:
# Snowball stemmer configured for English; reused by the cells below.
ss = SnowballStemmer("english")

In [11]:
# Print "<word> -> <stem>" for each word using the Snowball stemmer.
for entry in words:
    print('{} -> {}'.format(entry, ss.stem(entry)))

cars -> car
mass -> mass
was -> was
bee -> bee
compute -> comput
advisable -> advis
unhappy -> unhappi
under -> under


In [12]:
# Stem a few extra examples and display them together as one tuple.
tuple(ss.stem(sample) for sample in ("fairly", "sportingly", "sponteneous"))

('fair', 'sport', 'sponten')