###### Porter Stemmer Algorithm

In [1]:
from  nltk.stem import PorterStemmer

In [2]:
# Create a single Porter stemmer instance; the cells below reuse it for every word.
porter = PorterStemmer()

In [3]:
# Sample input: several forms of "eat" plus a few other inflected/derived words,
# used to see what stem each one is reduced to.
words = ["eating", "eats", "eat", "ate", "adjustable", "rafting", "ability", "meeting"]

In [6]:
# Print the Porter stem of each sample word, one per line, in list order.
for stem in map(porter.stem, words):
    print(stem)

eat
eat
eat
ate
adjust
raft
abil
meet


In [25]:
# A sentence must first be tokenized into individual words before stemming.
from nltk.tokenize import word_tokenize

In [9]:
# Split the example sentence into tokens so each one can be stemmed separately.
# NOTE: this rebinds `words`, replacing the sample word list defined earlier.
sentence = "Tokenization is an important step in natural language processing."
words=word_tokenize(sentence)

In [10]:
words

['Tokenization',
 'is',
 'an',
 'important',
 'step',
 'in',
 'natural',
 'language',
 'processing',
 '.']

In [11]:
# Stem every token of the sentence with the Porter stemmer, preserving order.
stemmed_words = list(map(porter.stem, words))

In [12]:
stemmed_words

['token',
 'is',
 'an',
 'import',
 'step',
 'in',
 'natur',
 'languag',
 'process',
 '.']

###### Snowball Stemmer

In [13]:
from nltk.stem import SnowballStemmer

In [14]:
# The Snowball stemmer is constructed with a language name; English is used here.
snowball=SnowballStemmer("english")

In [15]:
# Re-create the sample word list: `words` was rebound to the sentence tokens above.
words = ["eating", "eats", "eat", "ate", "adjustable", "rafting", "ability", "meeting"]

In [16]:
# Stem each sample word with the Snowball stemmer, preserving order.
stemmed_words = list(map(snowball.stem, words))

In [17]:
stemmed_words

['eat', 'eat', 'eat', 'ate', 'adjust', 'raft', 'abil', 'meet']

###### Lancaster Stemmer

In [18]:
from nltk.stem import LancasterStemmer

In [19]:
# Create the Lancaster stemmer instance used in the cells below.
lancaster=LancasterStemmer()

In [22]:
# Same sample words as in the Porter and Snowball sections, so the results
# of the three stemmers can be compared side by side.
words = ["eating", "eats", "eat", "ate", "adjustable", "rafting", "ability", "meeting"]

In [24]:
# Stem each sample word with the Lancaster stemmer, preserving order.
stemmed_words = list(map(lancaster.stem, words))
# A bare name on the last line makes the list the cell's displayed output.
stemmed_words

['eat', 'eat', 'eat', 'at', 'adjust', 'raft', 'abl', 'meet']

###### Customizing the Stemming Rules

In [26]:
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# Example sentence
sentence = "Customization is possible with custom stemming rules."

# Create a PorterStemmer object
porter = PorterStemmer()

# Custom stemming rules: maps a lowercase token to the stem that should be
# used instead of the Porter stemmer's result.
# Keys must be lowercase because the lookup below lowercases each token.
custom_rules = {"customization": "custom", "rules": "rule"}

# Tokenize the sentence into words
words = word_tokenize(sentence)

# Apply the custom rules first and fall back to the Porter stemmer otherwise.
# The lookup is case-insensitive: without lowercasing, a capitalized token such
# as "Customization" would never match its lowercase rule and would silently
# bypass the custom rules.
stemmed_words = []
for word in words:
    rule_key = word.lower()
    if rule_key in custom_rules:
        stemmed_words.append(custom_rules[rule_key])
    else:
        stemmed_words.append(porter.stem(word))

# Print the result
print("Original Sentence:", sentence)
print("Stemmed Words:", stemmed_words)


Original Sentence: Customization is possible with custom stemming rules.
Stemmed Words: ['custom', 'is', 'possibl', 'with', 'custom', 'stem', 'rule', '.']
