# Stemming

#### Stemming in NLP is the process of reducing words to their base or root form by chopping off suffixes.

In [2]:
# Several stemmers are available; the most commonly used ones are PorterStemmer and SnowballStemmer
from nltk.stem import PorterStemmer, SnowballStemmer

# Build each stemmer once; the example cells below reuse these two objects.
ps = PorterStemmer()
# The Snowball stemmer is constructed for a named language.
ss = SnowballStemmer(language='english')

#### 1. Basic words

In [12]:
# Inflections of "learn" plus a few other nouns and verbs.
words1 = [
    "learning", "learned", "learns",
    "projects", "practitioner", "practicing",
]
# Stem every word with each stemmer and show the results side by side.
print("Porter:", list(map(ps.stem, words1)))
print("Snowball:", list(map(ss.stem, words1)))

Porter: ['learn', 'learn', 'learn', 'project', 'practition', 'practic']
Snowball: ['learn', 'learn', 'learn', 'project', 'practition', 'practic']


#### 2. Words with Suffixes and Prefixes

In [13]:
# Noun, verb and adverb forms that all begin with "automat".
words2 = [
    "automation", "automate", "automated",
    "automating", "automatically",
]
# Stem every word with each stemmer and show the results side by side.
print("Porter:", list(map(ps.stem, words2)))
print("Snowball:", list(map(ss.stem, words2)))

Porter: ['autom', 'autom', 'autom', 'autom', 'automat']
Snowball: ['autom', 'autom', 'autom', 'autom', 'automat']


#### 3. Compound Words & Hyphenated Terms

In [14]:
# Each hyphenated term is handed to the stemmers as one token (no splitting on "-").
words3 = [
    "deep-learning", "state-of-the-art",
    "pre-trained", "fine-tuning",
]
# Stem every term with each stemmer and show the results side by side.
print("Porter:", list(map(ps.stem, words3)))
print("Snowball:", list(map(ss.stem, words3)))

Porter: ['deep-learn', 'state-of-the-art', 'pre-train', 'fine-tun']
Snowball: ['deep-learn', 'state-of-the-art', 'pre-train', 'fine-tun']


#### 4. Words with Special Characters & Numbers

In [15]:
# Tokens that mix letters with digits, a slash, or a hyphen.
words4 = [
    "YOLOv8", "AI/ML",
    "neuroimaging2024", "data-driven",
]
# Stem every token with each stemmer and show the results side by side.
print("Porter:", list(map(ps.stem, words4)))
print("Snowball:", list(map(ss.stem, words4)))

Porter: ['yolov8', 'ai/ml', 'neuroimaging2024', 'data-driven']
Snowball: ['yolov8', 'ai/ml', 'neuroimaging2024', 'data-driven']


#### 5. Adjectives and Comparative Forms

In [16]:
# Comparative ("-er") and superlative ("-est") adjective forms, plus irregular "better"/"best".
words5 = [
    "better", "best",
    "faster", "fastest",
    "larger", "largest",
]
# Stem every adjective with each stemmer and show the results side by side.
print("Porter:", list(map(ps.stem, words5)))
print("Snowball:", list(map(ss.stem, words5)))


Porter: ['better', 'best', 'faster', 'fastest', 'larger', 'largest']
Snowball: ['better', 'best', 'faster', 'fastest', 'larger', 'largest']


#### 6. Verbs in Different Forms

In [17]:
# Verb inflections of "work" and "run" ("-ing", "-ed", "-s").
words6 = [
    "working", "worked", "works",
    "runs", "running",
]
# Stem every verb form with each stemmer and show the results side by side.
print("Porter:", list(map(ps.stem, words6)))
print("Snowball:", list(map(ss.stem, words6)))


Porter: ['work', 'work', 'work', 'run', 'run']
Snowball: ['work', 'work', 'work', 'run', 'run']


## Conclusion:

##### Porter Stemmer: Simple and widely used, but it may sometimes over-stem.
##### Snowball Stemmer: An improved version of Porter that is generally more accurate and flexible. Note that on every example above, the two stemmers produce identical stems.