In [7]:
# 🔹 Porter stemmer demo: a common rule-based stemming technique in NLTK.
from nltk.stem import PorterStemmer

# 🔤 Sample vocabulary; the same list is fed to the Snowball stemmer later on.
words = [
    "eating", "eats", "eaten",
    "writing", "writes",
    "programming", "programs",
    "history",
    "finally", "finalized",
]

# One stemmer instance is enough; it is reused for every call below.
stemmer = PorterStemmer()

# Print each sample word next to the stem Porter produces for it.
for word in words:
    porter_stem = stemmer.stem(word)
    print(f"{word} ➡️ {porter_stem}")
# Expected output:
#   eating ➡️ eat
#   eats ➡️ eat
#   eaten ➡️ eaten
#   writing ➡️ write
#   writes ➡️ write
#   programming ➡️ program
#   programs ➡️ program
#   history ➡️ histori
#   finally ➡️ final
#   finalized ➡️ final

# ❗ Disadvantage:
# A stem is not guaranteed to be a real word (e.g. history ➡️ histori),
# which may confuse a downstream model.

# 🧪 Two more Porter examples, printed one stem per line.
for sample in ("congratulations", "sitting"):
    print(stemmer.stem(sample))
# Expected output:
#   congratul
#   sit
# 🔴 Issue:
# "congratulations" ➡️ "congratul": the stem no longer reads as a word.

# 🔹 2. Regexp (regex) stemmer
# ✅ Lets you define your own suffix-stripping rules as a regular expression.
from nltk.stem import RegexpStemmer

# The pattern is an alternation of end-anchored suffixes:
#   ing$  ➡️ word ends with "ing"
#   s$    ➡️ word ends with "s"
#   e$    ➡️ word ends with "e"
#   able$ ➡️ word ends with "able"
# Whichever suffix matches at the end of the word is removed.
reg_stemmer = RegexpStemmer('ing$|s$|e$|able$')

# Labelled first example.
print("reg_stemmer", reg_stemmer.stem("eating"))  # reg_stemmer eat

# 🔤 More examples, one stem per line.
for sample in ("programmable", "tables", "eating"):
    print(reg_stemmer.stem(sample))
# Expected output:
#   programm   (from "programmable")
#   table      (from "tables")
#   eat        (from "eating")

# 🔹 Snowball stemmer demo, configured for English.
from nltk.stem import SnowballStemmer

snowball = SnowballStemmer("english")

# Run the shared sample vocabulary through Snowball.
for word in words:
    snowball_stem = snowball.stem(word)
    print(f"{word} ➡️ {snowball_stem}")
# Expected output:
#   eating ➡️ eat
#   eats ➡️ eat
#   eaten ➡️ eaten
#   writing ➡️ write
#   writes ➡️ write
#   programming ➡️ program
#   programs ➡️ program
#   history ➡️ histori
#   finally ➡️ final
#   finalized ➡️ final

# 📌 Snowball vs Porter on the same two adverbs.
comparison_words = ("fairly", "sportingly")
print("Porter ➡️", *(stemmer.stem(w) for w in comparison_words))
print("Snowball ➡️", *(snowball.stem(w) for w in comparison_words))
# Expected output:
#   Porter ➡️ fairli sportingli
#   Snowball ➡️ fair sport
# ✅ For these words Snowball yields the cleaner root forms.

# ❗ Snowball is still not perfect:
for sample in ("goes", "going"):
    print(snowball.stem(sample))
# Expected output:
#   goe   (❌ incorrect stem for "goes")
#   go    (✅ correct stem for "going")







eating ➡️ eat
eats ➡️ eat
eaten ➡️ eaten
writing ➡️ write
writes ➡️ write
programming ➡️ program
programs ➡️ program
history ➡️ histori
finally ➡️ final
finalized ➡️ final
congratul
sit
reg_stemmer eat
programm
table
eat
eating ➡️ eat
eats ➡️ eat
eaten ➡️ eaten
writing ➡️ write
writes ➡️ write
programming ➡️ program
programs ➡️ program
history ➡️ histori
finally ➡️ final
finalized ➡️ final
Porter ➡️ fairli sportingli
Snowball ➡️ fair sport
goe
go
