# Stemming

## Porter Stemmer

In [1]:
# Import the Natural Language Toolkit (NLTK) package
import nltk

# Import the Porter stemmer implementation from NLTK
from nltk.stem.porter import PorterStemmer

# Initialize the Porter stemmer; this instance is reused by the later Porter cells
p_stemmer = PorterStemmer()

In [2]:
# PrettyTable renders rows of data as an ASCII table
from prettytable import PrettyTable

# Sample words to stem (the Snowball cell below iterates over this same list)
words = [
    'run', 'runner', 'running', 'ran', 'runs',
    'easily', 'fairly', 'fairness',
]

# Two-column table: each input word next to its Porter stem
t = PrettyTable(field_names=["Original Word", "Stemmed Word"])

# Pair every word with its stem and append the pair as one row
for original, stemmed in zip(words, map(p_stemmer.stem, words)):
    t.add_row([original, stemmed])

# Show the finished table
print(t)

+---------------+--------------+
| Original Word | Stemmed Word |
+---------------+--------------+
|      run      |     run      |
|     runner    |    runner    |
|    running    |     run      |
|      ran      |     ran      |
|      runs     |     run      |
|     easily    |    easili    |
|     fairly    |    fairli    |
|    fairness   |     fair     |
+---------------+--------------+


<font color=lightgreen>Note how the stemmer leaves "runner" unchanged, treating it as a noun rather than as a verb form or participle of "run". Also, the adverbs "easily" and "fairly" are stemmed to the unusual roots "easili" and "fairli".</font>

## Snowball Stemmer

In [3]:
# Bring in NLTK's Snowball stemmer
from nltk.stem.snowball import SnowballStemmer

# Build a Snowball stemmer configured for English
s_stemmer = SnowballStemmer("english")

# Start a fresh two-column table for the Snowball results
t = PrettyTable(field_names=["Original Word", "Stemmed Word"])

# Stem each word and record it alongside the original
rows = [[original, s_stemmer.stem(original)] for original in words]
for row in rows:
    t.add_row(row)

# Show the finished table
print(t)

+---------------+--------------+
| Original Word | Stemmed Word |
+---------------+--------------+
|      run      |     run      |
|     runner    |    runner    |
|    running    |     run      |
|      ran      |     ran      |
|      runs     |     run      |
|     easily    |    easili    |
|     fairly    |     fair     |
|    fairness   |     fair     |
+---------------+--------------+


<font color=lightgreen>In this case, the Snowball stemmer performed the same as the Porter stemmer, with the exception that it handled "fairly" more appropriately, stemming it to "fair".</font>

# Testing the Stemmers with similar words

In [4]:
# Similar-looking words; the Snowball comparison in the next cell reuses this list
words = ["generous", "generation", "generously", "generate"]

# Fresh table for the Porter results, titled so the two comparisons can be told apart
t = PrettyTable(field_names=["Original Word", "Stemmed Word"])
t.title = "Porter Stemmer"

# Pair every word with its Porter stem and append the pair as one row
for original, stemmed in zip(words, map(p_stemmer.stem, words)):
    t.add_row([original, stemmed])

# Show the finished table
print(t)

+------------------------------+
|        Porter Stemmer        |
+---------------+--------------+
| Original Word | Stemmed Word |
+---------------+--------------+
|    generous   |    gener     |
|   generation  |    gener     |
|   generously  |    gener     |
|    generate   |    gener     |
+---------------+--------------+


In [5]:
# Fresh table for the Snowball results, titled so the two comparisons can be told apart
t = PrettyTable(field_names=["Original Word", "Stemmed Word"])
t.title = "Snowball Stemmer"

# Stem each word and record it alongside the original
rows = [[original, s_stemmer.stem(original)] for original in words]
for row in rows:
    t.add_row(row)

# Show the finished table
print(t)

+------------------------------+
|       Snowball Stemmer       |
+---------------+--------------+
| Original Word | Stemmed Word |
+---------------+--------------+
|    generous   |   generous   |
|   generation  |   generat    |
|   generously  |   generous   |
|    generate   |   generat    |
+---------------+--------------+
