In [2]:
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')

class LovinsStemmer:
    def stem(self, word):
        suffixes = ['ing', 'ly', 'ed', 'ious', 'ies', 'ive', 'es', 's', 'ment']
        for suffix in suffixes:
            if word.endswith(suffix):
                return word[:-len(suffix)]
        return word

# New text for processing
text = ("As the day drew to a close, the horizon was painted with brilliant shades of red and orange. "
        "The sounds of chirping crickets filled the air, accompanied by the gentle rustle of leaves in the wind.")

# Preprocess the text
text = text.lower()
text = re.sub(r'[^a-z0-9\s]', '', text)

# Tokenization
tokens = word_tokenize(text)

# Remove stopwords
stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in tokens if word not in stop_words]

# Initialize stemmers
porter_stemmer = PorterStemmer()
lovins_stemmer = LovinsStemmer()

# Apply stemming
porter_stemmed_words = [porter_stemmer.stem(token) for token in filtered_tokens]
lovins_stemmed_words = [lovins_stemmer.stem(token) for token in filtered_tokens]

# Display results
print("{:<15} {:<15} {:<15}".format("Original", "Porter Stemmer", "Lovins Stemmer"))
print("-" * 45)
for i in range(len(filtered_tokens)):
    print("{:<15} {:<15} {:<15}".format(filtered_tokens[i], porter_stemmed_words[i], lovins_stemmed_words[i]))



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Original        Porter Stemmer  Lovins Stemmer 
---------------------------------------------
day             day             day            
drew            drew            drew           
close           close           close          
horizon         horizon         horizon        
painted         paint           paint          
brilliant       brilliant       brilliant      
shades          shade           shad           
red             red             r              
orange          orang           orange         
sounds          sound           sound          
chirping        chirp           chirp          
crickets        cricket         cricket        
filled          fill            fill           
air             air             air            
accompanied     accompani       accompani      
gentle          gentl           gentle         
rustle          rustl           rustle         
leaves          leav            leav           
wind            wind            wind      

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
