In [5]:
import nltk
from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer, RegexpStemmer
from nltk.tokenize import word_tokenize

# Sample text documents
documents = [
    "The cats are running and jumping in the garden.",
    "She is a beautiful runner and loves to run fast.",
    "Running helps to build stamina and strength.",
    "He ran swiftly and caught the ball."
]

# Initialize stemmers
porter_stemmer = PorterStemmer()
lancaster_stemmer = LancasterStemmer()
snowball_stemmer = SnowballStemmer("english")

# Define a simple regex for stemming (this can be customized)
regex_pattern = r'(ing|ed|es|s)$'
regex_stemmer = RegexpStemmer(regex_pattern)


In [6]:

# Function to apply different stemmers
def apply_stemmers(documents):
    results = {}
    
    for doc in documents:
        # Tokenize the document
        tokens = word_tokenize(doc.lower())
        
        # Apply different stemmers
        porter_stems = [porter_stemmer.stem(token) for token in tokens]
        lancaster_stems = [lancaster_stemmer.stem(token) for token in tokens]
        snowball_stems = [snowball_stemmer.stem(token) for token in tokens]
        regex_stems = [regex_stemmer.stem(token) for token in tokens]
        
        # Store results
        results[doc] = {
            'porter': porter_stems,
            'lancaster': lancaster_stems,
            'snowball': snowball_stems,
            'regex': regex_stems
        }
    
    return results

# Apply the stemmers to the sample documents
stemmed_results = apply_stemmers(documents)

# Print the results
for original_doc, stems in stemmed_results.items():
    print(f"\nOriginal Document: {original_doc}")
    for stemmer_name, stemmed_words in stems.items():
        print(f"{stemmer_name.capitalize()} Stems: {stemmed_words}")



Original Document: The cats are running and jumping in the garden.
Porter Stems: ['the', 'cat', 'are', 'run', 'and', 'jump', 'in', 'the', 'garden', '.']
Lancaster Stems: ['the', 'cat', 'ar', 'run', 'and', 'jump', 'in', 'the', 'gard', '.']
Snowball Stems: ['the', 'cat', 'are', 'run', 'and', 'jump', 'in', 'the', 'garden', '.']
Regex Stems: ['the', 'cat', 'are', 'runn', 'and', 'jump', 'in', 'the', 'garden', '.']

Original Document: She is a beautiful runner and loves to run fast.
Porter Stems: ['she', 'is', 'a', 'beauti', 'runner', 'and', 'love', 'to', 'run', 'fast', '.']
Lancaster Stems: ['she', 'is', 'a', 'beauty', 'run', 'and', 'lov', 'to', 'run', 'fast', '.']
Snowball Stems: ['she', 'is', 'a', 'beauti', 'runner', 'and', 'love', 'to', 'run', 'fast', '.']
Regex Stems: ['she', 'i', 'a', 'beautiful', 'runner', 'and', 'lov', 'to', 'run', 'fast', '.']

Original Document: Running helps to build stamina and strength.
Porter Stems: ['run', 'help', 'to', 'build', 'stamina', 'and', 'strength', 

In [4]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Sample text documents
documents = [
    "The cats are running and jumping in the garden.",
    "She is a beautiful runner and loves to run fast.",
    "Running helps to build stamina and strength.",
    "He ran swiftly and caught the ball."
]

# Initialize the WordNet Lemmatizer
lemmatizer = WordNetLemmatizer()

# Function to apply lemmatization
def apply_lemmatization(doc):
    # Tokenize the document
    tokens = word_tokenize(doc)
    
    # Apply lemmatization to each token
    lemmatized_tokens = [lemmatizer.lemmatize(token,pos='v') for token in tokens]
    
    return {
        "original": tokens,
        "lemmatized": lemmatized_tokens
    }

# Process each document and print results
for doc in documents:
    results = apply_lemmatization(doc)
    print(f"Original Document: {results['original']}")
    print(f"Lemmatized: {results['lemmatized']}")
    print("-" * 40)


Original Document: ['The', 'cats', 'are', 'running', 'and', 'jumping', 'in', 'the', 'garden', '.']
Lemmatized: ['The', 'cat', 'be', 'run', 'and', 'jump', 'in', 'the', 'garden', '.']
----------------------------------------
Original Document: ['She', 'is', 'a', 'beautiful', 'runner', 'and', 'loves', 'to', 'run', 'fast', '.']
Lemmatized: ['She', 'be', 'a', 'beautiful', 'runner', 'and', 'love', 'to', 'run', 'fast', '.']
----------------------------------------
Original Document: ['Running', 'helps', 'to', 'build', 'stamina', 'and', 'strength', '.']
Lemmatized: ['Running', 'help', 'to', 'build', 'stamina', 'and', 'strength', '.']
----------------------------------------
Original Document: ['He', 'ran', 'swiftly', 'and', 'caught', 'the', 'ball', '.']
Lemmatized: ['He', 'run', 'swiftly', 'and', 'catch', 'the', 'ball', '.']
----------------------------------------
