In [3]:
# Install NLTK if not already installed
!pip install nltk

# Import necessary libraries
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, SnowballStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
import os

# Fetch every NLTK data package this cell relies on, in a fixed order:
# the punkt / punkt_tab tokenizer data and the WordNet + OMW corpora.
for resource_name in ("punkt", "wordnet", "omw-1.4", "punkt_tab"):
    nltk.download(resource_name)

# Input sentence shared by all three tasks below.
sample_text = (
    "Cats running faster than mice have often puzzled scientists."
)

# Task 1: Tokenization
# Split the sentence into word and punctuation tokens and show them.
tokens = word_tokenize(sample_text)
print(f"Tokenized words: {tokens}")

# Task 2: Stemming
# Build both stemmers up front so their outputs can be compared side by side.
porter_stemmer = PorterStemmer()
snowball_stemmer = SnowballStemmer('english')

# Apply each stemmer to every token, preserving token order.
porter_stemmed = list(map(porter_stemmer.stem, tokens))
snowball_stemmed = list(map(snowball_stemmer.stem, tokens))

print(f"\nPorter Stemmer Results: {porter_stemmed}")
print(f"Snowball Stemmer Results: {snowball_stemmed}")

# Task 3: Lemmatization
lemmatizer = WordNetLemmatizer()

# Every token is looked up with the verb part of speech.
# NOTE(review): because the POS is fixed to VERB, noun tokens are not reduced
# by this call — confirm whether per-token POS tagging is wanted instead.
lemmatized_words = [
    lemmatizer.lemmatize(word, pos=wordnet.VERB) for word in tokens
]
print(f"\nLemmatized words: {lemmatized_words}")

# Save the results to a file
# NOTE(review): the name below still contains the literal "<HallticketNo>" and
# "<BatchNo>" placeholders and ends in .ipynb even though plain text (not
# notebook JSON) is written to it — confirm the intended file name.
filename = "<HallticketNo>-<BatchNo>-Lab-2.ipynb"

# Assemble the whole report first so the file is written in a single call.
report = (
    "# Tokenization\n"
    f"Tokenized words: {tokens}\n\n"
    "# Stemming\n"
    f"Porter Stemmer Results: {porter_stemmed}\n"
    f"Snowball Stemmer Results: {snowball_stemmed}\n\n"
    "# Lemmatization\n"
    f"Lemmatized words: {lemmatized_words}\n"
)

# An explicit encoding keeps the output identical across platforms instead of
# depending on the locale's default text encoding.
with open(filename, "w", encoding="utf-8") as file:
    file.write(report)

# Report success only after the `with` block has closed (and flushed) the file.
print(f"\nResults saved in {filename}")



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...


Tokenized words: ['Cats', 'running', 'faster', 'than', 'mice', 'have', 'often', 'puzzled', 'scientists', '.']

Porter Stemmer Results: ['cat', 'run', 'faster', 'than', 'mice', 'have', 'often', 'puzzl', 'scientist', '.']
Snowball Stemmer Results: ['cat', 'run', 'faster', 'than', 'mice', 'have', 'often', 'puzzl', 'scientist', '.']

Lemmatized words: ['Cats', 'run', 'faster', 'than', 'mice', 'have', 'often', 'puzzle', 'scientists', '.']

Results saved in <HallticketNo>-<BatchNo>-Lab-2.ipynb


[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
