In [5]:
# 1. Import NLTK libraries
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag
from nltk.corpus import wordnet

# 2. Download necessary NLTK data
# Both the legacy and the current resource names are requested so the script
# runs regardless of the installed NLTK version:
#   - 'punkt' / 'averaged_perceptron_tagger' are the names older releases load.
#   - 'punkt_tab' / 'averaged_perceptron_tagger_eng' are what newer NLTK
#     releases (3.9+) look up in word_tokenize() and pos_tag(); without them
#     those calls raise LookupError on a fresh install.
# nltk.download() is a no-op (apart from a log line) when a package is
# already up to date, so repeating the cell is cheap.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('wordnet')

# 3. Sample input text for the demo
sample_text = "This is an example sentence with stopwords."

# 4. Split the text into word/punctuation tokens
tokens = word_tokenize(sample_text)

# 5. Drop English stop words (case-insensitive comparison; a set gives
#    constant-time membership checks)
stop_words = set(stopwords.words('english'))
filtered_tokens = [
    token
    for token in tokens
    if token.lower() not in stop_words
]

# 6. Create the stemmer and the lemmatizer
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# 7. Stem every remaining token
stemmed_words = list(map(stemmer.stem, filtered_tokens))

# 8. Function to get the POS tag for lemmatization
def get_wordnet_pos(tag):
    """Map a POS tag string to the matching WordNet part-of-speech constant.

    Args:
        tag: POS tag string; this script passes the tags returned by
            ``pos_tag``.

    Returns:
        ``wordnet.ADJ`` for tags starting with ``'J'``, ``wordnet.VERB`` for
        ``'V'``, ``wordnet.NOUN`` for ``'N'``, ``wordnet.ADV`` for ``'R'``,
        and ``wordnet.NOUN`` for any other tag.
    """
    prefix_to_pos = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, wordnet_pos in prefix_to_pos:
        if tag.startswith(prefix):
            return wordnet_pos
    return wordnet.NOUN  # default to noun


# 9. Lemmatize each filtered token, using its POS tag to choose the WordNet
#    part of speech passed to the lemmatizer
pos_tags = pos_tag(filtered_tokens)
lemmatized_words = [
    lemmatizer.lemmatize(token, pos=get_wordnet_pos(token_tag))
    for token, token_tag in pos_tags
]

# 10. Print every stage of the pipeline
print("Original Text: ", sample_text)
print("\nTokenized Words: ", tokens)
print("\nFiltered Words (after removing stop words): ", filtered_tokens)
print("\nStemmed Words: ", stemmed_words)
print("\nLemmatized Words: ", lemmatized_words)


Original Text:  This is an example sentence with stopwords.

Tokenized Words:  ['This', 'is', 'an', 'example', 'sentence', 'with', 'stopwords', '.']

Filtered Words (after removing stop words):  ['example', 'sentence', 'stopwords', '.']

Stemmed Words:  ['exampl', 'sentenc', 'stopword', '.']

Lemmatized Words:  ['example', 'sentence', 'stopwords', '.']


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
