In [1]:
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk import pos_tag

In [2]:
# Fetch every NLTK resource the cells below rely on (tokenizer model,
# stop-word list, WordNet, POS-tagger model).
# Newer NLTK releases look up the tokenizer and tagger models under the
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' ids; without them
# word_tokenize()/pos_tag() fail with LookupError on a fresh environment.
# The legacy ids are kept so older NLTK installations keep working.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [3]:
# Sample input: two short sentences, written one per line and joined by
# implicit string concatenation (note the trailing space after the first).
text = (
    "The children are playing in the playground. "
    "They are running fast and enjoying themselves."
)

In [4]:
# Split the sample text into tokens; punctuation marks become their own
# tokens. The language is spelled out explicitly (it is the default).
words = word_tokenize(text, language="english")
print("Tokenized Words:", words)

Tokenized Words: ['The', 'children', 'are', 'playing', 'in', 'the', 'playground', '.', 'They', 'are', 'running', 'fast', 'and', 'enjoying', 'themselves', '.']


In [5]:
# Build the English stop-word lookup as a set for O(1) membership tests.
english_stop_list = stopwords.words('english')
stop_words = set(english_stop_list)

# Keep every token whose lower-cased form is not a stop word. Punctuation
# tokens are not stop words, so they pass through unchanged.
filtered_words = list(
    filter(lambda token: token.lower() not in stop_words, words)
)
print("Filtered Words:", filtered_words)

Filtered Words: ['children', 'playing', 'playground', '.', 'running', 'fast', 'enjoying', '.']


In [6]:
# One shared instance of each normalizer, reused by the cells below.
stemmer, lemmatizer = PorterStemmer(), WordNetLemmatizer()

In [7]:
# Apply the Porter stemmer to each filtered token, preserving order.
stemmed_words = list(map(stemmer.stem, filtered_words))
print("Stemmed Words:", stemmed_words)

Stemmed Words: ['children', 'play', 'playground', '.', 'run', 'fast', 'enjoy', '.']


In [8]:
def get_wordnet_pos(tag):
    """Map a POS tag string to a single-letter WordNet part-of-speech code.

    The decision is made purely on the first letter of ``tag``:

    * starts with ``'V'`` -> ``'v'`` (verb)
    * starts with ``'N'`` -> ``'n'`` (noun)
    * starts with ``'R'`` -> ``'r'`` (adverb)
    * anything else, including punctuation tags and the empty string,
      -> ``'a'`` (adjective)

    Args:
        tag: POS tag string, e.g. ``'VBG'`` or ``'NNS'``.

    Returns:
        One of ``'v'``, ``'n'``, ``'r'`` or ``'a'``.
    """
    # Checked in order; the first matching prefix wins.
    prefix_to_pos = (('V', 'v'), ('N', 'n'), ('R', 'r'))
    for prefix, wordnet_pos in prefix_to_pos:
        if tag.startswith(prefix):
            return wordnet_pos
    # Fallback for every tag that matched none of the prefixes above.
    return 'a'

# Tag the complete token sequence first, then drop the stop words.
# The tagger uses neighbouring tokens as context, so running it on the
# already-filtered list (with words such as "are"/"the" removed) can yield
# less reliable tags. The filter condition is the same one used to build
# `filtered_words`, so `pos_tags` stays aligned with it token for token.
pos_tags = [
    (token, tag)
    for token, tag in pos_tag(words)
    if token.lower() not in stop_words
]
print("POS Tags:", pos_tags)

POS Tags: [('children', 'NNS'), ('playing', 'VBG'), ('playground', 'NN'), ('.', '.'), ('running', 'VBG'), ('fast', 'RB'), ('enjoying', 'VBG'), ('.', '.')]


In [9]:
# Lemmatize each tagged token, passing the WordNet POS code derived from its
# tag so that verbs, nouns, etc. are reduced with the matching rules.
lemmatized_words = [
    lemmatizer.lemmatize(token, pos=get_wordnet_pos(treebank_tag))
    for token, treebank_tag in pos_tags
]
print("Lemmatized Words:", lemmatized_words)

Lemmatized Words: ['child', 'play', 'playground', '.', 'run', 'fast', 'enjoy', '.']


In [10]:
# Side-by-side summary of the three token lists.
# Note: the "Original Words" row shows `filtered_words`, i.e. the tokens
# after stop-word removal that were fed to the stemmer and lemmatizer.
print("\n--- Final Results ---")
print(
    "\n".join(
        f"{label} {tokens}"
        for label, tokens in (
            ("Original Words:", filtered_words),
            ("Stemmed Words:", stemmed_words),
            ("Lemmatized Words:", lemmatized_words),
        )
    )
)


--- Final Results ---
Original Words: ['children', 'playing', 'playground', '.', 'running', 'fast', 'enjoying', '.']
Stemmed Words: ['children', 'play', 'playground', '.', 'run', 'fast', 'enjoy', '.']
Lemmatized Words: ['child', 'play', 'playground', '.', 'run', 'fast', 'enjoy', '.']
