In [6]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk import pos_tag
from collections import Counter
import string

# Download required NLTK resources (run once).
# 'punkt_tab' is the tokenizer data that word_tokenize loads on NLTK >= 3.9
# (the same release line that introduced the '_eng' tagger name used below);
# the legacy 'punkt' package is kept so older installations keep working.
for resource in (
    'punkt',
    'punkt_tab',
    'stopwords',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

# Input text
text = """
Information retrieval is the process of obtaining relevant information from a collection of resources. 
It is a key part of search engines, databases, and data mining systems.
"""

# Step 1: Tokenization (punctuation marks come back as separate tokens)
tokens = word_tokenize(text)
print("Tokenized Words:\n", tokens)

# Step 2: Convert to lowercase and remove punctuation.
# str.isalnum() keeps only purely alphanumeric tokens, so besides '.' and ','
# it would also drop tokens such as "state-of-the-art" or "n't".
tokens = [word.lower() for word in tokens if word.isalnum()]

# Step 3: Remove Stop Words (a set gives O(1) membership tests)
stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in tokens if word not in stop_words]
print("\nFiltered Tokens (Stop words removed):\n", filtered_tokens)

# Step 4: Count Word Frequencies
word_freq = Counter(filtered_tokens)
print("\nWord Frequencies:\n", word_freq)

# Step 5: POS Tagging
# NOTE(review): tagging runs on the lower-cased, stop-word-free list rather
# than on the original sentence tokens; presumably the tagger benefits from
# the surrounding words and capitalization, so tags may differ from tagging
# the raw sentence -- confirm this order is intended.
pos_tags = pos_tag(filtered_tokens)
print("\nPOS Tagging:\n", pos_tags)


Tokenized Words:
 ['Information', 'retrieval', 'is', 'the', 'process', 'of', 'obtaining', 'relevant', 'information', 'from', 'a', 'collection', 'of', 'resources', '.', 'It', 'is', 'a', 'key', 'part', 'of', 'search', 'engines', ',', 'databases', ',', 'and', 'data', 'mining', 'systems', '.']

Filtered Tokens (Stop words removed):
 ['information', 'retrieval', 'process', 'obtaining', 'relevant', 'information', 'collection', 'resources', 'key', 'part', 'search', 'engines', 'databases', 'data', 'mining', 'systems']

Word Frequencies:
 Counter({'information': 2, 'retrieval': 1, 'process': 1, 'obtaining': 1, 'relevant': 1, 'collection': 1, 'resources': 1, 'key': 1, 'part': 1, 'search': 1, 'engines': 1, 'databases': 1, 'data': 1, 'mining': 1, 'systems': 1})

POS Tagging:
 [('information', 'NN'), ('retrieval', 'NN'), ('process', 'NN'), ('obtaining', 'VBG'), ('relevant', 'JJ'), ('information', 'NN'), ('collection', 'NN'), ('resources', 'NNS'), ('key', 'JJ'), ('part', 'NN'), ('search', 'NN'), ('e

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hyper\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hyper\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\hyper\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
