In [None]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag, FreqDist
import string

In [None]:
# Fetch the NLTK data packages this notebook asks for up front.
for package_id in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(package_id)
# Kept as the cell's final bare expression so its return value is still displayed.
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
# Sample passage used by every later cell: two sentences separated by one newline.
text = (
    "Natural Language Processing (NLP) enables computers to understand human language.\n"
    "It is widely used in AI applications like chatbots, search engines, and sentiment analysis."
)

# Header line followed by the passage itself.
print("Original Text:", text, sep="\n")

Original Text:
Natural Language Processing (NLP) enables computers to understand human language.
It is widely used in AI applications like chatbots, search engines, and sentiment analysis.


In [None]:
# Fetch the 'punkt_tab' tokenizer data before the tokenization cells below run.
# Relies on the `import nltk` in the notebook's import cell rather than
# re-importing here, so all imports stay in one place.
# NOTE(review): presumably the installed NLTK version needs 'punkt_tab' in
# addition to 'punkt' for sent_tokenize/word_tokenize — confirm.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
# Split the raw passage into a list of sentence strings.
sentences = sent_tokenize(text)

print("Sentences:", sentences, sep="\n")

Sentences:
['Natural Language Processing (NLP) enables computers to understand human language.', 'It is widely used in AI applications like chatbots, search engines, and sentiment analysis.']


In [None]:
# Break the raw passage into word-level tokens; punctuation marks come back
# as separate tokens and are filtered in a later cell.
words = word_tokenize(text)

print(f"Words:\n{words}")

Words:
['Natural', 'Language', 'Processing', '(', 'NLP', ')', 'enables', 'computers', 'to', 'understand', 'human', 'language', '.', 'It', 'is', 'widely', 'used', 'in', 'AI', 'applications', 'like', 'chatbots', ',', 'search', 'engines', ',', 'and', 'sentiment', 'analysis', '.']


In [None]:
# Drop tokens that consist solely of punctuation characters.
# A plain `word not in string.punctuation` is a *substring* test against one
# long string, so multi-character punctuation tokens such as '...' or '--'
# would slip through. Checking every character of the token handles tokens of
# any length, while tokens that mix letters and punctuation (e.g. "'s") are kept.
words_no_punct = [
    word
    for word in words
    if not all(char in string.punctuation for char in word)
]

print("Without Punctuation:")
print(words_no_punct)

Without Punctuation:
['Natural', 'Language', 'Processing', 'NLP', 'enables', 'computers', 'to', 'understand', 'human', 'language', 'It', 'is', 'widely', 'used', 'in', 'AI', 'applications', 'like', 'chatbots', 'search', 'engines', 'and', 'sentiment', 'analysis']


In [None]:
# English stopword list, held in a set for fast membership checks.
stop_words = set(stopwords.words('english'))
# Compare in lowercase so capitalised stopwords (e.g. "It") are dropped too,
# while the surviving tokens keep their original casing.
filtered_words = [
    token
    for token in words_no_punct
    if token.lower() not in stop_words
]

print("After Stopword Removal:", filtered_words, sep="\n")

After Stopword Removal:
['Natural', 'Language', 'Processing', 'NLP', 'enables', 'computers', 'understand', 'human', 'language', 'widely', 'used', 'AI', 'applications', 'like', 'chatbots', 'search', 'engines', 'sentiment', 'analysis']


In [None]:
# Reduce every remaining token to its Porter stem.
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, filtered_words))

print("Stemmed Words:\n" + str(stemmed_words))

Stemmed Words:
['natur', 'languag', 'process', 'nlp', 'enabl', 'comput', 'understand', 'human', 'languag', 'wide', 'use', 'ai', 'applic', 'like', 'chatbot', 'search', 'engin', 'sentiment', 'analysi']


In [None]:
# Map each remaining token to its WordNet lemma.
lemmatizer = WordNetLemmatizer()
# lemmatize() receives the token only — no part-of-speech argument is supplied.
lemmatized_words = [
    lemmatizer.lemmatize(token)
    for token in filtered_words
]

print("Lemmatized Words:", lemmatized_words, sep="\n")

Lemmatized Words:
['Natural', 'Language', 'Processing', 'NLP', 'enables', 'computer', 'understand', 'human', 'language', 'widely', 'used', 'AI', 'application', 'like', 'chatbots', 'search', 'engine', 'sentiment', 'analysis']


In [None]:
# Download the 'averaged_perceptron_tagger_eng' resource into the NLTK data directory.
# NOTE(review): presumably this is the model the pos_tag call in the following
# cell loads — confirm against the installed NLTK version.
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [None]:
# Label each stopword-filtered token with a part-of-speech tag, producing
# a list of (token, tag) pairs.
# NOTE(review): the tagger is given the filtered token sequence rather than
# the original sentences — confirm that is intended.
pos_tags = pos_tag(filtered_words)

print("POS Tags:", pos_tags, sep="\n")

POS Tags:
[('Natural', 'JJ'), ('Language', 'NNP'), ('Processing', 'NNP'), ('NLP', 'NNP'), ('enables', 'VBZ'), ('computers', 'NNS'), ('understand', 'VBP'), ('human', 'JJ'), ('language', 'NN'), ('widely', 'RB'), ('used', 'VBN'), ('AI', 'NNP'), ('applications', 'NNS'), ('like', 'IN'), ('chatbots', 'NNS'), ('search', 'VBP'), ('engines', 'JJ'), ('sentiment', 'NN'), ('analysis', 'NN')]


In [None]:
# Count how often each filtered token occurs; tokens are counted exactly as
# spelled, so differently-cased forms are separate entries.
freq_dist = FreqDist(filtered_words)

# One "token : count" line per distinct token.
print("Word Frequency:")
for word, freq in freq_dist.items():
    print(f"{word} : {freq}")

Word Frequency:
Natural : 1
Language : 1
Processing : 1
NLP : 1
enables : 1
computers : 1
understand : 1
human : 1
language : 1
widely : 1
used : 1
AI : 1
applications : 1
like : 1
chatbots : 1
search : 1
engines : 1
sentiment : 1
analysis : 1
