In [4]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk import ne_chunk
from nltk.stem import PorterStemmer, WordNetLemmatizer
import nltk

# Sample passage that every step of the pipeline below operates on.
text = """NLTK is a leading platform for building Python programs to work with human language data."""

# --- Tokenization ---------------------------------------------------------
# Split the passage into sentences and into word-level tokens. The two calls
# are independent of each other; both read only `text`.
# preserve_line=True asks word_tokenize not to sentence-split the text first
# (see the NLTK word_tokenize documentation).
sentences = sent_tokenize(text)
words = word_tokenize(text, preserve_line=True)

print("Word Tokenization:", words, sep="\n")
print("\nSentence Tokenization:", sentences, sep="\n")

# --- Stopword removal -----------------------------------------------------
# Build the English stopword set once, then keep only the tokens whose
# lowercased form is not in it. The original casing of kept tokens is retained.
stop_words = set(stopwords.words("english"))
filtered_words = list(filter(lambda token: token.lower() not in stop_words, words))

print("Filtered Words after Stopword Removal:", filtered_words, sep="\n")

# --- Stemming -------------------------------------------------------------
# Apply the Porter stemmer to each token that survived stopword removal.
porter_stemmer = PorterStemmer()
stemmed_words = list(map(porter_stemmer.stem, filtered_words))

print("Stemmed Words:", stemmed_words, sep="\n")

# --- Lemmatization --------------------------------------------------------
# Lemmatize every token of the full token list (`words`, not `filtered_words`).
# NOTE(review): lemmatize() is called without a POS argument here, so it uses
# the lemmatizer's default part of speech -- confirm that is the intent.
lemmatizer = WordNetLemmatizer()
lemmatized_words = list(map(lemmatizer.lemmatize, words))
print("Lemmatized Words:", lemmatized_words)

# --- Part-of-speech tagging -----------------------------------------------
# Tag the full token list; the result is printed as (token, tag) pairs.
pos_tags = nltk.pos_tag(words)
print("POS Tags:", pos_tags)

# --- Named-entity recognition ---------------------------------------------
# Chunk the POS-tagged tokens into a tree of named-entity spans.
ner_tags = ne_chunk(pos_tags)
print("NER Tags:", ner_tags)



Word Tokenization:
['NLTK', 'is', 'a', 'leading', 'platform', 'for', 'building', 'Python', 'programs', 'to', 'work', 'with', 'human', 'language', 'data', '.']

Sentence Tokenization:
['NLTK is a leading platform for building Python programs to work with human language data.']
Filtered Words after Stopword Removal:
['NLTK', 'leading', 'platform', 'building', 'Python', 'programs', 'work', 'human', 'language', 'data', '.']
Stemmed Words:
['nltk', 'lead', 'platform', 'build', 'python', 'program', 'work', 'human', 'languag', 'data', '.']
Lemmatized Words: ['NLTK', 'is', 'a', 'leading', 'platform', 'for', 'building', 'Python', 'program', 'to', 'work', 'with', 'human', 'language', 'data', '.']
POS Tags: [('NLTK', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('leading', 'VBG'), ('platform', 'NN'), ('for', 'IN'), ('building', 'VBG'), ('Python', 'NNP'), ('programs', 'NNS'), ('to', 'TO'), ('work', 'VB'), ('with', 'IN'), ('human', 'JJ'), ('language', 'NN'), ('data', 'NNS'), ('.', '.')]
NER Tags: (S
  (ORGA