In [10]:
import numpy 
import matplotlib.pyplot as plt
import nltk 
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer 
from nltk.stem import WordNetLemmatizer 
from nltk.corpus import wordnet
from nltk import pos_tag 

In [11]:
text = """Preprocessing is a crucial step in Natural Language Processing. It includes tasks like stop word removal and stemming to convert raw text into a more useful and structured form. These processes help in reducing the complexity of the text data by eliminating common but insignificant words (stop words) and reducing words to their base or root forms (stemming). This enables machine learning models to focus on the most meaningful aspects of the data, improving performance and accuracy in tasks such as text classification, sentiment analysis, and information retrieval. 
"""

In [12]:
# stop word removal 
text = text.lower()
text_tokens = word_tokenize(text)  
stop_words = set(stopwords.words('english')) 

filtered = ' '.join([word for word in text_tokens if word not in stop_words])

In [13]:
# stemming of the words 

stemmr = PorterStemmer()


stemmed_text = ' '.join([stemmr.stem(word) for word in filtered.split()]) 

# lemmatization of the words

lemmatizr = WordNetLemmatizer() 

lemmatized = ' '.join([lemmatizr.lemmatize(word) for word in filtered.split()]) 


print(f'Original text: {text}')
print(f'Filtered text: {filtered}')
print(f'Stemmed text: {stemmed_text}')
print(f'Lemmatized text: {lemmatized}')

Original text: preprocessing is a crucial step in natural language processing. it includes tasks like stop word removal and stemming to convert raw text into a more useful and structured form. these processes help in reducing the complexity of the text data by eliminating common but insignificant words (stop words) and reducing words to their base or root forms (stemming). this enables machine learning models to focus on the most meaningful aspects of the data, improving performance and accuracy in tasks such as text classification, sentiment analysis, and information retrieval. 

Filtered text: preprocessing crucial step natural language processing . includes tasks like stop word removal stemming convert raw text useful structured form . processes help reducing complexity text data eliminating common insignificant words ( stop words ) reducing words base root forms ( stemming ) . enables machine learning models focus meaningful aspects data , improving performance accuracy tasks text 

In [14]:
pos = pos_tag(word_tokenize(text)) 
print(pos)

[('preprocessing', 'NN'), ('is', 'VBZ'), ('a', 'DT'), ('crucial', 'JJ'), ('step', 'NN'), ('in', 'IN'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('.', '.'), ('it', 'PRP'), ('includes', 'VBZ'), ('tasks', 'NNS'), ('like', 'IN'), ('stop', 'NN'), ('word', 'NN'), ('removal', 'NN'), ('and', 'CC'), ('stemming', 'VBG'), ('to', 'TO'), ('convert', 'VB'), ('raw', 'JJ'), ('text', 'NN'), ('into', 'IN'), ('a', 'DT'), ('more', 'RBR'), ('useful', 'JJ'), ('and', 'CC'), ('structured', 'JJ'), ('form', 'NN'), ('.', '.'), ('these', 'DT'), ('processes', 'NNS'), ('help', 'VBP'), ('in', 'IN'), ('reducing', 'VBG'), ('the', 'DT'), ('complexity', 'NN'), ('of', 'IN'), ('the', 'DT'), ('text', 'NN'), ('data', 'NNS'), ('by', 'IN'), ('eliminating', 'VBG'), ('common', 'JJ'), ('but', 'CC'), ('insignificant', 'JJ'), ('words', 'NNS'), ('(', '('), ('stop', 'VB'), ('words', 'NNS'), (')', ')'), ('and', 'CC'), ('reducing', 'VBG'), ('words', 'NNS'), ('to', 'TO'), ('their', 'PRP$'), ('base', 'NN'), ('or', 'C