# Import Necessary Libraries

In [1]:
import re
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Sample Text

In [2]:
# Two-sentence demo paragraph used as input for every preprocessing step below.
# The triple-quoted literal starts with a newline and is hard-wrapped mid-sentence;
# those line breaks are part of the string value, not just source formatting.
sample_text = """
Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction
between computers and humans through natural language. The ultimate goal of NLP is to read, decipher,
understand, and make sense of human language in a way that is both valuable and meaningful.
"""

# Tokenization

In [11]:
# Fetch the NLTK data packages the later cells rely on. Each call logs its
# progress and returns True on success; packages already present are not
# downloaded again.
import nltk  # repeated here so this setup cell can be re-run on its own

# Tokenizer models used by sent_tokenize / word_tokenize. Recent NLTK releases
# (3.9 and later) load them from 'punkt_tab' instead of the pickled 'punkt'
# package, so both ids are requested to work on old and new installs alike.
# NOTE(review): on releases that predate 'punkt_tab' the extra call is expected
# to just report the id as unknown and return False -- confirm on your version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')  # English stop-word list read via stopwords.words('english')
nltk.download('wordnet')    # lexical database behind WordNetLemmatizer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [12]:
# Split the sample paragraph into sentences first ...
sentences = sent_tokenize(sample_text)
# ... then break every sentence into its word/punctuation tokens, giving one
# token list per sentence (same order as `sentences`).
words = list(map(word_tokenize, sentences))

# Lowercasing and Removing Special Characters

In [13]:
# Normalise every token: lowercase it and strip all characters that are not
# ASCII letters or digits. Tokens made up purely of punctuation (e.g. "(", ",",
# ".") shrink to '' in the process; those are dropped here so that no empty
# "words" are carried into stop-word removal, stemming or the printed output.
_non_alnum = re.compile(r'[^a-zA-Z0-9]')  # compiled once, reused for every token
cleaned_words = [
    [
        cleaned
        for cleaned in (_non_alnum.sub('', word.lower()) for word in sentence)
        if cleaned  # skip tokens that became empty after stripping
    ]
    for sentence in words
]

# Removing Stopwords

In [14]:
# A set gives constant-time membership checks against the English stop-word list.
stop_words = set(stopwords.words('english'))
# Keep, sentence by sentence, only the tokens that are not stop words.
filtered_words = [
    list(filter(lambda token: token not in stop_words, tokens))
    for tokens in cleaned_words
]

# Stemming and Lemmatization

In [15]:
# Two alternative normalisers, both applied to the same stop-word-free tokens
# so their results can be compared side by side.
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# Porter stems, one list per sentence.
stemmed_words = [list(map(stemmer.stem, tokens)) for tokens in filtered_words]
# WordNet lemmas (called with the lemmatizer's default arguments), one list per sentence.
lemmatized_words = [list(map(lemmatizer.lemmatize, tokens)) for tokens in filtered_words]

# Printing Processed Sentences

In [16]:
# Show the sentences exactly as the sentence tokenizer returned them.
print("Original Sentences:")
for sentence in sentences:
    print(sentence)

# Show the lemmatized tokens of each sentence on one line; print's default
# separator is a single space, so unpacking the token list matches a ' '.join.
print("\nProcessed Sentences (Lemmatized):")
for sentence in lemmatized_words:
    print(*sentence)

Original Sentences:

Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction
between computers and humans through natural language.
The ultimate goal of NLP is to read, decipher,
understand, and make sense of human language in a way that is both valuable and meaningful.

Processed Sentences (Lemmatized):
natural language processing  nlp  field artificial intelligence focus interaction computer human natural language 
ultimate goal nlp read  decipher  understand  make sense human language way valuable meaningful 
