Importing the libraries and downloading the required NLTK resources.


In [1]:
# importing libraries
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer,WordNetLemmatizer

# download required nltk resources
# Each call fetches one NLTK data package; packages that are already
# present are reported as up-to-date in the download log.
# 'punkt_tab': tokenizer data, presumably what word_tokenize relies on -- confirm for your NLTK version
nltk.download('punkt_tab')
# 'stopwords': the corpus read through nltk.corpus.stopwords
nltk.download('stopwords')
# 'wordnet': WordNet data, presumably what WordNetLemmatizer relies on
nltk.download('wordnet')
# 'omw-1.4': presumably the Open Multilingual WordNet data that accompanies WordNet -- confirm
# This call is the last expression of the cell, so its return value
# (True) is what the notebook displays as the cell result.
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

Defining the paragraph.

In [2]:
# Sample text for the preprocessing steps. It is written one sentence per
# tuple element and the five sentences are joined with single spaces.
paragraph = " ".join((
    "Technology has become an essential part of modern life, "
    "transforming the way people work, communicate, and learn.",
    "From smartphones to artificial intelligence, technological "
    "innovations have improved efficiency and convenience in almost "
    "every field.",
    "In education, online learning platforms allow students to access "
    "knowledge from anywhere, while in healthcare, advanced medical "
    "equipment and data analysis save countless lives.",
    "However, the rapid growth of technology also raises concerns about "
    "privacy, unemployment, and over-dependence on machines.",
    "Therefore, while technology offers immense benefits, it is important "
    "to use it responsibly to ensure that progress leads to a better and "
    "more balanced society.",
))
print(paragraph)

Technology has become an essential part of modern life, transforming the way people work, communicate, and learn. From smartphones to artificial intelligence, technological innovations have improved efficiency and convenience in almost every field. In education, online learning platforms allow students to access knowledge from anywhere, while in healthcare, advanced medical equipment and data analysis save countless lives. However, the rapid growth of technology also raises concerns about privacy, unemployment, and over-dependence on machines. Therefore, while technology offers immense benefits, it is important to use it responsibly to ensure that progress leads to a better and more balanced society.


Tokenization: this splits the paragraph into individual words and punctuation marks.


In [3]:
# Split the paragraph into tokens; punctuation marks such as ',' and '.'
# come back as separate tokens.
tokens = word_tokenize(paragraph)

# Heading and token list on separate lines.
print("Token", tokens, sep="\n")

Token
['Technology', 'has', 'become', 'an', 'essential', 'part', 'of', 'modern', 'life', ',', 'transforming', 'the', 'way', 'people', 'work', ',', 'communicate', ',', 'and', 'learn', '.', 'From', 'smartphones', 'to', 'artificial', 'intelligence', ',', 'technological', 'innovations', 'have', 'improved', 'efficiency', 'and', 'convenience', 'in', 'almost', 'every', 'field', '.', 'In', 'education', ',', 'online', 'learning', 'platforms', 'allow', 'students', 'to', 'access', 'knowledge', 'from', 'anywhere', ',', 'while', 'in', 'healthcare', ',', 'advanced', 'medical', 'equipment', 'and', 'data', 'analysis', 'save', 'countless', 'lives', '.', 'However', ',', 'the', 'rapid', 'growth', 'of', 'technology', 'also', 'raises', 'concerns', 'about', 'privacy', ',', 'unemployment', ',', 'and', 'over-dependence', 'on', 'machines', '.', 'Therefore', ',', 'while', 'technology', 'offers', 'immense', 'benefits', ',', 'it', 'is', 'important', 'to', 'use', 'it', 'responsibly', 'to', 'ensure', 'that', 'progr

Stopword removal: this removes common English words such as "is", "the", and "and", keeping only the meaningful words. Punctuation and other non-alphabetic tokens are dropped as well.

In [4]:
# A set gives constant-time membership checks for the stopword test below.
stop_words = set(stopwords.words('english'))

# Keep a token only if it is purely alphabetic and its lowercase form is
# not an English stopword. The original casing of kept tokens is preserved.
# NOTE(review): str.isalpha() is False for any token containing a
# non-letter, so punctuation is removed, but so are hyphenated words such
# as 'over-dependence'. Confirm that this is intended.
filtered_tokens = [
    token
    for token in tokens
    if token.isalpha() and token.lower() not in stop_words
]

print("after stopword removal", filtered_tokens, sep="\n")

after stopword removal
['Technology', 'become', 'essential', 'part', 'modern', 'life', 'transforming', 'way', 'people', 'work', 'communicate', 'learn', 'smartphones', 'artificial', 'intelligence', 'technological', 'innovations', 'improved', 'efficiency', 'convenience', 'almost', 'every', 'field', 'education', 'online', 'learning', 'platforms', 'allow', 'students', 'access', 'knowledge', 'anywhere', 'healthcare', 'advanced', 'medical', 'equipment', 'data', 'analysis', 'save', 'countless', 'lives', 'However', 'rapid', 'growth', 'technology', 'also', 'raises', 'concerns', 'privacy', 'unemployment', 'machines', 'Therefore', 'technology', 'offers', 'immense', 'benefits', 'important', 'use', 'responsibly', 'ensure', 'progress', 'leads', 'better', 'balanced', 'society']


Stemming: this reduces each word to its root form (stem), which is often not an actual word.

In [5]:
# Apply the Porter stemmer to every filtered token. The stems are
# truncated roots (for example 'technolog'), not dictionary words.
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, filtered_tokens))

print("after stemming", stemmed_words, sep="\n")

after stemming
['technolog', 'becom', 'essenti', 'part', 'modern', 'life', 'transform', 'way', 'peopl', 'work', 'commun', 'learn', 'smartphon', 'artifici', 'intellig', 'technolog', 'innov', 'improv', 'effici', 'conveni', 'almost', 'everi', 'field', 'educ', 'onlin', 'learn', 'platform', 'allow', 'student', 'access', 'knowledg', 'anywher', 'healthcar', 'advanc', 'medic', 'equip', 'data', 'analysi', 'save', 'countless', 'live', 'howev', 'rapid', 'growth', 'technolog', 'also', 'rais', 'concern', 'privaci', 'unemploy', 'machin', 'therefor', 'technolog', 'offer', 'immens', 'benefit', 'import', 'use', 'respons', 'ensur', 'progress', 'lead', 'better', 'balanc', 'societi']


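Lemmatization: this reduces each word to its dictionary base form (lemma). Without a part-of-speech tag, each word is treated as a noun, so plural nouns are reduced ("lives" becomes "life") while verb forms such as "transforming" are left unchanged.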

In [6]:
# Look up the WordNet lemma of every filtered token.
# NOTE(review): lemmatize() is called without a part-of-speech argument,
# so each token is looked up as a noun. Plural nouns are reduced
# ('smartphones' stays as-is if unknown, 'students' -> 'student'), but
# verb forms such as 'transforming' or 'improved' come back unchanged.
# Passing a POS tag per token would be needed to lemmatize those.
lemmatizer = WordNetLemmatizer()
lemmatized_words = list(map(lemmatizer.lemmatize, filtered_tokens))

print("After lemmatization", lemmatized_words, sep="\n")

After lemmatization
['Technology', 'become', 'essential', 'part', 'modern', 'life', 'transforming', 'way', 'people', 'work', 'communicate', 'learn', 'smartphones', 'artificial', 'intelligence', 'technological', 'innovation', 'improved', 'efficiency', 'convenience', 'almost', 'every', 'field', 'education', 'online', 'learning', 'platform', 'allow', 'student', 'access', 'knowledge', 'anywhere', 'healthcare', 'advanced', 'medical', 'equipment', 'data', 'analysis', 'save', 'countless', 'life', 'However', 'rapid', 'growth', 'technology', 'also', 'raise', 'concern', 'privacy', 'unemployment', 'machine', 'Therefore', 'technology', 'offer', 'immense', 'benefit', 'important', 'use', 'responsibly', 'ensure', 'progress', 'lead', 'better', 'balanced', 'society']
