In [None]:
#Installation
!pip install nltk



In [1]:
import nltk

In [2]:
# Fetch the pre-trained Punkt tokenizer models into NLTK's data directory.
nltk.download("punkt")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

The command `nltk.download('punkt')` downloads the Punkt tokenizer models, pre-trained models that NLTK needs for text tokenization. Punkt is a sentence-boundary detector: it splits text into individual sentences based on punctuation and other linguistic clues (such as abbreviations and capitalization). `word_tokenize` relies on it as well, because it first splits the text into sentences with Punkt and then breaks each sentence into words and punctuation tokens.

In [6]:
# Fetch the punkt_tab tokenizer resource as well.
nltk.download("punkt_tab")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [7]:
# 1. Tokenization: split a sample text into word tokens and into sentences.
import nltk
from nltk import sent_tokenize, word_tokenize

text = "Success is not final, failure is not fatal. It is the courage to continue that counts."

# Run both tokenizers up front, then report the two results.
words = word_tokenize(text)          # individual words and punctuation marks
sentences = sent_tokenize(text)      # one string per sentence

print("Word Tokenization:", words)
print("Sentence Tokenization:", sentences)


Word Tokenization: ['Success', 'is', 'not', 'final', ',', 'failure', 'is', 'not', 'fatal', '.', 'It', 'is', 'the', 'courage', 'to', 'continue', 'that', 'counts', '.']
Sentence Tokenization: ['Success is not final, failure is not fatal.', 'It is the courage to continue that counts.']


In [9]:
# Fetch the stop-word lists that nltk.corpus.stopwords reads from.
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [10]:
# 2. Stop-word removal: drop common English words from a tokenized sentence.
from nltk.corpus import stopwords

# A set gives constant-time membership checks while filtering.
stop_words = set(stopwords.words("english"))
words = word_tokenize("This is a simple sentence for testing")

# Keep a token only when its lower-cased form is not a stop word.
filtered_words = list(
    filter(lambda token: token.lower() not in stop_words, words)
)
print("Filtered Words:", filtered_words)

Filtered Words: ['simple', 'sentence', 'testing']


In [None]:
# Stemming: reduce each filtered word to its Porter stem.
# (WordNetLemmatizer is imported here too, but this cell only stems.)
from nltk.stem import PorterStemmer, WordNetLemmatizer

ps = PorterStemmer()

# Apply the stemmer to every word that survived stop-word removal.
stemmed_words = list(map(ps.stem, filtered_words))
print("stemmed words:", stemmed_words)

stemmed words: ['simpl', 'sentenc', 'test']


In [None]:
# Lemmatization: map each filtered word to its WordNet lemma.
import nltk
from nltk.stem import WordNetLemmatizer

# The lemmatizer looks words up in the WordNet corpus, so fetch it first.
nltk.download("wordnet")

lemmatizer = WordNetLemmatizer()

# lemmatize() is called with its default part of speech for every token.
lemmas = [lemmatizer.lemmatize(token) for token in filtered_words]
print("Lemmatized word:", lemmas)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Lemmatized word: ['simple', 'sentence', 'testing']


In [None]:
# Part-of-speech (POS) tagging: label every token of a sentence with its tag.
import nltk

# The tagger model ships under two resource ids: NLTK 3.9+ loads
# "averaged_perceptron_tagger_eng", while older releases load the pickled
# "averaged_perceptron_tagger". Fetch both so this cell runs on a fresh
# kernel regardless of the installed NLTK version (a missing resource
# otherwise raises LookupError inside nltk.pos_tag).
nltk.download("averaged_perceptron_tagger")
nltk.download("averaged_perceptron_tagger_eng")

# Tokenize the sentence, then tag each token; the result is a list of
# (token, tag) tuples.
pos_tags = nltk.pos_tag(nltk.word_tokenize("NLTK is amazing for NLP tasks!"))
print("POS Tags:", pos_tags)

POS Tags: [('NLTK', 'NNP'), ('is', 'VBZ'), ('amazing', 'VBG'), ('for', 'IN'), ('NLP', 'NNP'), ('tasks', 'NNS'), ('!', '.')]


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [None]:
# Named Entity Recognition (NER): group the POS-tagged tokens into entities.
import nltk
from nltk.chunk import ne_chunk

# The chunker model ships under two resource ids: NLTK 3.9+ loads
# "maxent_ne_chunker_tab", while older releases load the pickled
# "maxent_ne_chunker". Fetch both, plus the "words" corpus, so this cell
# runs on a fresh kernel regardless of the installed NLTK version
# (a missing resource otherwise raises LookupError inside ne_chunk).
nltk.download("maxent_ne_chunker")
nltk.download("maxent_ne_chunker_tab")
nltk.download("words")

# ne_chunk takes the (token, tag) pairs produced by the POS-tagging cell
# (`pos_tags`) and returns a tree in which recognized entities are subtrees.
entities = ne_chunk(pos_tags)
print("Named Entities:", entities)

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


Named Entities: (S
  (ORGANIZATION NLTK/NNP)
  is/VBZ
  amazing/VBG
  for/IN
  (ORGANIZATION NLP/NNP)
  tasks/NNS
  !/.)


In [11]:
# Sentiment analysis with NLTK's pre-trained VADER model.
from nltk.sentiment import SentimentIntensityAnalyzer

# The analyzer needs the VADER lexicon, so download it before constructing it.
nltk.download("vader_lexicon")
sia = SentimentIntensityAnalyzer()

# polarity_scores returns a dict with 'neg', 'neu', 'pos' and 'compound' keys.
sample_sentence = "NLTK is incredibly helpful for NLP tasks!"
sentiment = sia.polarity_scores(sample_sentence)
print("Sentiment:", sentiment)


Sentiment: {'neg': 0.0, 'neu': 0.639, 'pos': 0.361, 'compound': 0.5244}


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
