In [1]:
# Importing NLTK
import nltk
# Downloading necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\words.zip.


True

# Sample text 

In [3]:
text = "NLTK is a powerful tool for natural language processing. It can tokenize sentences and words. NLTK includes various NLP libraries for text analysis."
text

'NLTK is a powerful tool for natural language processing. It can tokenize sentences and words. NLTK includes various NLP libraries for text analysis.'

# Tokenization

In [4]:
from nltk.tokenize import word_tokenize, sent_tokenize

sentences = sent_tokenize(text)
words = word_tokenize(text)
print("Words:", words)

Words: ['NLTK', 'is', 'a', 'powerful', 'tool', 'for', 'natural', 'language', 'processing', '.', 'It', 'can', 'tokenize', 'sentences', 'and', 'words', '.', 'NLTK', 'includes', 'various', 'NLP', 'libraries', 'for', 'text', 'analysis', '.']


# Part-of-speech tagging

In [5]:
# speech tagging
from nltk import pos_tag

pos_tags = pos_tag(words)
print("POS Tags:", pos_tags)

POS Tags: [('NLTK', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('powerful', 'JJ'), ('tool', 'NN'), ('for', 'IN'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('.', '.'), ('It', 'PRP'), ('can', 'MD'), ('tokenize', 'VB'), ('sentences', 'NNS'), ('and', 'CC'), ('words', 'NNS'), ('.', '.'), ('NLTK', 'NNP'), ('includes', 'VBZ'), ('various', 'JJ'), ('NLP', 'NNP'), ('libraries', 'NNS'), ('for', 'IN'), ('text', 'JJ'), ('analysis', 'NN'), ('.', '.')]


# Stemming and Lemmatization

In [6]:
from nltk.stem import PorterStemmer, WordNetLemmatizer

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stemmed_words = [stemmer.stem(word) for word in words]
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
print("Stemmed Words:", stemmed_words)
print("Lemmatized Words:", lemmatized_words)

Stemmed Words: ['nltk', 'is', 'a', 'power', 'tool', 'for', 'natur', 'languag', 'process', '.', 'it', 'can', 'token', 'sentenc', 'and', 'word', '.', 'nltk', 'includ', 'variou', 'nlp', 'librari', 'for', 'text', 'analysi', '.']
Lemmatized Words: ['NLTK', 'is', 'a', 'powerful', 'tool', 'for', 'natural', 'language', 'processing', '.', 'It', 'can', 'tokenize', 'sentence', 'and', 'word', '.', 'NLTK', 'includes', 'various', 'NLP', 'library', 'for', 'text', 'analysis', '.']


# Stop words removal 

In [7]:
from nltk.corpus import stopwords

stop_words = set(stopwords.words("english"))
filtered_words = [word for word in words if word.lower() not in stop_words]
print("Filtered Words:", filtered_words)

Filtered Words: ['NLTK', 'powerful', 'tool', 'natural', 'language', 'processing', '.', 'tokenize', 'sentences', 'words', '.', 'NLTK', 'includes', 'various', 'NLP', 'libraries', 'text', 'analysis', '.']


# Frequency Distribution

In [8]:
# distribution of frequency
from nltk import FreqDist

freq_dist = FreqDist(words)
print("Frequency Distribution:", freq_dist)

Frequency Distribution: <FreqDist with 22 samples and 26 outcomes>


# Concordance and Similarity

In [9]:
from nltk.text import Text

text_object = Text(words)
concordance_result = text_object.concordance("NLTK")
similar_words = text_object.similar("tool")
print("Concordance Result:", concordance_result)
print("Similar Words:", similar_words)

Displaying 2 of 2 matches:
 NLTK is a powerful tool for natural langu
t can tokenize sentences and words . NLTK includes various NLP libraries for t

Concordance Result: None
Similar Words: None


# Sentiment Analysis

In [11]:
from nltk.sentiment import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()
sentiment_score = sia.polarity_scores(text)
print("Sentiment Analysis Score:", sentiment_score)

LookupError: 
**********************************************************************
  Resource [93mvader_lexicon[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('vader_lexicon')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93msentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt[0m

  Searched in:
    - 'C:\\Users\\PC/nltk_data'
    - 'c:\\Users\\PC\\AppData\\Local\\Programs\\Python\\Python311\\nltk_data'
    - 'c:\\Users\\PC\\AppData\\Local\\Programs\\Python\\Python311\\share\\nltk_data'
    - 'c:\\Users\\PC\\AppData\\Local\\Programs\\Python\\Python311\\lib\\nltk_data'
    - 'C:\\Users\\PC\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
    - ''
**********************************************************************


# Named Entity Recognition (NER)

In [11]:
from nltk import ne_chunk

tokens = word_tokenize(text)
pos_tags_for_ner = pos_tag(tokens)
ner_result = ne_chunk(pos_tags_for_ner)
print("NER Result:", ner_result)

NER Result: (S
  (ORGANIZATION NLTK/NNP)
  is/VBZ
  a/DT
  powerful/JJ
  tool/NN
  for/IN
  natural/JJ
  language/NN
  processing/NN
  ./.
  It/PRP
  can/MD
  tokenize/VB
  sentences/NNS
  and/CC
  words/NNS
  ./.
  (ORGANIZATION NLTK/NNP)
  includes/VBZ
  various/JJ
  (ORGANIZATION NLP/NNP)
  libraries/NNS
  for/IN
  text/JJ
  analysis/NN
  ./.)
