In [1]:
# Import necessary libraries
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag, ne_chunk
from nltk.probability import FreqDist

In [2]:
# Example text for all tasks
text = """Natural Language Processing is an exciting field of Artificial Intelligence.
It enables machines to understand human language. John works at Google in New York."""

In [13]:
# Tokenization
def tokenize_text(text):
    sentences = sent_tokenize(text)  # Sentence tokenization
    words = word_tokenize(text)  # Word tokenization
    return sentences, words

sentences, words = tokenize_text(text)
print(f"Sentence Tokenization: {sentences}")
print(f"\nWord Tokenization:{words}")

Sentence Tokenization: ['Natural Language Processing is an exciting field of Artificial Intelligence.', 'It enables machines to understand human language.', 'John works at Google in New York.']

Word Tokenization:['Natural', 'Language', 'Processing', 'is', 'an', 'exciting', 'field', 'of', 'Artificial', 'Intelligence', '.', 'It', 'enables', 'machines', 'to', 'understand', 'human', 'language', '.', 'John', 'works', 'at', 'Google', 'in', 'New', 'York', '.']


In [12]:
# Stop Words Removal
def remove_stopwords(words):
    stop_words = set(stopwords.words("english"))
    filtered_words = [word for word in words if word.lower() not in stop_words]
    return filtered_words

filtered_words = remove_stopwords(words)
print(f"Without Stop Words:{filtered_words}")

Without Stop Words:['Natural', 'Language', 'Processing', 'exciting', 'field', 'Artificial', 'Intelligence', '.', 'enables', 'machines', 'understand', 'human', 'language', '.', 'John', 'works', 'Google', 'New', 'York', '.']


In [16]:
# Lemmatization
def apply_lemmatization(words):
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(word) for word in words]

lemmatized_words = apply_lemmatization(filtered_words)
print(f"Lemmatized Words:{lemmatized_words}")

Lemmatized Words:['Natural', 'Language', 'Processing', 'exciting', 'field', 'Artificial', 'Intelligence', '.', 'enables', 'machine', 'understand', 'human', 'language', '.', 'John', 'work', 'Google', 'New', 'York', '.']


In [21]:
# Parts of Speech (POS) Tagging
def pos_tagging(words):
    return pos_tag(words)

pos_tags = pos_tagging(words)
print(f"POS Tags: {pos_tags}")

POS Tags: [('Natural', 'JJ'), ('Language', 'NNP'), ('Processing', 'NNP'), ('is', 'VBZ'), ('an', 'DT'), ('exciting', 'JJ'), ('field', 'NN'), ('of', 'IN'), ('Artificial', 'JJ'), ('Intelligence', 'NNP'), ('.', '.'), ('It', 'PRP'), ('enables', 'VBZ'), ('machines', 'NNS'), ('to', 'TO'), ('understand', 'VB'), ('human', 'JJ'), ('language', 'NN'), ('.', '.'), ('John', 'NNP'), ('works', 'VBZ'), ('at', 'IN'), ('Google', 'NNP'), ('in', 'IN'), ('New', 'NNP'), ('York', 'NNP'), ('.', '.')]


In [22]:
# Named Entity Recognition (NER)
def named_entity_recognition(words):
    return ne_chunk(pos_tagging(words))

print(f"Named Entities:{named_entity_recognition(words)}")


Named Entities:(S
  Natural/JJ
  Language/NNP
  Processing/NNP
  is/VBZ
  an/DT
  exciting/JJ
  field/NN
  of/IN
  (ORGANIZATION Artificial/JJ Intelligence/NNP)
  ./.
  It/PRP
  enables/VBZ
  machines/NNS
  to/TO
  understand/VB
  human/JJ
  language/NN
  ./.
  (PERSON John/NNP)
  works/VBZ
  at/IN
  (ORGANIZATION Google/NNP)
  in/IN
  (GPE New/NNP York/NNP)
  ./.)


In [24]:
# Frequency Distribution
def frequency_distribution(words):
    freq_dist = FreqDist(words)
    return freq_dist.most_common(5)  # Top 5 most common words

print(f"Top 5 Most Common Words: {frequency_distribution(filtered_words)}")

Top 5 Most Common Words: [('.', 3), ('Natural', 1), ('Language', 1), ('Processing', 1), ('exciting', 1)]


In [25]:
# Synonyms and Antonyms
def synonyms_antonyms(word):
    synonyms = []
    antonyms = []

    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.append(lemma.name())
            if lemma.antonyms():
                antonyms.append(lemma.antonyms()[0].name())

    return set(synonyms), set(antonyms)

word_to_check = "happy"
syns, ants = synonyms_antonyms(word_to_check)
print(f"Synonyms for '{word_to_check}':", syns)
print(f"Antonyms for '{word_to_check}':", ants)

Synonyms for 'happy': {'glad', 'well-chosen', 'happy', 'felicitous'}
Antonyms for 'happy': {'unhappy'}
