In [39]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag, ne_chunk, word_tokenize
import spacy
from nltk.tree import Tree

nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('tagsets')

words = ['bear', 'set', 'square', 'lead', 'criteria']


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package tagsets to /root/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!


In [40]:
# a) Find the 3rd meaning of the word in the list
for word in words:
    synsets = wn.synsets(word)
    if len(synsets) >= 3:
        print(f"3rd Meaning of '{word}': {synsets[2].definition()}")

3rd Meaning of 'bear': have
3rd Meaning of 'set': several exercises intended to be done in series
3rd Meaning of 'square': an open area at the meeting of two or more streets
3rd Meaning of 'lead': evidence pointing to a possible solution


In [16]:
# b) Extract the nouns of the word from the synonyms list
for word in words:
    nouns = [syn.name().split('.')[0] for syn in wn.synsets(word) if syn.pos() == 'n']
    print(f"Nouns for '{word}': {nouns}")

Nouns for 'bear': ['bear', 'bear']
Nouns for 'set': ['set', 'set', 'set', 'stage_set', 'set', 'bent', 'set', 'set', 'hardening', 'set', 'set', 'set', 'set']
Nouns for 'square': ['square', 'square', 'public_square', 'square', 'square', 'square', 'square', 'square']
Nouns for 'lead': ['lead', 'lead', 'lead', 'lead', 'lead', 'lead', 'lead', 'star', 'lead', 'tip', 'lead', 'spark_advance', 'leash', 'lead', 'lead', 'jumper_cable', 'lead']
Nouns for 'criteria': ['standard', 'criterion']


In [41]:
# c) Extract the verbs of the word from the synonyms list
for word in words:
    verbs = [syn.name().split('.')[0] for syn in wn.synsets(word) if syn.pos() == 'v']
    print(f"Verbs for '{word}': {verbs}")

Verbs for 'bear': ['bear', 'give_birth', 'digest', 'bear', 'bear', 'bear', 'hold', 'yield', 'wear', 'behave', 'bear', 'hold', 'have_a_bun_in_the_oven']
Verbs for 'set': ['put', 'determine', 'specify', 'set', 'set', 'set', 'fix', 'set', 'set', 'set', 'arrange', 'plant', 'set', 'jell', 'typeset', 'set', 'set', 'set', 'sic', 'place', 'rig', 'set_up', 'adjust', 'fructify', 'dress']
Verbs for 'square': ['square', 'square', 'square', 'square', 'square', 'square', 'feather', 'feather']
Verbs for 'lead': ['lead', 'leave', 'lead', 'lead', 'lead', 'run', 'head', 'lead', 'contribute', 'conduct', 'go', 'precede', 'run', 'moderate']
Verbs for 'criteria': []


In [42]:
# d) Extract the adjectives of the word from the synonyms list
for word in words:
    adjectives = [syn.name().split('.')[0] for syn in wn.synsets(word) if syn.pos() == 'a']
    print(f"Adjectives for '{word}': {adjectives}")


Adjectives for 'bear': []
Adjectives for 'set': []
Adjectives for 'square': ['square', 'straight']
Adjectives for 'lead': []
Adjectives for 'criteria': []


In [43]:
# e) Extract the adverbs of the word from the synonyms list
for word in words:
    adverbs = [syn.name().split('.')[0] for syn in wn.synsets(word) if syn.pos() == 'r']
    print(f"Adverbs for '{word}': {adverbs}")

Adverbs for 'bear': []
Adverbs for 'set': []
Adverbs for 'square': ['squarely', 'squarely', 'squarely']
Adverbs for 'lead': []
Adverbs for 'criteria': []


In [44]:
# f) Extract the definition of the word
for word in words:
    synsets = wn.synsets(word)
    if synsets:
        print(f"Definition of '{word}': {synsets[0].definition()}")

Definition of 'bear': massive plantigrade carnivorous or omnivorous mammals with long shaggy coats and strong claws
Definition of 'set': a group of things of the same kind that belong together and are so used
Definition of 'square': (geometry) a plane rectangle with four equal sides and four right angles; a four-sided regular polygon
Definition of 'lead': an advantage held by a competitor in a race
Definition of 'criteria': a basis for comparison; a reference point against which other things can be evaluated


In [45]:
# g) Program to get antonyms from WordNet
for word in words:
    antonyms = []
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            if lemma.antonyms():
                antonyms.append(lemma.antonyms()[0].name())
    print(f"Antonyms for '{word}': {antonyms}")

Antonyms for 'bear': ['bull']
Antonyms for 'set': ['rise']
Antonyms for 'square': ['round', 'crooked']
Antonyms for 'lead': ['deficit', 'follow']
Antonyms for 'criteria': []


In [46]:
# h) Lemmatizing words using WordNet and comparison with stemming
lemmatizer = WordNetLemmatizer()
stemmed_words = {}

for word in words:
    lemma = lemmatizer.lemmatize(word)
    stemmed_words[word] = lemma
print("Lemmatized Words:", stemmed_words)

Lemmatized Words: {'bear': 'bear', 'set': 'set', 'square': 'square', 'lead': 'lead', 'criteria': 'criterion'}


In [47]:
# i) Differentiating stemming and lemmatizing
from nltk.stem import PorterStemmer, LancasterStemmer

porter = PorterStemmer()
lancaster = LancasterStemmer()

print("Stemming and Lemmatization Comparison")
for word in words:
    print(f"Word: {word}, Porter Stem: {porter.stem(word)}, Lancaster Stem: {lancaster.stem(word)}, Lemma: {lemmatizer.lemmatize(word)}")


Stemming and Lemmatization Comparison
Word: bear, Porter Stem: bear, Lancaster Stem: bear, Lemma: bear
Word: set, Porter Stem: set, Lancaster Stem: set, Lemma: set
Word: square, Porter Stem: squar, Lancaster Stem: squ, Lemma: square
Word: lead, Porter Stem: lead, Lancaster Stem: lead, Lemma: lead
Word: criteria, Porter Stem: criteria, Lancaster Stem: criter, Lemma: criterion


In [53]:
from IPython import get_ipython
from IPython.display import display

# Download the required data package
nltk.download('averaged_perceptron_tagger_eng')

words = word_tokenize(sentence)
print("Tokens:", words)

nltk.download("averaged_perceptron_tagger")
nltk.download("punkt")

def pos_tagging(sentence):
    words = word_tokenize(sentence)
    print("Tokens:", words)  # Debugging step
    tagged = pos_tag(words)
    print("PoS Tagging:", tagged)

# Test it
sentence = "John Doe visited Paris last summer"


Tokens: ['John', 'Doe', 'visited', 'Paris', 'last', 'summer']


[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [54]:
# k) Named Entity Recognition
def named_entity_recognition(sentence):
    doc = nlp(sentence)
    for ent in doc.ents:
        print(f"Entity: {ent.text}, Type: {ent.label_}")


print("\n--- Named Entity Recognition ---")
named_entity_recognition(sentence)


--- Named Entity Recognition ---
Entity: John Doe, Type: PERSON
Entity: Paris, Type: GPE
Entity: last summer, Type: DATE


In [55]:
# l) Dependency and Constituency Parsing using spaCy
nlp = spacy.load("en_core_web_sm")
doc = nlp("John Doe visited Paris last summer.")

print("\nDependency Parsing:")
for token in doc:
    print(f"{token.text} -> {token.dep_} ({token.head.text})")

print("\nConstituency Parsing (Subtrees):")
for sent in doc.sents:
    print(list(sent.subtree))



Dependency Parsing:
John -> compound (Doe)
Doe -> nsubj (visited)
visited -> ROOT (visited)
Paris -> dobj (visited)
last -> amod (summer)
summer -> npadvmod (visited)
. -> punct (visited)

Constituency Parsing (Subtrees):
[John, Doe, visited, Paris, last, summer, .]
