In [None]:
# Notebook banner: experiment title on the first line, author line on the second.
print("NLP Expt 7", "Dhwaj Jain S012", sep="\n")

NLP Expt 7
Dhwaj Jain S012


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet as wn
from nltk.wsd import lesk


# Fetch the NLTK data packages used below. Each call logs its progress and
# reports "already up-to-date" when the package is present locally.

# WordNet corpus that backs the `wn` reader imported above.
nltk.download('wordnet')
# Punkt tokenizer models — presumably what `word_tokenize` loads; verify
# against the installed NLTK version.
nltk.download('punkt')
# Open Multilingual Wordnet data — presumably required by the WordNet reader
# in recent NLTK releases; TODO confirm it is still needed.
nltk.download('omw-1.4')
# Tabular Punkt data — presumably the format newer NLTK versions of
# `word_tokenize` expect; verify against the installed NLTK version.
# NOTE: as the last expression of the cell, this call's return value is what
# the notebook displays as the cell output.
nltk.download('punkt_tab')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
def get_word_relations(word):
    """Collect WordNet lexical relations for ``word`` across all of its synsets.

    Args:
        word: The word to look up with ``wn.synsets``.

    Returns:
        A 4-tuple ``(synonyms, hypernyms, hyponyms, antonyms)``. Each element
        is a list of unique names sorted in ascending order, so repeated runs
        (and fresh kernels) print the same thing. All four lists are empty
        when WordNet has no synset for ``word``.

        * synonyms  - every lemma name of every synset (this includes the
          lemma for ``word`` itself).
        * hypernyms - the first lemma name of each direct hypernym synset.
        * hyponyms  - the first lemma name of each direct hyponym synset.
        * antonyms  - names of the antonyms attached to any lemma.
    """
    synonyms, hypernyms, hyponyms, antonyms = set(), set(), set(), set()

    # An unknown word yields no synsets, so the loop is skipped and the
    # function falls through to returning four empty lists.
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
            antonyms.update(antonym.name() for antonym in lemma.antonyms())

        # Only the first lemma name is kept as the label of a related synset.
        hypernyms.update(hyper.lemma_names()[0] for hyper in syn.hypernyms())
        hyponyms.update(hypo.lemma_names()[0] for hypo in syn.hyponyms())

    # Sets iterate in hash order, which differs between interpreter runs for
    # strings; sorting makes the returned lists reproducible.
    return sorted(synonyms), sorted(hypernyms), sorted(hyponyms), sorted(antonyms)


In [None]:
words = ['bank', 'bat', 'good', 'food', 'money', 'light', 'book', 'school', 'burn']

# Labels in the same order as the tuple returned by get_word_relations.
relation_labels = (" Synonyms:", " Hypernyms:", " Hyponyms:", " Antonyms:")

print("\n=== Word Relations ===\n")
for word in words:
    relations = get_word_relations(word)
    print(f"Word: {word}")
    for label, names in zip(relation_labels, relations):
        # An empty relation list is shown as the literal text "None".
        print(label, names or "None")
    print("-" * 50)



=== Word Relations ===

Word: bank
 Synonyms: ['cant', 'bank_building', 'savings_bank', 'money_box', 'coin_bank', 'trust', 'deposit', 'bank', 'banking_company', 'swear', 'banking_concern', 'depository_financial_institution', 'rely', 'camber']
 Hypernyms: ['tip', 'reserve', 'slope', 'container', 'flight_maneuver', 'give', 'act', 'depository', 'enclose', 'array', 'funds', 'transact', 'believe', 'ridge', 'financial_institution', 'work', 'cover']
 Hyponyms: ['thrift_institution', 'bluff', 'count', 'eye_bank', 'member_bank', 'blood_bank', 'credit_union', 'Federal_Reserve_Bank', 'sandbank', 'waterside', 'lead_bank', 'redeposit', 'piggy_bank', 'agent_bank', 'commercial_bank', 'riverbank', 'food_bank', 'Home_Loan_Bank', 'soil_bank', 'credit', 'acquirer', 'merchant_bank', 'lean', 'state_bank', 'vertical_bank']
 Antonyms: ['withdraw', 'mistrust', 'distrust']
--------------------------------------------------
Word: bat
 Synonyms: ['clobber', 'thrash', 'bat', 'squash_racket', 'chiropteran', 'flut

In [None]:
def lesk_wsd(sentence, target_word, pos=None):
    """Disambiguate ``target_word`` in ``sentence`` with NLTK's Lesk algorithm.

    The sentence is lower-cased and tokenised with ``word_tokenize`` before
    being handed to ``lesk`` as the context.

    Args:
        sentence: The sentence providing the context for disambiguation.
        target_word: The ambiguous word whose sense should be chosen.
        pos: Optional WordNet part-of-speech tag (e.g. ``'n'`` or ``'v'``)
            forwarded to ``lesk`` to restrict the candidate senses. The
            default ``None`` considers every part of speech, which is the
            behaviour of the original two-argument call.

    Returns:
        ``(synset, definition)`` for the chosen sense, or ``(None, None)``
        when ``lesk`` returns no sense.
    """
    tokens = word_tokenize(sentence.lower())
    best_synset = lesk(tokens, target_word, pos=pos)
    if best_synset is None:
        return None, None
    return best_synset, best_synset.definition()


In [None]:
print("-"*20 + " Word Sense Disambiguation with Lesk " + "-"*20)

# (target word, sentence) pairs; every target word appears in two sentences.
sentences = [
    ("bank", "I went to the bank to deposit money."),
    ("bank", "She sat on the river bank and watched the water flow."),
    ("bat", "The cricket player swung the bat hard."),
    ("bat", "A bat flew out of the cave at night."),
    ("apple", "I bought a red apple from the market."),
    ("apple", "He updated the software on his Apple laptop."),
    ("light", "Turn on the light, it’s too dark in here."),
    ("light", "This bag is very light, I can carry it easily."),
    ("book", "She borrowed a book from the library."),
    ("book", "We need to book a hotel room for our trip."),
    ("burn", "You will need to burn that CD to view the movie."),
    ("burn", "Stop burning waste paper.")
]

# Go through lesk_wsd instead of repeating the tokenise-and-lesk steps here,
# so the demo and the helper share one code path. Note that the helper
# lower-cases the sentence before tokenising, which the previous inline
# version of this loop did not do.
for word, sent in sentences:
    sense, definition = lesk_wsd(sent, word)
    print(f"\nSentence: {sent}")
    if sense:
        print(f"\tPredicted Sense: {sense.name()} → {definition}")
    else:
        print("\tNo sense found.")

-------------------- Word Sense Disambiguation with Lesk --------------------

Sentence: I went to the bank to deposit money.
	Predicted Sense: savings_bank.n.02 → a container (usually with a slot in the top) for keeping money at home

Sentence: She sat on the river bank and watched the water flow.
	Predicted Sense: depository_financial_institution.n.01 → a financial institution that accepts deposits and channels the money into lending activities

Sentence: The cricket player swung the bat hard.
	Predicted Sense: cricket_bat.n.01 → the club used in playing cricket

Sentence: A bat flew out of the cave at night.
	Predicted Sense: bat.v.03 → have a turn at bat

Sentence: I bought a red apple from the market.
	Predicted Sense: apple.n.01 → fruit with red or yellow or green skin and sweet to tart crisp whitish flesh

Sentence: He updated the software on his Apple laptop.
	Predicted Sense: apple.n.02 → native Eurasian tree widely cultivated in many varieties for its firm rounded edible frui

**Conclusion**

In summary, this notebook demonstrates the use of the NLTK library for exploring word relationships such as synonyms, hypernyms, hyponyms, and antonyms using WordNet. It also showcases Word Sense Disambiguation (WSD) using the Lesk algorithm to identify the intended meaning of a word in a given sentence. The results highlight the challenges of WSD: the Lesk algorithm did not select the correct sense in every test case. In the run shown above, for example, "bank" in the river-bank sentence was resolved to the financial-institution sense, and "bat" in "A bat flew out of the cave at night." was resolved to a verb sense. This type of analysis is fundamental in Natural Language Processing for tasks requiring a deeper understanding of word meanings and their context.