# Word Sense Disambiguation Task

## Imports and Setup

In [14]:
# Imports
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Download the NLTK data packages requested by this notebook, in a fixed order.
for resource in (
    'wordnet',
    'omw-1.4',
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
):
    nltk.download(resource)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [15]:
# Single WordNetLemmatizer instance; the Lesk functions in this notebook read it as a global.
lemmatizer = WordNetLemmatizer()

## Define the Lesk Algorithm


In [17]:
def lesk_with_debug(context_sentence, ambiguous_word):
    """Pick a WordNet sense for ``ambiguous_word`` by gloss overlap, printing each step.

    Every synset returned by ``wn.synsets(ambiguous_word)`` is scored by the
    number of lower-cased, lemmatized tokens its signature shares with the
    context. A synset's signature is built from its definition, its example
    sentences and the definitions of its direct hypernyms.

    Args:
        context_sentence: Iterable of token strings; each item is lower-cased
            and lemmatized individually, so the sentence must already be
            tokenized by the caller.
        ambiguous_word: Word whose synsets are looked up in WordNet.

    Returns:
        The first synset reaching the highest overlap, or ``None`` when no
        synset shares at least one token with the context (which includes the
        case where WordNet has no synsets for the word).
    """
    context = {lemmatizer.lemmatize(w.lower()) for w in context_sentence}
    # Look the senses up once and reuse the list for both passes below.
    senses = wn.synsets(ambiguous_word)
    best_sense = None
    max_overlap = 0

    # "\n" (not "\ ", an invalid escape that printed a literal backslash).
    print(f"\n All senses for '{ambiguous_word}':")
    for sense in senses:
        print(f"  {sense.name():<20} — {sense.definition()}")

    print("\n Evaluating senses with overlap:")
    for sense in senses:
        # Gloss + examples + hypernym glosses
        texts = [sense.definition(), *sense.examples()]
        texts.extend(hyper.definition() for hyper in sense.hypernyms())
        # Tokenize, lower-case and lemmatize in one pass over all texts.
        signature = {
            lemmatizer.lemmatize(token.lower())
            for text in texts
            for token in nltk.word_tokenize(text)
        }

        # NOTE: nothing is filtered out here, so function words ("the", "is")
        # and punctuation tokens count toward the overlap just like content words.
        overlap = len(context & signature)
        print(f"Trying: {sense.name():<20} | Overlap = {overlap}")
        # Strict ">" keeps the earliest sense on ties and rejects zero overlap.
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense

    return best_sense

## Input Sentences and Target Words

In [18]:
# Word Sense Disambiguation using Lesk

# Each sample sentence is kept next to the word to disambiguate in it.
_samples = (
    ("He sat on the bank of the river.", "bank"),                  # bank = riverbank
    ("She deposited the money at the bank.", "bank"),              # bank = financial institution
    ("The crane is flying over the construction site.", "crane"),  # crane = bird or machine?
    ("He couldn't bear the pain any longer.", "bear"),             # bear = tolerate or animal?
    ("The cell was locked for the night.", "cell"),                # cell = prison or biology?
)

# Parallel lists: sample sentences and their target words for disambiguation
sentences = [text for text, _ in _samples]
target_words = [target for _, target in _samples]

## Run Disambiguation with Full Sense & Overlap Info


In [36]:
# Disambiguate every (sentence, target word) pair and report the outcome.
for sentence, word in zip(sentences, target_words):
    print("\n" + 80 * "=")
    print(f" Sentence: {sentence}")
    tokens = word_tokenize(sentence)
    predicted = lesk_with_debug(tokens, word)

    # Brief listing of every candidate sense WordNet has for the word
    print(f"\nPossible Senses for '{word}':")
    for candidate in wn.synsets(word):
        print(f"  - {candidate.name():<25} → {candidate.definition()}")

    # Final prediction (nothing to show when no sense was selected)
    if not predicted:
        print("\n No suitable sense found.")
        continue
    print("\nPredicted Sense:")
    print(f"   {predicted.name()} — {predicted.definition()}")




 Sentence: He sat on the bank of the river.

Possible Senses for 'bank':
  - bank.n.01                 → sloping land (especially the slope beside a body of water)
  - depository_financial_institution.n.01 → a financial institution that accepts deposits and channels the money into lending activities
  - bank.n.03                 → a long ridge or pile
  - bank.n.04                 → an arrangement of similar objects in a row or in tiers
  - bank.n.05                 → a supply or stock held in reserve for future use (especially in emergencies)
  - bank.n.06                 → the funds held by a gambling house or the dealer in some gambling games
  - bank.n.07                 → a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
  - savings_bank.n.02         → a container (usually with a slot in the top) for keeping money at home
  - bank.n.09                 → a building in which the business of banking tran

## Clean Version of Lesk (No Debug Output)

In [32]:
# Quiet variant for clean output (no overlap info).
# NOTE: it reuses the name `lesk_with_debug`, so running this cell replaces the
# printing version defined earlier in the notebook.
def lesk_with_debug(context_sentence, ambiguous_word):
    """Return the WordNet synset of ``ambiguous_word`` that best overlaps the context.

    Scoring is the count of lower-cased, lemmatized tokens shared between the
    context tokens and a synset's signature (definition, example sentences and
    direct-hypernym definitions). Nothing is printed.

    Args:
        context_sentence: Iterable of already-tokenized words.
        ambiguous_word: Word whose synsets are looked up in WordNet.

    Returns:
        The first synset with the highest non-zero overlap, or ``None`` if no
        synset shares a token with the context.
    """
    context_lemmas = {lemmatizer.lemmatize(tok.lower()) for tok in context_sentence}

    winner, top_score = None, 0
    for candidate in wn.synsets(ambiguous_word):
        # Collect raw tokens from the gloss, the examples, then the hypernym glosses.
        raw_tokens = set(nltk.word_tokenize(candidate.definition()))
        for example in candidate.examples():
            raw_tokens.update(nltk.word_tokenize(example))
        for parent in candidate.hypernyms():
            raw_tokens.update(nltk.word_tokenize(parent.definition()))
        gloss_lemmas = {lemmatizer.lemmatize(tok.lower()) for tok in raw_tokens}

        score = len(context_lemmas & gloss_lemmas)
        # Only a strictly better score replaces the current winner.
        if score > top_score:
            winner, top_score = candidate, score

    return winner


## Final Output: Clean Disambiguated Meanings

In [31]:
# Print one compact result per (sentence, target word) pair.
for sentence, word in zip(sentences, target_words):
    # Uses whichever `lesk_with_debug` definition is currently bound.
    sense = lesk_with_debug(word_tokenize(sentence), word)
    meaning = sense.definition() if sense else "No suitable sense found."

    print("==============================================")
    print(f" Sentence: {sentence}")
    print(f" Meaning of '{word}': {meaning}")


 Sentence: He sat on the bank of the river.
 Meaning of 'bank': sloping land (especially the slope beside a body of water)
 Sentence: She deposited the money at the bank.
 Meaning of 'bank': a financial institution that accepts deposits and channels the money into lending activities
 Sentence: The crane is flying over the construction site.
 Meaning of 'crane': a small constellation in the southern hemisphere near Phoenix
 Sentence: He couldn't bear the pain any longer.
 Meaning of 'bear': put up with something or somebody unpleasant
 Sentence: The cell was locked for the night.
 Meaning of 'cell': any small compartment
