In [1]:
pip install nltk



In [2]:
import nltk

# disambiguate_word() calls stopwords.words('english'); fetch that corpus
# here with the other data so a fresh kernel can run the notebook top to
# bottom instead of depending on a download made outside the notebook.
nltk.download('stopwords')
# WordNet supplies the candidate senses (synsets) with their definitions and
# example sentences. Kept last so the cell still displays its return value.
nltk.download('wordnet')


[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [3]:
# Repeats the import from the earlier cell; harmless, and keeps this cell
# runnable by itself.
import nltk
# Fetch the 'punkt' tokenizer data (unpacked under tokenizers/ per the log).
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [5]:
# Status message only: it is printed unconditionally and does not itself
# check that the installation or the downloads succeeded.
print("The NLTK library is installed and the 'wordnet' and 'punkt' corpora have been downloaded. The subtask is complete.")

The NLTK library is installed and the 'wordnet' and 'punkt' corpora have been downloaded. The subtask is complete.


In [6]:
# Running example for the notebook: the word to disambiguate and the
# sentence that provides its context.
target_word = "bank"
ambiguous_sentence = "The bank is located near the river."

# Echo both values, one per line.
print(
    f"Ambiguous Sentence: {ambiguous_sentence}",
    f"Target Word for Disambiguation: {target_word}",
    sep="\n",
)

Ambiguous Sentence: The bank is located near the river.
Target Word for Disambiguation: bank


In [8]:
# nltk.download() reports a failed download through its return value rather
# than by raising, so only announce success when it actually succeeded.
if not nltk.download('punkt_tab'):
    raise RuntimeError("Failed to download NLTK resource 'punkt_tab'.")
print("Downloaded 'punkt_tab' resource.")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Downloaded 'punkt_tab' resource.


**Reasoning**:
With the 'punkt_tab' resource downloaded, tokenization should no longer raise the previous `LookupError`. Part-of-Speech tagging needs one more resource, 'averaged_perceptron_tagger_eng', so the next cell downloads it before the original tokenization and tagging code block is re-executed.



In [10]:
# nltk.download() reports a failed download through its return value rather
# than by raising, so only announce success when it actually succeeded.
if not nltk.download('averaged_perceptron_tagger_eng'):
    raise RuntimeError("Failed to download NLTK resource 'averaged_perceptron_tagger_eng'.")
print("Downloaded 'averaged_perceptron_tagger_eng' resource.")

Downloaded 'averaged_perceptron_tagger_eng' resource.


[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


In [11]:
# Split the example sentence into tokens, then tag that token list with
# part-of-speech labels; both results are reused by later cells.
tokens = nltk.word_tokenize(ambiguous_sentence)
pos_tags = nltk.pos_tag(tokens)

# Show both intermediate results, one per line.
print(
    f"Tokens: {tokens}",
    f"POS Tags: {pos_tags}",
    sep="\n",
)

Tokens: ['The', 'bank', 'is', 'located', 'near', 'the', 'river', '.']
POS Tags: [('The', 'DT'), ('bank', 'NN'), ('is', 'VBZ'), ('located', 'VBN'), ('near', 'IN'), ('the', 'DT'), ('river', 'NN'), ('.', '.')]


In [12]:
from nltk.corpus import wordnet
from nltk.corpus import stopwords

def _content_words(text, stop_words, exclude=None):
    """Return the set of lowercased alphabetic, non-stopword tokens of ``text``, minus ``exclude``."""
    words = set()
    for token in nltk.word_tokenize(text):
        lowered = token.lower()
        if token.isalpha() and lowered != exclude and lowered not in stop_words:
            words.add(lowered)
    return words


def disambiguate_word(sentence, target_word, pos_tags):
    """Pick the WordNet sense of ``target_word`` that best fits ``sentence``.

    Each candidate synset is scored by how many content words of the sentence
    also occur in the synset's definition and example sentences (a simplified
    Lesk overlap). When the target word's part-of-speech tag maps to a WordNet
    part of speech, only synsets of that part of speech are considered.

    Args:
        sentence: The sentence containing the ambiguous word.
        target_word: The word to disambiguate (matched case-insensitively).
        pos_tags: Sequence of ``(token, tag)`` pairs for the sentence, as
            produced by ``nltk.pos_tag``; the tag of the first token equal to
            ``target_word`` is used.

    Returns:
        The highest-scoring synset, or ``None`` when WordNet has no candidate
        sense (after part-of-speech filtering). Ties keep the earliest
        candidate in the order returned by ``wordnet.synsets``.
    """
    stop_words = set(stopwords.words('english'))
    target_lower = target_word.lower()

    # Map the tag of the first occurrence of the target onto a WordNet POS.
    target_pos = None
    for word, tag in pos_tags:
        if word.lower() == target_lower:
            if tag.startswith('N'):
                target_pos = wordnet.NOUN
            elif tag.startswith('V'):
                target_pos = wordnet.VERB
            elif tag.startswith('J'):
                # Penn Treebank adjective tags are JJ/JJR/JJS; no tag starts
                # with 'ADJ', so that prefix could never match.
                target_pos = wordnet.ADJ
            elif tag.startswith('R'):
                target_pos = wordnet.ADV
            break

    # Satellite adjectives report pos() == 's'; accept them together with
    # head adjectives ('a') so the adjective filter does not discard them.
    if target_pos == wordnet.ADJ:
        allowed_pos = {wordnet.ADJ, wordnet.ADJ_SAT}
    elif target_pos:
        allowed_pos = {target_pos}
    else:
        allowed_pos = None  # unknown or unmapped tag: consider every sense

    # Context = content words of the sentence, excluding the target itself.
    context = _content_words(sentence, stop_words, exclude=target_lower)

    best_synset = None
    max_overlap = -1  # below any real score, so the first candidate is always kept
    for synset in wordnet.synsets(target_word):
        if allowed_pos and synset.pos() not in allowed_pos:
            continue

        # Signature = content words of the gloss plus all example sentences.
        synset_text = synset.definition() + ' ' + ' '.join(synset.examples())
        signature = _content_words(synset_text, stop_words)

        overlap = len(context & signature)
        if overlap > max_overlap:
            max_overlap = overlap
            best_synset = synset

    return best_synset

# Confirms only that this cell ran and the function is now defined; the
# function itself is not called here.
print("The 'disambiguate_word' function has been defined.")

The 'disambiguate_word' function has been defined.


In [15]:
# Apply the overlap-based disambiguator to the running example.
best_sense = disambiguate_word(ambiguous_sentence, target_word, pos_tags)

# A falsy result means no sense was selected; report that case first.
if not best_sense:
    print(f"Could not disambiguate sense for '{target_word}'.")
else:
    print(f"The most likely sense for '{target_word}' in the sentence is: {best_sense.name()} ({best_sense.definition()})")

The most likely sense for 'bank' in the sentence is: bank.n.01 (sloping land (especially the slope beside a body of water))
