In [95]:
import json
import os
import random
import re

import nltk
import pandas as pd
import requests
from bs4 import BeautifulSoup
from nltk.corpus import wordnet as wn

nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /Users/amin/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
# Build `keys`: instance id -> gold sense id, read from the gold key file.
with open('/Users/amin/obisidian vault/Thesis/wsd benchmarks files/senseval3/xl-wsd/evaluation_datasets/test-en/test-en.gold.key.txt','r') as file:
    # Only lines that split into exactly two space-separated fields are kept,
    # which removes the instances listing more than one sense as the response.
    keys = {
        fields[0]: fields[1]
        for fields in (
            line.strip().split(' ')
            for line in file
            if len(line.split(' ')) == 2
        )
    }

In [3]:
# Parse the evaluation-set XML into `soup` using Beautiful Soup's XML parser.
with open('/Users/amin/obisidian vault/Thesis/wsd benchmarks files/senseval3/xl-wsd/evaluation_datasets/test-en/test-en.data.xml', 'r') as file:
    soup = BeautifulSoup(file, "xml")

In [4]:
def get_wn_gloss(synset_id, api_key=None, timeout=30):
    """Fetch the WordNet gloss of a synset from the BabelNet ``getSynset`` endpoint.

    Args:
        synset_id: Synset identifier, sent as the ``id`` query parameter.
        api_key: BabelNet API key. When omitted it is read from the
            ``BABELNET_API_KEY`` environment variable, so the credential does
            not have to be stored in the notebook.
        timeout: Seconds to wait for the HTTP response; ``requests`` raises
            once it is exceeded instead of blocking indefinitely.

    Returns:
        The ``gloss`` text of the first entry in the response's ``glosses``
        list whose ``source`` is ``"WN"``; ``None`` when there is no such
        entry; or the string ``"Error: Unable to fetch data, status code
        <code>"`` when the HTTP status is not 200. Callers must check for the
        last two cases before treating the result as a definition.

    Raises:
        RuntimeError: If no API key was passed and ``BABELNET_API_KEY`` is unset.
    """
    if api_key is None:
        api_key = os.environ.get("BABELNET_API_KEY")
    if not api_key:
        raise RuntimeError(
            "No BabelNet API key: pass api_key or set the BABELNET_API_KEY environment variable"
        )

    # Passing the query as `params` lets requests URL-encode every value.
    response = requests.get(
        "https://babelnet.io/v9/getSynset",
        params={
            "id": synset_id,
            "wnVersion": "WN_30",
            "source": "WN",
            "key": api_key,
        },
        timeout=timeout,
    )
    if response.status_code != 200:
        return f"Error: Unable to fetch data, status code {response.status_code}"

    for gloss in response.json().get("glosses", []):
        if gloss.get("source") == "WN":
            return gloss.get("gloss")
    return None

In [5]:
# Maps the POS tags used in the XML to the single-letter POS codes passed to wn.synsets.
pos_keys = {'ADJ':"a", 'ADV':"r", 'NOUN':"n", 'VERB':"v"}
# Path to your XML file
xml_file_path = '/Users/amin/obisidian vault/Thesis/wsd benchmarks files/senseval3/xl-wsd/evaluation_datasets/test-en/test-en.data.xml'


def _sentence_tokens(sentence):
    """Return the sentence's <wf>/<instance> elements as (element, text, is_punctuation) tuples."""
    tokens = []
    for element in sentence.find_all(['wf', 'instance']):
        # .get() so a <wf> without a pos attribute is treated as a normal word.
        is_punctuation = element.name == 'wf' and element.get('pos') == '.'
        tokens.append((element, element.get_text(), is_punctuation))
    return tokens


def _render_sentence(tokens, target_index=None):
    """Join tokens into one string, wrapping the token at `target_index` in <target> tags."""
    words = []
    for index, (_, text, is_punctuation) in enumerate(tokens):
        if index == target_index:
            text = f"<target>{text}</target>"
        if is_punctuation and words:
            words[-1] += text  # no space before punctuation marks
        else:
            # Also taken by a sentence-initial punctuation mark: it has no
            # previous word to attach to and must not be dropped.
            words.append(text)
    return ' '.join(words)


# Load and parse the XML file. Binary mode lets the parser choose the encoding
# itself rather than relying on the platform's default text encoding.
with open(xml_file_path, 'rb') as file:
    soup = BeautifulSoup(file, features="xml")

# Initialize an empty list to store JSON items
json_data = []

# Iterate through each sentence in the XML
for sentence in soup.find_all('sentence'):
    tokens = _sentence_tokens(sentence)

    # Create a JSON item for each disambiguation instance
    for index, (element, word, _) in enumerate(tokens):
        if element.name != 'instance':
            continue

        # Instances absent from `keys` have no gold answer to score against;
        # skip them before doing any WordNet lookup.
        gold_id = keys.get(element['id'])
        if not gold_id:
            continue

        lemma = element['lemma']
        pos = pos_keys[element['pos']]

        json_data.append({
            # Tag this exact token. A text search for the word could hit an
            # earlier occurrence or a substring of another word.
            'sentence': _render_sentence(tokens, target_index=index),
            'target_word': word,
            'target_id': gold_id,
            'lemma': lemma,
            'pos': pos,
            'defs': [synset.definition() for synset in wn.synsets(lemma, pos)],
        })

# Convert the list to JSON
json_output = json.dumps(json_data, indent=2)

# Output the JSON to a file
output_file_path = 'all_data_with_babelnet.json'
with open(output_file_path, 'w') as json_file:
    json_file.write(json_output)

In [6]:
# Seed the global RNG so the 250-item draw (and later draws from `random`)
# is repeatable when the cells run in order.
random.seed(42)

# Attach the gold gloss to a copy of each sampled item so json_data itself is
# left unmodified. get_wn_gloss can return None (no WordNet gloss) or an
# "Error: ..." string (HTTP failure); neither is a usable gold label, so those
# items are dropped and counted instead of being scored against.
random_sample = []
failed_lookups = 0
for item in random.sample(json_data, 250):
    gloss = get_wn_gloss(item['target_id'])
    if not gloss or gloss.startswith("Error: Unable to fetch data"):
        failed_lookups += 1
        continue
    random_sample.append({**item, 'gold': gloss})

if failed_lookups:
    print(f"Dropped {failed_lookups} sampled items without a usable gold gloss.")
if not random_sample:
    raise RuntimeError("No gold gloss could be fetched for any sampled item")

random_sample_json_output = json.dumps(random_sample, indent=2)

# Output the JSON to a file
output_file_path = 'random_sample_with_meaning.json'
with open(output_file_path, 'w') as json_file:
    json_file.write(random_sample_json_output)


# Run the benchmark

In [47]:
import dspy
import Levenshtein
from dspy.teleprompt import BootstrapFewShot
from dspy.evaluate import Evaluate

In [54]:
# One dspy.Example per sampled record; context, word and definitions are the
# inputs, and the gold gloss is stored under "sense".
examples = [
    dspy.Example(
        {
            "context": record["sentence"],
            "definitions": record["defs"],
            "word": record['target_word'],
            "sense": record['gold'],
        }
    ).with_inputs("context", "word", "definitions")
    for record in random_sample
]
print(f"There are {len(examples)} examples.")

# Three randomly drawn examples form the training set; all others are held out.
trainset = random.sample(examples, 3)
valset = list(filter(lambda candidate: candidate not in trainset, examples))

There are 250 examples.


In [101]:
def custom_format_handler(value):
    """Render a list as one ``" | "``-separated string; return anything else unchanged."""
    if not isinstance(value, list):
        return value
    return " | ".join(str(item) for item in value)

# GPT-4o client used by the predictor and by dspy.settings below.
# NOTE(review): api_key is blank here; presumably the client then falls back to
# a key configured in the environment — confirm before running.
lm = dspy.OpenAI(model="gpt-4o", api_key='', max_tokens=4000)


# DSPy signature for the word-sense-disambiguation task: three input fields
# (context, word, definitions) and one output field (sense).
# NOTE(review): DSPy presumably sends the class docstring and the `desc` strings
# to the model as part of the prompt, so treat their wording as behavior — confirm.
class CoTSignature(dspy.Signature):
    """Word Sense Disambiguation: Your task is to return verbatim the most relevant definition from the definitions provided without any changes or extra information."""

    # Sentence containing the target word.
    context = dspy.InputField(
        desc="the context within which the target word is disambiguated"
    )
    # Surface form of the word to disambiguate.
    word = dspy.InputField(
        desc="the target word in the context that needs to be disambiguated"
    )
    # Candidate definitions; custom_format_handler joins a list with " | ".
    definitions = dspy.InputField(
        desc="list of definitions for the target word", format=custom_format_handler
    )
    # Definition chosen by the model; compared with the gold gloss in wsd_metric.
    sense = dspy.OutputField(
        desc="the definition selected from the list of definitions that best matches for the target word's usage in the context provided",
        format=custom_format_handler,
    )


class CoT(dspy.Module):
    """DSPy module that runs a ChainOfThought predictor over CoTSignature."""

    def __init__(self):
        super().__init__()
        self.predictor = dspy.ChainOfThought(CoTSignature)

    def forward(self, context, word, definitions):
        """Run the predictor with the module-level `lm` and return only the chosen sense."""
        inputs = dict(context=context, word=word, definitions=definitions)
        chosen = self.predictor(lm=lm, **inputs)
        return dspy.Prediction(sense=chosen.sense)


def wsd_metric(example, pred, trace=None, threshold=0.55):
    """Score a predicted sense against the gold sense by Levenshtein similarity.

    Args:
        example: Item whose ``'sense'`` entry holds the gold definition.
        pred: Prediction object whose ``sense`` attribute holds the model output.
        trace: Unused; accepted so the function fits the metric call signature.
        threshold: Minimum ``Levenshtein.ratio`` (case-insensitive) for a match.

    Returns:
        1 if the similarity reaches ``threshold``, otherwise 0. Also 0 when
        either the gold sense or the prediction is missing or not a string,
        since such a pair cannot be compared.
    """
    gold_sense = example['sense']
    predicted_sense = pred.sense

    # A gold gloss lookup can yield None and the model can return no text;
    # count those as misses instead of failing on .lower().
    if not isinstance(gold_sense, str) or not isinstance(predicted_sense, str):
        return 0

    # If the output contains a "Sense:" label, compare only the text after it.
    match = re.search(r'Sense:\s*(.+)', predicted_sense)
    if match:
        predicted_sense = match.group(1).strip()

    # Calculate Levenshtein similarity
    similarity = Levenshtein.ratio(gold_sense.lower(), predicted_sense.lower())

    # Check if similarity exceeds the threshold
    return int(similarity >= threshold)


# Register the language model in the global DSPy settings.
dspy.settings.configure(lm=lm)

# Few-shot setup scored with wsd_metric. With max_bootstrapped_demos=0 the run
# below reports 0 bootstrapped traces; presumably the compiled program carries
# only labeled demos taken from trainset — confirm against the DSPy docs.
teleprompter = BootstrapFewShot(
    metric=wsd_metric,
    max_bootstrapped_demos=0,
    max_labeled_demos=9,
)
cot_module = CoT()
optimized_cot = teleprompter.compile(cot_module, trainset=trainset)


  0%|          | 0/3 [00:00<?, ?it/s]

Bootstrapped 0 full traces after 1 examples in round 0.





In [102]:
# Evaluate the compiled program on the held-out examples with two threads,
# showing progress and the first 20 table rows.
evaluator = Evaluate(
    devset=valset,
    num_threads=2,
    display_progress=True,
    display_table=20,
    return_outputs=True,
)

# return_outputs=True makes the call return the per-example outputs alongside the score.
evaluation_score, outputs = evaluator(optimized_cot, metric=wsd_metric)

Average Metric: 202 / 247  (81.8): 100%|██████████| 247/247 [06:39<00:00,  1.62s/it]


Unnamed: 0,context,definitions,word,example_sense,pred_sense,wsd_metric
0,"Fragmentation and flow regulation of large river systems, by region.","['the motion characteristic of fluids (liquids or gases)', 'the amount of fluid that flows in a given time', 'the act of flowing or streaming; continuous...",flow,The act of flowing or streaming; continuous progression,the motion characteristic of fluids (liquids or gases),0
1,"All this may not be obvious to the public, which is concerned about advances in treatment, but I am convinced this basic research will begin...","['people in general considered as a whole', 'a body of people sharing some common interest']",public,People in general considered as a whole,people in general considered as a whole,✔️ [1]
2,"Ringers memorize patterns of changes, known as`` methods,`` which have odd-sounding names like Kent Treble Bob Major or Grandsire Caters.","['an event that occurs when something passes from one state or phase to another', 'a relational difference between states; especially between states before and after...",changes,The result of alteration or modification,The action of changing something,0
3,"But it is about a long term advantage, with a certain degree of indetermination, because the team can be eliminated first of change, and in...","['formal and explicit approval', ""a mechanism of social control for enforcing a society's standards"", 'official permission or approval', 'the act of final authorization']",sanction,A mechanism of social control for enforcing a society's standards,a mechanism of social control for enforcing a society's standards,✔️ [1]
4,"His shout had been involuntary, something anybody might have done without thinking, on the spur of the moment.",['the process of using your mind to consider something carefully'],thinking,The process of using your mind to consider something carefully,"Context: His shout had been involuntary, something anybody might have done without thinking, on the spur of the moment. Word: thinking Definitions: the process of...",✔️ [1]
5,"The new money flows into the already top-heavy administrative structure, which busies itself piling more and more paper work on the teachers.","['arrange in stacks', 'press tightly together or cram', 'place or lay as if in a pile']",piling,Place or lay as if in a pile,arrange in stacks,0
6,"There are many successful schools scattered throughout this nation, some of them in the poorest of ghettos, and they are all sending us the same...","['an educational institution', 'a building where young people receive education', 'the process of being formally educated at a school', 'a body of creative artists or...",schools,An educational institution,an educational institution,✔️ [1]
7,I was convinced that what was true of retinoblastoma would be true for all cancers.``,"['consistent with fact or reality; not false; ; - B. Russell', 'accurately placed or thrown', 'devoted (sometimes fanatically) to a cause or concept or truth',...",true,Consistent with fact or reality; not false,consistent with fact or reality; not false; ; - B. Russell,✔️ [1]
8,"However, the paradoxical thing about the action of the players of Real Madrid is that it did not comply to what we normally characterize as...","['a principle or condition that customarily governs behavior', 'something regarded as a normative example', 'prescribed guide for conduct or action', '(linguistics) a rule describing (or...",rules,Prescribed guide for conduct or action,Directions that define the way a game or sport is to be conducted,0
9,"Mr. Quinlan, 30 years old, knew he carried a damaged gene, having lost an eye to the rare tumor when he was only two months...","['a woman who has given birth to a child (also used as a term of address to your mother)', 'a stringy slimy substance consisting of...",mother,A woman who has given birth to a child (also used as a term of address to your mother),a woman who has given birth to a child (also used as a term of address to your mother),✔️ [1]


In [103]:
# Display the aggregate score returned by the evaluator call.
evaluation_score

81.78

In [104]:
# Wrap the per-example evaluator outputs in a DataFrame (rebinds `outputs`).
outputs = pd.DataFrame(outputs)

In [105]:
# Name the three columns; presumably they hold the example, the prediction and
# the metric score for each row — confirm against Evaluate's return_outputs format.
outputs.columns = ['problem','response','result']

In [106]:
# Save the results as a JSON array with one object per row (orient='records').
outputs.to_json('senseval_3_sample_GPT-4o_3shot.json',orient='records')