# Big Bench

In [None]:
!pip install "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz" -q

In [None]:
!pip install dspy datasets python-Levenshtein -q

In [98]:
import os
import pandas as pd
from datasets import load_dataset
import dspy
import Levenshtein
import random
from openai import OpenAI
# Raw OpenAI client.
# NOTE(review): `client` is not referenced anywhere else in the visible notebook (the DSPy
# calls go through `dspy.OpenAI`) — confirm whether it is still needed.
client = OpenAI()

In [334]:
# Split parameters. A fixed seed makes the few-shot / validation split (and therefore the
# reported score) reproducible across Restart & Run All.
SPLIT_SEED = 0
N_TRAIN = 3
N_VAL = 200

ds = load_dataset("google/bigbench", "metaphor_understanding")["default"]

# One dspy.Example per row; `question` and `options` are the inputs, `answer` is the label.
examples = [
    dspy.Example(
        {"question": r["inputs"], "options": r["multiple_choice_targets"], "answer": r["targets"]}
    ).with_inputs("question", "options")
    for r in ds
]
print(f"There are {len(examples)} examples.")

# Use a private RNG so the split does not depend on (or disturb) the global `random` state.
split_rng = random.Random(SPLIT_SEED)
trainset = split_rng.sample(examples, N_TRAIN)

# Validation examples are drawn only from examples that are not equal to a training example.
heldout = [ex for ex in examples if ex not in trainset]
# Cap at the number of held-out examples so a smaller dataset does not raise ValueError.
valset = split_rng.sample(heldout, min(N_VAL, len(heldout)))

Generating default split:   0%|          | 0/234 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/188 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/46 [00:00<?, ? examples/s]

There are 234 examples.


In [337]:
# Inspect one example to see the layout of the question / options / answer fields.
examples[100]

Example({'question': 'paraphrase: \nQ: He was irritable and angry. option: He was in a bright mood. option: He was radiant with joy. option: He was in a black mood. option: He was in high spirits.\nA:', 'options': ['He was in a black mood.', 'He was in a bright mood.', 'He was radiant with joy.', 'He was in high spirits.'], 'answer': ['He was in a black mood.']}) (input_keys={'question', 'options'})

In [338]:
# Create the GPT-4o language-model wrapper and register it in DSPy's global settings.
# NOTE(review): presumably the API key is read from the environment — confirm.
lm = dspy.OpenAI(model="gpt-4o-2024-05-13")
dspy.settings.configure(lm=lm)

In [339]:
def custom_format_handler(value):
    """Flatten a list into one ' | '-separated string; return any other value unchanged.

    Only `list` instances are joined (each item is converted with `str`); tuples, strings,
    `None`, etc. are passed through as-is.
    """
    if not isinstance(value, list):
        return value
    return ' | '.join(str(item) for item in value)

In [340]:
# Signature (input/output field declaration) shared by the predictors in this notebook.
# NOTE: the class docstring is not just documentation — it is the instruction text that appears
# at the top of the prompt (see the `lm.inspect_history` output), so editing it changes the prompt.
class CoTSignature(dspy.Signature):
    """Play a language game. Only produce the answer."""
    # The task prompt text (the dataset's `inputs` field).
    question = dspy.InputField()
    # Candidate answers; a list is rendered as "a | b | c" by custom_format_handler.
    options = dspy.InputField(format=custom_format_handler)
    # The answer produced by the model; uses the same formatter as `options`.
    answer = dspy.OutputField(format=custom_format_handler)

In [341]:
# Plain `dspy.Predict` over the same signature (no chain-of-thought wrapper).
simple_predict = dspy.Predict(CoTSignature)

In [342]:
class CoT(dspy.Module):
    """Chain-of-thought program over `CoTSignature` that returns only the final answer.

    The language model is not passed explicitly: the predictor picks up whichever LM is
    active in DSPy's settings (e.g. via `dspy.settings.configure(lm=...)`). Passing the
    notebook-global `lm` on every call would override those settings and would make the
    module unusable wherever that global is not defined.
    """

    def __init__(self):
        super().__init__()
        self.signature = CoTSignature
        self.predictor = dspy.ChainOfThought(self.signature)

    def forward(self, question, options):
        """Run the chain-of-thought predictor and wrap its `answer` in a `dspy.Prediction`.

        Args:
            question: the task prompt text.
            options: the candidate answers (a list is joined by the signature's formatter).

        Returns:
            A `dspy.Prediction` with a single `answer` field; any other fields produced by
            the predictor are dropped.
        """
        result = self.predictor(question=question, options=options)
        return dspy.Prediction(answer=result.answer)

In [343]:
# Smoke-test the uncompiled CoT program on a single example and print its answer.
cot_module = CoT()
test_q = examples[74]
prediction = cot_module.forward(question=test_q.question, options=test_q.options)
print(f"Sense: {prediction.answer}")

Sense: Alexander, always a connoisseur of garments, realised that Frederica had changed her style.


In [344]:
# Run the same example through the plain predictor for a side-by-side look.
simple_predict(question=test_q.question, options=test_q.options)

Prediction(
    answer='Alexander, always a connoisseur of garments, realised that Frederica had changed her style.'
)

In [345]:
def wsd_metric(example, pred, trace=None, threshold=0.90):
    """Score a prediction as 1 or 0 by fuzzy string match against the first gold answer.

    Args:
        example: object whose `answer` holds the gold answer(s); a list/tuple (only the first
            element is compared) or a single string.
        pred: object whose `answer` is the predicted string.
        trace: not used by this metric; kept so the function can be passed as `metric=`
            to the optimizer and evaluator.
        threshold: minimum `Levenshtein.ratio` (case-insensitive, surrounding whitespace
            ignored) for the prediction to count as correct.

    Returns:
        1 if the similarity reaches `threshold`, otherwise 0. A missing gold answer or a
        non-string prediction (e.g. `None`) scores 0 instead of raising.
    """
    gold_answers = example.answer
    predicted_sense = pred.answer

    # Nothing to compare: treat as a miss rather than crashing on `[0]` / `.lower()`.
    if not gold_answers or not isinstance(predicted_sense, str):
        return 0

    gold_sense = gold_answers[0] if isinstance(gold_answers, (list, tuple)) else gold_answers

    # Normalise case and surrounding whitespace so a trailing newline or space in the
    # completion does not drag the similarity below the threshold.
    similarity = Levenshtein.ratio(
        gold_sense.strip().lower(),
        predicted_sense.strip().lower(),
    )

    return int(similarity >= threshold)


In [346]:
from dspy.teleprompt import BootstrapFewShot

# Optimizer config: no bootstrapped (self-generated) demos, at most 3 labeled demos.
config = dict(max_bootstrapped_demos=0, max_labeled_demos=3)

# Compile the CoT program on the trainset; `wsd_metric` is the metric handed to the optimizer.
teleprompter = BootstrapFewShot(metric=wsd_metric, **config)
optimized_cot = teleprompter.compile(CoT(), trainset=trainset)

  0%|          | 0/3 [00:00<?, ?it/s]

Bootstrapped 0 full traces after 1 examples in round 0.





In [347]:
# Run the same test example through the compiled program.
optimized_cot.forward(question=test_q.question, options=test_q.options)

Prediction(
    answer='Alexander, always a connoisseur of garments, realised that Frederica had changed her style.'
)

In [348]:
# Print the most recent prompt sent to the LM to check how demos and fields are rendered.
lm.inspect_history(1)





Play a language game. Only produce the answer.

---

Question: paraphrase: Q: Ideas and aspirations must be embodied in manifest acts if reality is to be changed. option: Ideas and aspirations must be embodied in manifest acts if reality is to be converted. option: Ideas and aspirations must be embodied in manifest acts if reality is to be transferred. option: Ideas and aspirations must be embodied in manifest acts if a different reality is to be put on. option: Ideas and aspirations must be embodied in manifest acts if reality is to be modified. A:
Options: Ideas and aspirations must be embodied in manifest acts if reality is to be modified. | Ideas and aspirations must be embodied in manifest acts if reality is to be converted. | Ideas and aspirations must be embodied in manifest acts if a different reality is to be put on. | Ideas and aspirations must be embodied in manifest acts if reality is to be transferred.
Answer: Ideas and aspirations must be embodied in manifest acts if 

In [349]:
# Gold answer for the example used in the spot checks (examples[74]).
examples[74]['answer']

['Alexander, always a connoisseur of garments, realised that Frederica had changed her style.']

In [350]:
from dspy.evaluate import Evaluate

# Set up the evaluator over the validation set; it can be re-used for other programs.
# `return_outputs=True` is why the call below yields the per-example outputs next to the score.
evaluator = Evaluate(devset=valset, num_threads=2, display_progress=True, display_table=20,return_outputs=True)

# Launch evaluation of the compiled program with the same metric that was given to the optimizer.
evaluation_score, outputs = evaluator(optimized_cot, metric=wsd_metric)

Average Metric: 170 / 200  (85.0): 100%|██████████| 200/200 [02:48<00:00,  1.19it/s]

Average Metric: 170 / 200  (85.0%)





Unnamed: 0,question,options,example_answer,pred_answer,wsd_metric
0,"paraphrase: Q: By the time Mark arrived at home that evening, the pains in his head were excruciating. option: By the time Mark contacted home...","['By the time Mark reached home that evening, the pains in his head were excruciating.', 'By the time Mark contacted home that evening, the pains...","['By the time Mark reached home that evening, the pains in his head were excruciating.']","By the time Mark reached home that evening, the pains in his head were excruciating.",1
1,paraphrase: Q: A break up can leave you with a broken heart. option: A break up can make you feel happy and smiling. option: A...,"['A break up can make you feel hurt and sad.', 'A break up can make you feel happy and smiling.', 'A break up can make...",['A break up can make you feel hurt and sad.'],A break up can make you feel hurt and sad.,1
2,paraphrase: Q: The Liberal Unionists were admitted to the Carlton Club. option: The Liberal Unionists were allowed into the Carlton Club. option: The Liberal Unionists...,"['The Liberal Unionists were allowed into the Carlton Club.', 'The Liberal Unionists were confessed to the Carlton Club.', 'The Liberal Unionists were introduced to the...",['The Liberal Unionists were allowed into the Carlton Club.'],The Liberal Unionists were allowed into the Carlton Club.,1
3,paraphrase: Q: I asked your help and it was impossible to find you option: I asked for your help and you broke my heart option:...,"['I asked for your help and you disappeared from radar', 'I asked for your help and you appeared like a jack-in-the-box', 'I asked for your...",['I asked for your help and you disappeared from radar'],I asked for your help and you disappeared from radar,1
4,paraphrase: Q: His condescending attitude made my blood boil. option: His condescending attitude made me really excited. option: His condescending attitude made me really joyful....,"['His condescending attitude made me really angry.', 'His condescending attitude made me really excited.', 'His condescending attitude made me really intrigued.', 'His condescending attitude made...",['His condescending attitude made me really angry.'],His condescending attitude made me really angry.,1
5,paraphrase: Q: he was overwhelmed by happiness option: he was so happy he felt his feet were exploading option: he was so happy he felt...,"['he was so happy he felt his heart was exploding', 'he was so happy he felt his feet were exploading', 'he was so happy he...",['he was so happy he felt his heart was exploding'],he was so happy he felt his heart was exploding,1
6,paraphrase: Q: He felt an intense excitement option: He felt a stream of excitement. option: Het felt a flood of excitement. option: He felt a...,"['He felt a wave of excitement', 'He felt a drop of excitement.', 'He felt a stream of excitement.', 'Het felt a flood of excitement.']",['He felt a wave of excitement'],He felt a wave of excitement,1
7,paraphrase: Q: the demonstration which Keith has earlier persuaded him to take part in against the Vietnam War option: the demonstration which Keith has earlier...,"['the demonstration which Keith has earlier persuaded him to join against the Vietnam War', 'the demonstration which Keith has earlier persuaded him to connect against...",['the demonstration which Keith has earlier persuaded him to join against the Vietnam War'],the demonstration which Keith has earlier persuaded him to join against the Vietnam War,1
8,paraphrase: Q: Apart from the last one and the penultimate one which seemed to apply in America more than here (here he had become part...,['Apart from the last one and the penultimate one which seemed to apply in America more than here (here he had joined the golf club)...,['Apart from the last one and the penultimate one which seemed to apply in America more than here (here he had joined the golf club)...,Apart from the last one and the penultimate one which seemed to apply in America more than here (here he had joined the golf club)...,1
9,paraphrase: Q: A ‘new Minister of Environmental Protection’ would operate inside the Department of Energy. option: A ‘new Minister of Environmental Protection’ would function inside...,"['A ‘new Minister of Environmental Protection’ would work inside the Department of Energy.', 'A ‘new Minister of Environmental Protection’ would take effect inside the Department...",['A ‘new Minister of Environmental Protection’ would work inside the Department of Energy.'],A ‘new Minister of Environmental Protection’ would work inside the Department of Energy.,1


In [351]:
# Aggregate score returned by the evaluator call.
evaluation_score

85.0

In [352]:
# Wrap the per-example evaluator outputs in a DataFrame.
# NOTE(review): this rebinds `outputs` to a different type; a distinct name would make the cell safer to re-run.
outputs = pd.DataFrame(outputs)

In [353]:
# Name the three columns.
# NOTE(review): assumes each evaluator output row is (example, prediction, metric value) — confirm.
outputs.columns = ['problem','response','result']

In [354]:
# Look at the `problem` entry (first column) of row 95.
outputs.iloc[95,0]

Example({'question': 'paraphrase: \nQ: He is very important to me. option: He is the berry of my eye. option: He is the persimmon of my eye. option: He is the apple of my eye. option: He is the pear of my eye.\nA:', 'options': ['He is the apple of my eye.', 'He is the pear of my eye.', 'He is the berry of my eye.', 'He is the persimmon of my eye.'], 'answer': ['He is the apple of my eye.']}) (input_keys={'question', 'options'})

In [355]:
# Persist the per-example results as a JSON list of records.
# NOTE(review): the `problem` column holds Example objects — verify they serialise as intended.
outputs.to_json('metaphor_understanding_GPT_4o_3_shot_results.json',orient='records')