# Big Bench

In [None]:
!pip install "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz" -q

In [None]:
!pip install dspy datasets python-Levenshtein -q

In [98]:
import os
import pandas as pd
from datasets import load_dataset
import dspy
import Levenshtein
import random
from openai import OpenAI
client = OpenAI()

In [381]:
# Load the BIG-bench `conlang_translation` task and wrap every record as a
# DSPy example whose inputs are `question` and `options`.
ds = load_dataset("google/bigbench", "conlang_translation")["default"]
examples = [
    dspy.Example(
        {"question": r["inputs"], "options": r["multiple_choice_targets"], "answer": r["targets"]}
    ).with_inputs("question", "options")
    for r in ds
]
print(f"There are {len(examples)} examples.")

# Pick the 3 few-shot demonstrations with a private, seeded RNG so that a
# fresh "Restart & Run All" reproduces the same split (and the same score).
SPLIT_SEED = 0
train_indices = random.Random(SPLIT_SEED).sample(range(len(examples)), 3)
trainset = [examples[i] for i in train_indices]

# Split by position rather than by `Example` equality: a validation example
# that happens to compare equal to a training one must not be dropped.
held_out = set(train_indices)
valset = [ex for i, ex in enumerate(examples) if i not in held_out]

There are 164 examples.


In [382]:
examples[1]

Example({'question': 'The following are sentences in Adna and their English translations:\n\nADNA: Ndengi ngase.\nENGLISH TRANSLATION: He drinks water.\nADNA: Ngoru ndatab ndengi ngase.\nENGLISH TRANSLATION: The child keeps drinking water.\nADNA: Ngoru ndabgu.\nENGLISH TRANSLATION: The child grows.\nADNA: Ngoru ndatab ndangalan.\nENGLISH TRANSLATION: The child keeps crawling.\nADNA: Ndimbi ndamba.\nENGLISH TRANSLATION: He runs here.\nADNA: Ndavera Ambandate ndisbo.\nENGLISH TRANSLATION: He pulls Ambandate down.\nADNA: Ambandate ndisbo.\nENGLISH TRANSLATION: Ambandate goes down.\nADNA: Ndekaselib luta ndaksay.\nENGLISH TRANSLATION: He carries the axe up.\nADNA: Ndalbande ngase ndamba.\nENGLISH TRANSLATION: He brings water.\nADNA: Kansasres ndimbi ndaksay.\nENGLISH TRANSLATION: The teacher runs up.\nADNA: Ngase ndimbi.\nENGLISH TRANSLATION: The water runs.\nADNA: Ambandate ndokami.\nENGLISH TRANSLATION: Ambandate eats.\nADNA: Ndekaselib tebu.\nENGLISH TRANSLATION: He carries taro.\nADNA:

In [383]:
# Build the OpenAI-backed LM wrapper (pinned to a dated GPT-4o snapshot) and
# register it in DSPy's global settings. `lm` is also referenced directly
# further down (inside `CoT.forward` and for `lm.inspect_history`).
lm = dspy.OpenAI(model="gpt-4o-2024-05-13")
dspy.settings.configure(lm=lm)

In [384]:
def custom_format_handler(value):
    """Format hook for signature fields.

    A ``list`` is flattened into one string with its items (converted via
    ``str``) separated by ``' | '``. Anything that is not a ``list`` is
    returned unchanged.
    """
    if not isinstance(value, list):
        return value
    return ' | '.join(str(item) for item in value)

In [386]:
# NOTE: the class docstring below is not just documentation. It is the text
# that opens the prompt printed by `lm.inspect_history` later in this
# notebook, so editing it changes what the model is asked to do.
class CoTSignature(dspy.Signature):
    """You are a translator. Only produce the required translation answer."""
    # Input: the example sentence pairs followed by the sentence to translate.
    question = dspy.InputField()
    # The `options` field is commented out, so it is not part of the signature;
    # the prompt format only contains Question / Reasoning / Answer.
    #options = dspy.InputField(format=custom_format_handler)
    # Output: the translation. `custom_format_handler` is supplied as the
    # field's `format` hook (it joins list values with ' | ').
    answer = dspy.OutputField(format=custom_format_handler)

In [387]:
# Plain `dspy.Predict` over the same signature, used below as a point of
# comparison with the chain-of-thought module.
simple_predict = dspy.Predict(CoTSignature)

In [388]:
class CoT(dspy.Module):
    """Chain-of-thought translation program built on ``CoTSignature``."""

    def __init__(self):
        super().__init__()
        self.signature = CoTSignature
        self.predictor = dspy.ChainOfThought(self.signature)

    def forward(self, question, options):
        """Run the chain-of-thought predictor and keep only its answer.

        ``question`` and ``options`` are forwarded to the predictor as
        keyword arguments, together with the notebook-level ``lm`` object.
        The returned ``dspy.Prediction`` carries a single ``answer`` field.
        """
        call_kwargs = {"lm": lm, "question": question, "options": options}
        completion = self.predictor(**call_kwargs)
        return dspy.Prediction(answer=completion.answer)

In [389]:
# Smoke-test the un-optimized CoT program on one hand-picked example
# (index 74) before running the optimizer.
cot_module = CoT()
test_q = examples[74]
prediction = cot_module.forward(question=test_q.question, options=test_q.options)
# NOTE(review): the "Sense:" label looks like a leftover from a word-sense
# task; the value printed here is the predicted translation.
print(f"Sense: {prediction.answer}")

Sense: Question: Now, translate the following from English to Holuan: ENGLISH: nineteen people
Reasoning: Let's think step by step in order to produce the answer. We need to identify the Holuan words for "nineteen" and "people." From the given sentences, we know that "three people" is "bame ęg jog" and "nine


In [390]:
simple_predict(question=test_q.question, options=test_q.options)

Prediction(
    answer='mlsea ęg jog'
)

In [391]:
def wsd_metric(example, pred, trace=None, threshold=0.90):
    """Score a predicted translation against the gold translation(s).

    The ``wsd`` in the name is historical; this notebook uses the function
    as its translation metric. A prediction counts as correct when its
    case-insensitive ``Levenshtein.ratio`` with at least one gold answer
    reaches ``threshold``.

    Args:
        example: Object whose ``answer`` holds the gold translations as a
            list of strings. A bare string is treated as a one-item list.
        pred: Object whose ``answer`` is the predicted translation string.
        trace: Unused; kept so the function matches the metric call
            signature used by the DSPy optimizer and evaluator.
        threshold: Minimum similarity ratio required for a match.

    Returns:
        ``1`` if the best similarity is at least ``threshold``, else ``0``.
        Also ``0`` (instead of raising) when there is no gold string to
        compare against or the prediction is not a string, so that a single
        malformed record cannot abort a whole evaluation run.
    """
    gold_answers = example.answer
    if isinstance(gold_answers, str):
        # Indexing a bare string with [0] would compare against its first
        # character only; treat it as a single reference instead.
        gold_answers = [gold_answers]
    references = [ref for ref in (gold_answers or []) if isinstance(ref, str)]

    predicted = getattr(pred, "answer", None)
    if not references or not isinstance(predicted, str):
        return 0

    predicted = predicted.lower()
    # Every gold target is an acceptable translation, so score against the
    # closest one rather than only the first.
    best_similarity = max(
        Levenshtein.ratio(ref.lower(), predicted) for ref in references
    )
    return int(best_similarity >= threshold)


In [392]:
from dspy.teleprompt import BootstrapFewShot

# Set up the optimizer. With max_bootstrapped_demos=0 no demonstrations are
# self-generated ("bootstrapped"); max_labeled_demos=3 matches the size of
# the 3-example trainset, i.e. this is a plain 3-shot prompt.
config = dict(max_bootstrapped_demos=0, max_labeled_demos=3)

# Optimize! `wsd_metric` (Levenshtein similarity against the gold translation)
# is the metric handed to the optimizer to judge candidate outputs.
teleprompter = BootstrapFewShot(metric=wsd_metric, **config)
optimized_cot = teleprompter.compile(CoT(), trainset=trainset)

  0%|          | 0/3 [00:00<?, ?it/s]

Bootstrapped 0 full traces after 1 examples in round 0.





In [393]:
optimized_cot.forward(question=test_q.question, options=test_q.options)

Prediction(
    answer='mlsea ęg jog'
)

In [394]:
lm.inspect_history(1)





You are a translator. Only produce the required translation answer.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION: The child keeps drinking water. ADNA: Ngoru ndabgu. ENGLISH TRANSLATION: The child grows. ADNA: Ngoru ndatab ndangalan. ENGLISH TRANSLATION: The child keeps crawling. ADNA: Ndimbi ndamba. ENGLISH TRANSLATION: He runs here. ADNA: Ndavera Ambandate ndisbo. ENGLISH TRANSLATION: He pulls Ambandate down. ADNA: Ambandate ndisbo. ENGLISH TRANSLATION: Ambandate goes down. ADNA: Ndekaselib luta ndaksay. ENGLISH TRANSLATION: He carries the axe up. ADNA: Ndalbande ngase ndamba. ENGLISH TRANSLATION: He brings water. ADNA: Kansasres ndimbi ndaksay. ENGLISH TRANSLATION: The teacher run

In [395]:
examples[74]['answer']

['tęmoaj na o basea ęg jog']

In [396]:
from dspy.evaluate import Evaluate

# Reusable evaluator over the held-out examples. `return_outputs=True` makes
# the call below return the per-example outputs next to the aggregate score.
evaluator = Evaluate(
    devset=valset,
    num_threads=2,
    display_progress=True,
    display_table=20,
    return_outputs=True,
)

# Score the compiled few-shot program with the Levenshtein-based metric.
evaluation_score, outputs = evaluator(optimized_cot, metric=wsd_metric)

Average Metric: 80 / 161  (49.7): 100%|██████████| 161/161 [01:43<00:00,  1.56it/s]

Average Metric: 80 / 161  (49.7%)





Unnamed: 0,question,options,example_answer,pred_answer,wsd_metric
0,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['The teacher carries taro here.'],The teacher carries taro here.,1
1,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['He brings taro up.'],He brings taro up.,1
2,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['The water goes down.'],The water goes down.,1
3,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['Ambandate keeps growing.'],Ambandate keeps growing.,1
4,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['Ambandate keeps carrying the axe here.'],Ambandate keeps carrying the axe here.,1
5,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['He grows.'],He grows.,1
6,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['The child grows taro.'],The child grows taro.,1
7,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['The axe crawls up.'],The axe crawls up.,1
8,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['The palm tree runs.'],The palm tree runs.,1
9,The following are sentences in Adna and their English translations: ADNA: Ndengi ngase. ENGLISH TRANSLATION: He drinks water. ADNA: Ngoru ndatab ndengi ngase. ENGLISH TRANSLATION:...,[],['Kansasres ndekaselib ngase ndisbo.'],Kansasres ndekaselib ngase ndisbo.,1


In [397]:
evaluation_score

49.69

In [398]:
# Turn the per-example outputs returned by the evaluator into a DataFrame.
# This rebinds `outputs`, so the cell is meant to be run once per evaluation.
outputs = pd.DataFrame(outputs)

In [399]:
# Name the three columns of the results frame.
# NOTE(review): presumably (example, prediction, metric score) per row — confirm against the evaluator's output.
outputs.columns = ['problem','response','result']

In [401]:
# Persist the per-example results to disk, one JSON record per row.
outputs.to_json('conlang_translation_GPT_4o_3_shot_results.json',orient='records')