In [1]:
import os
import dspy
from datasets import load_dataset

from dotenv import load_dotenv
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# API key handed to the LMs created below; this is None when OPENAI_API_KEY is not set.
key = os.environ.get("OPENAI_API_KEY")

In [3]:
# Task model: gpt-4.1-mini, registered as DSPy's default LM for the modules defined below.
lm = dspy.LM("openai/gpt-4.1-mini", temperature=1, api_key=key, max_tokens=32000)
dspy.configure(lm=lm)

In [4]:
from tasks.baselines.afriqa import _parse_str_list

In [None]:

def _to_examples(split):
    """Convert the rows of one AfriMMLU split into a list of `dspy.Example`s.

    Each row contributes its `question`, `subject`, `choices` and `answer`
    fields; `question`, `subject` and `choices` are marked as inputs, which
    leaves `answer` as the label.
    """
    return [
        dspy.Example({
            "question": row["question"],
            "subject": row["subject"],
            # presumably the dataset stores choices as a stringified list - verify
            # against `_parse_str_list` in tasks.baselines.afriqa
            "choices": _parse_str_list(row["choices"]),
            "answer": row["answer"],
        }).with_inputs("question", "subject", "choices")
        for row in split
    ]


def init_dataset(lang="yor"):
    """Load AfriMMLU for one language and return its splits as DSPy examples.

    Args:
        lang: configuration name passed to `load_dataset` for
            "masakhane/afrimmlu". Defaults to "yor", the value this notebook
            originally hard-coded.

    Returns:
        (train_set, val_set, test_set) as lists of `dspy.Example`.
        Note the mapping: the dataset's `validation` split is used for
        training and its `dev` split for validation. In the recorded run for
        "yor" the three lists had 83 / 25 / 500 examples.
    """
    dataset = load_dataset("masakhane/afrimmlu", lang)

    train_set = _to_examples(dataset["validation"])
    val_set = _to_examples(dataset["dev"])
    test_set = _to_examples(dataset["test"])

    return train_set, val_set, test_set

In [6]:
# Materialize the three splits once; the tuple of lengths below is displayed as the cell output.
train_set, val_set, test_set = init_dataset()

len(train_set), len(val_set), len(test_set)

(83, 25, 500)

In [7]:
# Pool both non-test splits into a single training set for the GEPA optimizer.
merged_train_set = list(train_set) + list(val_set)

In [8]:
import random
from collections import defaultdict

def stratified_val_from_test(examples, total_val=100, seed=0):
    """
    Split `examples` into a subject-stratified validation set and a remainder.

    Every subject receives a share of `total_val` proportional to its
    frequency in `examples`. Shares are floored, and the slots lost to
    flooring are handed out one at a time to the subjects with the largest
    fractional remainders, so the validation set has exactly `total_val` items.

    Args:
        examples: iterable of items that support `ex['subject']`. The input
            is copied and never mutated.
        total_val: number of examples to place in the validation set.
        seed: seed of the private RNG used for shuffling; the same seed and
            input order always give the same split.

    Returns:
        (val_set, new_test_set): two lists that together contain every input
        example exactly once.

    Raises:
        ValueError: if `total_val` is negative or exceeds the number of examples.
    """
    ex_list = list(examples)          # shallow copy, don't mutate original
    N = len(ex_list)

    # Validate before doing any work. A negative quota used to slip through and
    # silently produce a wrong split (truncation toward zero + negative slices).
    if total_val < 0:
        raise ValueError(f"total_val={total_val} must be non-negative")
    if total_val > N:
        raise ValueError(f"total_val={total_val} > dataset size {N}")

    rng = random.Random(seed)
    rng.shuffle(ex_list)

    # group by subject (order inside each group is the shuffled order)
    by_subject = defaultdict(list)
    for ex in ex_list:
        by_subject[ex['subject']].append(ex)

    # 1) proportional allocation per subject, rounded down
    alloc = {}
    fracs = []  # (fractional_part, subj)
    for subj, items in by_subject.items():
        raw = total_val * len(items) / N   # ideal fractional quota
        base = int(raw)
        alloc[subj] = base
        fracs.append((raw - base, subj))

    # 2) fix rounding so the total is exactly `total_val`:
    #    leftover slots go to the largest fractional remainders first
    #    (ties fall back to comparing the subject itself, in descending order)
    remaining = total_val - sum(alloc.values())
    fracs.sort(reverse=True)
    for _, subj in fracs:
        if remaining <= 0:
            break
        alloc[subj] += 1
        remaining -= 1

    # 3) take the first `k` shuffled items of each subject as validation
    val_set = []
    new_test_set = []
    for subj, items in by_subject.items():
        k = alloc[subj]
        val_set.extend(items[:k])
        new_test_set.extend(items[k:])

    return val_set, new_test_set


In [9]:
# Hold out a subject-stratified validation set from the test split (default total_val=100);
# the remaining examples are what the notebook evaluates on from here on.
new_val_set, new_test_set = stratified_val_from_test(test_set)


In [10]:
class GenerateResponse(dspy.Signature):
    """Answer the question with the correct option (A–D) from the given choices."""
    # NOTE: per DSPy signature semantics, the docstring above and the `desc` strings
    # below are part of the prompt sent to the LM - editing them changes model behavior.
    question = dspy.InputField()
    subject = dspy.InputField()
    choices = dspy.InputField(desc="List of four answer options ordered as [A,B,C,D].")
    # The metrics in this notebook strip and upper-case this field before comparing it
    # with the gold letter.
    answer = dspy.OutputField(
        desc="Return exactly one letter in {A,B,C,D}, where A=choices[0], B=choices[1], C=choices[2], D=choices[3]."
    )

# Baseline program; the `reasoning` column in the evaluation tables comes from ChainOfThought.
program = dspy.ChainOfThought(GenerateResponse)


In [11]:
def metric(example, prediction, trace=None, pred_name=None, pred_trace=None):
    """Exact-match score for a multiple-choice prediction.

    The gold letter is `example["answer"]` with surrounding whitespace removed.
    The predicted letter is `prediction.answer`, stripped and upper-cased.
    Returns 1 when the prediction is one of A/B/C/D and equals the gold letter,
    otherwise 0. The remaining parameters are accepted but unused.
    """
    gold = str(example["answer"]).strip()
    guess = str(prediction.answer).strip().upper()

    # anything that is not a bare option letter can never score
    return int(guess in ("A", "B", "C", "D") and guess == gold)


In [12]:
# Shared evaluator: scores a program on `new_test_set` with the exact-match `metric`.
# It is reused for every program evaluated in this notebook.
evaluate = dspy.Evaluate(
    devset=new_test_set,
    metric=metric,
    num_threads=32,
    display_table=True,
    display_progress=True
)

# Baseline score of the unoptimized program.
evaluate(program)

Average Metric: 229.00 / 400 (57.2%): 100%|██████████| 400/400 [00:00<00:00, 503.66it/s]

2025/12/12 02:10:11 INFO dspy.evaluate.evaluate: Average Metric: 229 / 400 (57.2%)





Unnamed: 0,question,subject,choices,example_answer,reasoning,pred_answer,metric
0,Nǹkan tí ó pàdánù láti ní nǹkan mìíràn ni à ń pè ni?,high_school_microeconomics,"[owó ìdókówò., Ilẹ̀., iye owó., iye àǹfààní]",D,"Nǹkan tí ó pàdánù láti ní nǹkan mìíràn, nínú ẹ̀kọ́ orí ètò-ọrọ ajé...",D,✔️ [1]
1,Ọjà iṣẹ́ ṣíṣe wà ní ìwọ̀ntúnwọ̀nsì báyìí. Èwo nínú àwọn wọ̀nyí ni...,high_school_microeconomics,['Ọ̀pọ̀lọpọ̀ àwọn akẹ́kọ̀ọ́ ló kẹ́kọ̀ọ́ jáde pẹ\u200clú ìmọ̀ọ́ṣe t...,B,"Ní ọjà iṣẹ́ tí ó wà ní ìwọ̀ntúnwọ̀nsì, owó oṣù á pọ̀ sí i tí ìbéèr...",B,✔️ [1]
2,"Nígbàtí iye ẹlastísítì owó fún ìbéèrè tóbi ju 1 lọ, ìbéèrè náà jẹ́",high_school_microeconomics,"[Kò jẹ mọ‌ ìráraràn, ìráraràn., ìráraràn., ìráraràn ọ̀kọ̀ọ̀kan]",B,"Nípa ọrọ-aje, iye ẹlastísítì owó fún ìbéèrè (price elasticity of d...",B,✔️ [1]
3,Èwo nínú àwọn àbùdá wọ̀nyí ni kò jẹ́ ti ọjà ìfigagbága?,high_school_microeconomics,"[Wíwọ ọjà náà láì sanwó., Ìyàtọ̀ nínú àwọn ọjà, Àgbése ìbéèrè tí ó...",B,"Ọjà ìfigagbága ni wọ́n mọ̀ sí ""perfect competition"" ní economics, ...",A,✔️ [0]
4,Tí àwọn ilé iṣẹ́ bá fẹ́ wọ ọjà tí ó jẹ́ ìfigagbága fún anìkanjọpọ́...,high_school_microeconomics,['Ìbéèrè fún àwọn ilé-iṣẹ\u200c tí ó wà tẹ\u200clẹ\u200c sún lọ sí...,D,Ní àkókò tí àwọn ilé-iṣẹ́ tuntun bá wọ ọjà tí ó jẹ́ ìfigagbága fún...,D,✔️ [1]
...,...,...,...,...,...,...,...
395,Kínni ojúṣe ààlà etíkun?,international_law,"['ààlà etíkun ń ṣiṣẹ́ bíi àgbègbè ààbò ti Ìpínlẹ̀ etíkun', 'ààlà e...",C,Ààlà etíkun (territorial sea) jẹ́ agbègbè omi tó wà lẹ́bàà etíkun ...,A,✔️ [0]
396,Kínni ìtumọ̀ àwọn ènìyàn tí òfin ilẹ̀ òkèèrè dámọ̀?,international_law,['Ó túmọ̀ sí wí pé àwọn Ìpínlẹ̀ nìkan ni wọ́n rí gẹ́gẹ́ bíi olùtẹ̀...,B,Ìtumọ̀ àwọn ènìyàn tí òfin ilẹ̀ òkèèrè dámọ̀ ni pé àwọn ènìyàn tàb...,B,✔️ [1]
397,Kí ni ìlànà ìdánimọ̀ tó jẹ́ mọ́ òfin?,international_law,"['Ìdánimọ̀ jẹ́ ìpinnu ti Ìpínlẹ̀.', 'Ìdánimọ̀ nílò ìfọwọ́sowọ́pọ̀ ...",A,Ìlànà ìdánimọ̀ tó jẹ́ mọ́ òfin ní àgbáyé túmọ̀ sí ìmúlò àwọn òfin ...,A,✔️ [1]
398,Ta ni 'alátakò ní gbogboògbà'?,international_law,['Ìpínlẹ\u200c tí ó máa ń tako òfin tí ó wà nínú ìbéèrè lẹ\u200cyì...,C,"Nípa ìtúmọ̀ ìtàn ìjọba àti òfin àgbáyé, 'alátakò ní gbogboògbà' (p...",C,✔️ [1]


EvaluationResult(score=57.25, results=<list of 400 results>)

In [13]:
def metric_with_feedback(example, prediction, trace=None, pred_name=None, pred_trace=None):
    """Score a multiple-choice prediction and explain the result in text.

    Returns a `dspy.Prediction` with:
      - `score`: 1 if the stripped, upper-cased `prediction.answer` equals the
        gold letter in `example["answer"]`, else 0 (also 0 for any answer
        that is not one of A/B/C/D);
      - `feedback`: a sentence stating whether the answer was valid/correct
        and naming the correct option text and letter.

    The gold letter is looked up in ["A", "B", "C", "D"] to pick the matching
    entry of `example["choices"]`, so a gold letter outside that list raises
    `ValueError`. The remaining parameters are accepted but unused.
    """
    option_letters = ["A", "B", "C", "D"]

    gold_letter = str(example["answer"]).strip()
    gold_text = example["choices"][option_letters.index(gold_letter)]

    given = str(prediction.answer).strip().upper()

    if given not in option_letters:
        # Not a bare option letter: tell the model what a valid answer looks like.
        score = 0
        feedback_text = (
            "The final answer must be one of A, B, C, or D. "
            f"You responded with '{given}', which is not a valid option. "
            f"The correct answer is '{gold_text}', which is option {gold_letter}."
        )
    else:
        score = int(given == gold_letter)
        verdict = "correct" if score == 1 else "incorrect"
        feedback_text = (
            f"Your answer is {verdict}. The correct answer is "
            f"'{gold_text}', which is option {gold_letter}."
        )

    return dspy.Prediction(score=score, feedback=feedback_text)


In [None]:
from dspy import GEPA

# GEPA prompt optimization: candidates are scored with the feedback-producing metric and
# new instruction text is proposed by a separate reflection LM (gpt-5).
# NOTE(review): the recorded output of this cell reports 400 Pareto-tracking examples and a
# base score of 0.5725 - the same numbers as the evaluation on `new_test_set` - whereas the
# later, identically configured run with `valset=new_val_set` reports 100 examples. The
# output appears to come from an earlier version of this cell; re-run it before relying on
# `optimized_program` or its scores.
optimizer = GEPA(
    metric=metric_with_feedback,
    auto="light",
    num_threads=32,
    track_stats=True,
    reflection_minibatch_size=3,
    seed=42,
    reflection_lm=dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000, api_key=key)
)

# Optimize the baseline program: train on the pooled train/dev examples, select on the
# validation set held out from the test split.
optimized_program = optimizer.compile(
    program,
    trainset=merged_train_set,
    valset=new_val_set,
)

2025/12/12 02:11:19 INFO dspy.teleprompt.gepa.gepa: Running GEPA for approx 1980 metric calls of the program. This amounts to 3.90 full evals on the train+val set.
2025/12/12 02:11:19 INFO dspy.teleprompt.gepa.gepa: Using 400 examples for tracking Pareto scores. You can consider using a smaller sample of the valset to allow GEPA to explore more diverse solutions within the same budget. GEPA requires you to provide the smallest valset that is just large enough to match your downstream task distribution, while providing as large trainset as possible.
GEPA Optimization:   0%|          | 0/1980 [00:00<?, ?rollouts/s]2025/12/12 02:11:19 INFO dspy.evaluate.evaluate: Average Metric: 229.0 / 400 (57.2%)
2025/12/12 02:11:19 INFO dspy.teleprompt.gepa.gepa: Iteration 0: Base program full valset score: 0.5725
GEPA Optimization:  20%|██        | 400/1980 [00:00<00:02, 740.31rollouts/s]2025/12/12 02:11:19 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Selected program 0 score: 0.5725


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:06<00:00,  2.07s/it] 

2025/12/12 02:11:26 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/12/12 02:12:04 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Proposed new text for predict: Task
- Given an input with:
  - question: may be in Yoruba (topics include global facts and elementary mathematics)
  - subject: a category label (e.g., global_facts, elementary_mathematics)
  - choices: a list of four options in order (index 0–3 corresponds to options A–D),
  choose the single correct option and return its letter (A, B, C, or D).

Input format
- question: Yoruba text. Examples:
  - “Ṣírò y.” means “Solve for y.”
  - “Gbólóhùn wo ló ṣe déédé pẹ̀lú …?” means “Which expression is equivalent to …?”
- subject: helps you anticipate the type of reasoning (facts vs. math).
- choices: a list of four strings; map to A, B, C, D by position.

Output format
- Return only the single capital letter A, B, C, or D.
- Do not include the option text, explanations, or extra formatting unless explicitly requested.

General solution strategy
- For Yoruba questions: translate/interpret internally

Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:06<00:00,  2.14s/it]

2025/12/12 02:13:23 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)





2025/12/12 02:14:14 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Proposed new text for predict: Task: Given an input with fields "question", "subject", and "choices" (a list of exactly four options), select the correct option and respond with only its letter (A, B, C, or D). Do not include explanations or restate the option text.

Input format:
- question: string (may be in any language, including Yoruba).
- subject: string identifier (e.g., international_law, high_school_microeconomics, elementary_mathematics).
- choices: list of 4 strings. Map them to letters as follows:
  A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General approach:
- Understand the question and evaluate all four options.
- Use subject-specific knowledge to identify the best answer. If multiple options seem plausible, prefer the one that is most complete and aligns with standard textbook principles and recognized authorities for that subject.
- Output only the letter of the correct option.

Subj

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:04<00:00,  1.46s/it]

2025/12/12 02:15:22 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/12/12 02:16:20 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Proposed new text for predict: Task: Given an input with fields "question", "subject", and "choices" (a list of exactly four options), select the correct option and respond with only its letter (A, B, C, or D). Do not include explanations or restate the option text.

Input format:
- question: string (may be in any language, including Yoruba).
- subject: string identifier (e.g., international_law, high_school_microeconomics, elementary_mathematics, global_facts).
- choices: list of 4 strings. Map them to letters as follows:
  A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General approach:
- Read the question carefully and translate internally if needed (Yoruba or other languages). Do not output translations.
- Identify the subject and any market structure or legal concepts explicitly stated in the question text. If the subject label and the question text seem to point to different subtopics, prioritize t

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:06<00:00,  2.05s/it] 

2025/12/12 02:16:31 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/12/12 02:17:12 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Proposed new text for predict: Task
- You will be given an input with fields "question", "subject", and "choices" (exactly four options). Select the single best option and output only its letter (A, B, C, or D). Do not include explanations or restate option text.

Input format
- question: string (may be in any language, including Yoruba).
- subject: string identifier (e.g., international_law, high_school_microeconomics, elementary_mathematics, high_school_geography).
- choices: list of 4 strings mapped to letters as:
  A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General approach
- Carefully read and understand the question, including Yoruba phrasing and keywords when present.
- Evaluate all four options and select the one that best aligns with standard textbook principles and recognized authorities for the subject.
- If more than one option seems plausible, choose the most complete and orthodox answer

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:13<00:00,  4.64s/it] 

2025/12/12 02:18:42 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/12/12 02:19:18 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Proposed new text for predict: Task
- Input: 
  - question: Yoruba text (topics include global facts, geography, microeconomics, and elementary mathematics).
  - subject: a category label (e.g., global_facts, high_school_geography, high_school_microeconomics, elementary_mathematics).
  - choices: a list of four options in order (index 0–3 corresponds to options A–D).
- Output:
  - Return only the single capital letter A, B, C, or D that corresponds to the correct option.
  - Do not include the option text, explanations, punctuation, or extra formatting unless explicitly requested.

Mapping
- A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General solution strategy
- Translate/interpret Yoruba internally; answer with the correct option letter only.
- Use the subject label to anticipate the reasoning method (facts vs. math vs. economics).

Yoruba cues and translations
- “Ṣírò y.” = Solve for y.
- “Gbólóhùn 

In [15]:
# Score the first GEPA-optimized program on the same `new_test_set` as the baseline.
evaluate(optimized_program)

Average Metric: 252.00 / 400 (63.0%): 100%|██████████| 400/400 [00:01<00:00, 391.94it/s]

2025/12/12 02:34:04 INFO dspy.evaluate.evaluate: Average Metric: 252 / 400 (63.0%)





Unnamed: 0,question,subject,choices,example_answer,reasoning,pred_answer,metric
0,Nǹkan tí ó pàdánù láti ní nǹkan mìíràn ni à ń pè ni?,high_school_microeconomics,"[owó ìdókówò., Ilẹ̀., iye owó., iye àǹfààní]",D,The question asks in Yoruba what is referred to as the loss from f...,D,✔️ [1]
1,Ọjà iṣẹ́ ṣíṣe wà ní ìwọ̀ntúnwọ̀nsì báyìí. Èwo nínú àwọn wọ̀nyí ni...,high_school_microeconomics,['Ọ̀pọ̀lọpọ̀ àwọn akẹ́kọ̀ọ́ ló kẹ́kọ̀ọ́ jáde pẹ\u200clú ìmọ̀ọ́ṣe t...,B,The question concerns factors that could lead to an increase in wa...,B,✔️ [1]
2,"Nígbàtí iye ẹlastísítì owó fún ìbéèrè tóbi ju 1 lọ, ìbéèrè náà jẹ́",high_school_microeconomics,"[Kò jẹ mọ‌ ìráraràn, ìráraràn., ìráraràn., ìráraràn ọ̀kọ̀ọ̀kan]",B,"Nípa ìmọ̀ ẹ̀kọ́ ọrọ ajé, ìyelórí owó fún ìbéèrè tó tóbi ju 1 lọ tú...",B,✔️ [1]
3,Èwo nínú àwọn àbùdá wọ̀nyí ni kò jẹ́ ti ọjà ìfigagbága?,high_school_microeconomics,"[Wíwọ ọjà náà láì sanwó., Ìyàtọ̀ nínú àwọn ọjà, Àgbése ìbéèrè tí ó...",B,"In microeconomics, a perfectly competitive market has the followin...",B,✔️ [1]
4,Tí àwọn ilé iṣẹ́ bá fẹ́ wọ ọjà tí ó jẹ́ ìfigagbága fún anìkanjọpọ́...,high_school_microeconomics,['Ìbéèrè fún àwọn ilé-iṣẹ\u200c tí ó wà tẹ\u200clẹ\u200c sún lọ sí...,D,"If firms want to enter a perfectly competitive market, the demand ...",D,✔️ [1]
...,...,...,...,...,...,...,...
395,Kínni ojúṣe ààlà etíkun?,international_law,"['ààlà etíkun ń ṣiṣẹ́ bíi àgbègbè ààbò ti Ìpínlẹ̀ etíkun', 'ààlà e...",C,The coastal boundary (ààlà etíkun) primarily serves as a protectiv...,A,✔️ [0]
396,Kínni ìtumọ̀ àwọn ènìyàn tí òfin ilẹ̀ òkèèrè dámọ̀?,international_law,['Ó túmọ̀ sí wí pé àwọn Ìpínlẹ̀ nìkan ni wọ́n rí gẹ́gẹ́ bíi olùtẹ̀...,B,"The correct understanding of ""persons recognized by international ...",B,✔️ [1]
397,Kí ni ìlànà ìdánimọ̀ tó jẹ́ mọ́ òfin?,international_law,"['Ìdánimọ̀ jẹ́ ìpinnu ti Ìpínlẹ̀.', 'Ìdánimọ̀ nílò ìfọwọ́sowọ́pọ̀ ...",A,"Option 1 is the best fit among the choices, as recognition in inte...",A,✔️ [1]
398,Ta ni 'alátakò ní gbogboògbà'?,international_law,['Ìpínlẹ\u200c tí ó máa ń tako òfin tí ó wà nínú ìbéèrè lẹ\u200cyì...,C,"Ní òfin àṣà orílẹ̀-èdè, ""alátakò ní gbogboògbà"" túmọ̀ sí ìpínlẹ̀ t...",C,✔️ [1]


EvaluationResult(score=63.0, results=<list of 400 results>)

In [16]:
from dspy import GEPA

# Second GEPA run with exactly the same configuration and seed as `optimizer`, stored under
# separate names (`optimizer2`, `optimized_program2`) so the first result is not overwritten.
# NOTE(review): this cell duplicates the earlier GEPA cell line for line - presumably a
# re-run for comparison; consider a shared helper if both runs need to stay.
optimizer2 = GEPA(
    metric=metric_with_feedback,
    auto="light",
    num_threads=32,
    track_stats=True,
    reflection_minibatch_size=3,
    seed=42,
    reflection_lm=dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000, api_key=key)
)

# Same inputs as the first run: pooled train/dev examples for training, `new_val_set` for selection.
optimized_program2 = optimizer2.compile(
    program,
    trainset=merged_train_set,
    valset=new_val_set,
)

2025/12/12 23:27:38 INFO dspy.teleprompt.gepa.gepa: Running GEPA for approx 780 metric calls of the program. This amounts to 3.75 full evals on the train+val set.
2025/12/12 23:27:38 INFO dspy.teleprompt.gepa.gepa: Using 100 examples for tracking Pareto scores. You can consider using a smaller sample of the valset to allow GEPA to explore more diverse solutions within the same budget. GEPA requires you to provide the smallest valset that is just large enough to match your downstream task distribution, while providing as large trainset as possible.
GEPA Optimization:   0%|          | 0/780 [00:00<?, ?rollouts/s]2025/12/12 23:28:02 INFO dspy.evaluate.evaluate: Average Metric: 45.0 / 100 (45.0%)
2025/12/12 23:28:02 INFO dspy.teleprompt.gepa.gepa: Iteration 0: Base program full valset score: 0.45
GEPA Optimization:  13%|█▎        | 100/780 [00:24<02:47,  4.06rollouts/s]2025/12/12 23:28:02 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Selected program 0 score: 0.45


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00, 390.12it/s] 

2025/12/12 23:28:02 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)
2025/12/12 23:28:02 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Proposed new text for predict: Task
- Given an input with:
  - question: may be in Yoruba (topics include global facts and elementary mathematics)
  - subject: a category label (e.g., global_facts, elementary_mathematics)
  - choices: a list of four options in order (index 0–3 corresponds to options A–D),
  choose the single correct option and return its letter (A, B, C, or D).

Input format
- question: Yoruba text. Examples:
  - “Ṣírò y.” means “Solve for y.”
  - “Gbólóhùn wo ló ṣe déédé pẹ̀lú …?” means “Which expression is equivalent to …?”
- subject: helps you anticipate the type of reasoning (facts vs. math).
- choices: a list of four strings; map to A, B, C, D by position.

Output format
- Return only the single capital letter A, B, C, or D.
- Do not include the option text, explanations, or extra formatting unless explicitly requested.





2025/12/12 23:28:29 INFO dspy.evaluate.evaluate: Average Metric: 61.0 / 100 (61.0%)
2025/12/12 23:28:29 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New program is on the linear pareto front
2025/12/12 23:28:29 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Full valset score for new program: 0.61
2025/12/12 23:28:29 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Full train_val score for new program: 0.61
2025/12/12 23:28:29 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Individual valset scores for new program: [1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0]
2025/12/12 23:28:29 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New valset pareto front scores: [1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 

Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00, 332.34it/s]

2025/12/12 23:28:29 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)
2025/12/12 23:28:29 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Proposed new text for predict: Task: Given an input with fields "question", "subject", and "choices" (a list of exactly four options), select the correct option and respond with only its letter (A, B, C, or D). Do not include explanations or restate the option text.

Input format:
- question: string (may be in any language, including Yoruba).
- subject: string identifier (e.g., international_law, high_school_microeconomics, elementary_mathematics).
- choices: list of 4 strings. Map them to letters as follows:
  A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General approach:
- Understand the question and evaluate all four options.
- Use subject-specific knowledge to identify the best answer. If multiple options seem plausible, prefer the one that is most complete and aligns with standard textbook principles and recognized au




2025/12/12 23:29:04 INFO dspy.evaluate.evaluate: Average Metric: 61.0 / 100 (61.0%)
2025/12/12 23:29:04 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Full valset score for new program: 0.61
2025/12/12 23:29:04 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Full train_val score for new program: 0.61
2025/12/12 23:29:04 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Individual valset scores for new program: [1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1]
2025/12/12 23:29:04 INFO dspy.teleprompt.gepa.gepa: Iteration 2: New valset pareto front scores: [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00, 158.59it/s]

2025/12/12 23:29:04 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)
2025/12/12 23:29:04 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Proposed new text for predict: Task: Given an input with fields "question", "subject", and "choices" (a list of exactly four options), select the correct option and respond with only its letter (A, B, C, or D). Do not include explanations or restate the option text.

Input format:
- question: string (may be in any language, including Yoruba).
- subject: string identifier (e.g., international_law, high_school_microeconomics, elementary_mathematics, global_facts).
- choices: list of 4 strings. Map them to letters as follows:
  A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General approach:
- Read the question carefully and translate internally if needed (Yoruba or other languages). Do not output translations.
- Identify the subject and any market structure or legal concepts explicitly stated in the question text. If the subje


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00, 570.58it/s]

2025/12/12 23:29:04 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)
2025/12/12 23:29:04 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Proposed new text for predict: Task
- You will be given an input with fields "question", "subject", and "choices" (exactly four options). Select the single best option and output only its letter (A, B, C, or D). Do not include explanations or restate option text.

Input format
- question: string (may be in any language, including Yoruba).
- subject: string identifier (e.g., international_law, high_school_microeconomics, elementary_mathematics, high_school_geography).
- choices: list of 4 strings mapped to letters as:
  A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General approach
- Carefully read and understand the question, including Yoruba phrasing and keywords when present.
- Evaluate all four options and select the one that best aligns with standard textbook principles and recognized authorities for the subject.
- If m




2025/12/12 23:29:23 INFO dspy.evaluate.evaluate: Average Metric: 62.0 / 100 (62.0%)
2025/12/12 23:29:23 INFO dspy.teleprompt.gepa.gepa: Iteration 4: New program is on the linear pareto front
2025/12/12 23:29:23 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Full valset score for new program: 0.62
2025/12/12 23:29:23 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Full train_val score for new program: 0.62
2025/12/12 23:29:23 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Individual valset scores for new program: [1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]
2025/12/12 23:29:23 INFO dspy.teleprompt.gepa.gepa: Iteration 4: New valset pareto front scores: [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00, 302.60it/s]

2025/12/12 23:29:23 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)
2025/12/12 23:29:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Proposed new text for predict: Task
- Input: 
  - question: Yoruba text (topics include global facts, geography, microeconomics, and elementary mathematics).
  - subject: a category label (e.g., global_facts, high_school_geography, high_school_microeconomics, elementary_mathematics).
  - choices: a list of four options in order (index 0–3 corresponds to options A–D).
- Output:
  - Return only the single capital letter A, B, C, or D that corresponds to the correct option.
  - Do not include the option text, explanations, punctuation, or extra formatting unless explicitly requested.

Mapping
- A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General solution strategy
- Translate/interpret Yoruba internally; answer with the correct option letter only.
- Use the subject label to anticipate the reasoning method (facts vs. math vs. 




2025/12/12 23:29:40 INFO dspy.evaluate.evaluate: Average Metric: 61.0 / 100 (61.0%)
2025/12/12 23:29:40 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Full valset score for new program: 0.61
2025/12/12 23:29:40 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Full train_val score for new program: 0.61
2025/12/12 23:29:40 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Individual valset scores for new program: [1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0]
2025/12/12 23:29:40 INFO dspy.teleprompt.gepa.gepa: Iteration 5: New valset pareto front scores: [1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,

Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:02<00:00,  1.37it/s]

2025/12/12 23:29:43 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)





2025/12/12 23:30:28 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Proposed new text for predict: Task
- You will be given an input with fields "question", "subject", and "choices" (exactly four options).
- Select the single best option and output only its letter (A, B, C, or D). Do not include explanations or restate any option text.

Input format
- question: string (may be in any language, including Yoruba).
- subject: string identifier (e.g., international_law, high_school_microeconomics, elementary_mathematics, global_facts).
- choices: list of 4 strings mapped to letters as:
  - A = choices[0]
  - B = choices[1]
  - C = choices[2]
  - D = choices[3]

Output requirements
- Return only a single capital letter: A, B, C, or D.
- No explanations, no restating options, no punctuation, no extra spaces or lines.

General approach
- Carefully understand the question (including Yoruba or other languages) and evaluate all four options.
- Use subject-specific knowledge and widely taught, author

Average Metric: 0.00 / 3 (0.0%): 100%|██████████| 3/3 [00:05<00:00,  1.80s/it]

2025/12/12 23:31:00 INFO dspy.evaluate.evaluate: Average Metric: 0.0 / 3 (0.0%)





2025/12/12 23:31:43 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Proposed new text for predict: Task definition
- Inputs:
  - question: A prompt written in Yoruba. Topics include global facts, geography, microeconomics, elementary mathematics, and international law.
  - subject: A category label (e.g., global_facts, high_school_geography, high_school_microeconomics, elementary_mathematics, international_law).
  - choices: A list of four answer options in order (index 0–3 corresponds to A–D).
- Output:
  - Return only the single capital letter A, B, C, or D that corresponds to the correct option.
  - Do not include the option text, explanations, punctuation, extra spaces, or formatting unless explicitly requested.

Choice mapping
- A = choices[0]
- B = choices[1]
- C = choices[2]
- D = choices[3]

General approach
- Internally translate/interpret the Yoruba question.
- Use the subject label to select the appropriate reasoning method (facts vs. math vs. economics vs. law).
- Apply elimin

Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:08<00:00,  2.71s/it]

2025/12/12 23:32:22 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)





2025/12/13 00:03:18 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Proposed new text for predict: Task: Given an input with fields "question", "subject", and "choices" (a list of exactly four options), select the correct option and respond with only its letter (A, B, C, or D). Do not include explanations or restate the option text.

Input format:
- question: string (may be in any language, including Yoruba).
- subject: string identifier (e.g., international_law, high_school_microeconomics, elementary_mathematics).
- choices: list of 4 strings. Map them to letters as follows:
  A = choices[0], B = choices[1], C = choices[2], D = choices[3].

General approach:
- Carefully read and understand the question, including Yoruba terms and phrasing.
- Evaluate all four options and select the single best answer.
- If multiple options seem plausible, choose the one that is most complete and aligns with standard textbook principles and recognized authorities for that subject.
- Output only the capita

In [17]:
# Score the second GEPA-optimized program on the same `new_test_set` as the baseline.
evaluate(optimized_program2)

Average Metric: 241.00 / 400 (60.2%): 100%|██████████| 400/400 [00:00<00:00, 661.38it/s]

2025/12/13 00:19:17 INFO dspy.evaluate.evaluate: Average Metric: 241 / 400 (60.2%)





Unnamed: 0,question,subject,choices,example_answer,reasoning,pred_answer,metric
0,Nǹkan tí ó pàdánù láti ní nǹkan mìíràn ni à ń pè ni?,high_school_microeconomics,"[owó ìdókówò., Ilẹ̀., iye owó., iye àǹfààní]",D,The question in Yoruba asks about what we call the value or cost o...,D,✔️ [1]
1,Ọjà iṣẹ́ ṣíṣe wà ní ìwọ̀ntúnwọ̀nsì báyìí. Èwo nínú àwọn wọ̀nyí ni...,high_school_microeconomics,['Ọ̀pọ̀lọpọ̀ àwọn akẹ́kọ̀ọ́ ló kẹ́kọ̀ọ́ jáde pẹ\u200clú ìmọ̀ọ́ṣe t...,B,Owó oṣù ninu ọjà iṣẹ́ má n pọ̀ sí i tí ìbéèrè fún iṣẹ́ náà bá pọ̀ ...,B,✔️ [1]
2,"Nígbàtí iye ẹlastísítì owó fún ìbéèrè tóbi ju 1 lọ, ìbéèrè náà jẹ́",high_school_microeconomics,"[Kò jẹ mọ‌ ìráraràn, ìráraràn., ìráraràn., ìráraràn ọ̀kọ̀ọ̀kan]",B,"Nípa imọ ìlànà microeconomics, tí iye ẹlastísítì owó fún ìbéèrè bá...",B,✔️ [1]
3,Èwo nínú àwọn àbùdá wọ̀nyí ni kò jẹ́ ti ọjà ìfigagbága?,high_school_microeconomics,"[Wíwọ ọjà náà láì sanwó., Ìyàtọ̀ nínú àwọn ọjà, Àgbése ìbéèrè tí ó...",B,Ọjà ìfigagbága (imperfect competition) ní àwọn àbùdá pàtàkì bíi: ì...,D,✔️ [0]
4,Tí àwọn ilé iṣẹ́ bá fẹ́ wọ ọjà tí ó jẹ́ ìfigagbága fún anìkanjọpọ́...,high_school_microeconomics,['Ìbéèrè fún àwọn ilé-iṣẹ\u200c tí ó wà tẹ\u200clẹ\u200c sún lọ sí...,D,Tí àwọn ilé iṣẹ́ bá fẹ́ wọ ọjà tí ó jẹ́ ìfigagbága (monopoly marke...,D,✔️ [1]
...,...,...,...,...,...,...,...
395,Kínni ojúṣe ààlà etíkun?,international_law,"['ààlà etíkun ń ṣiṣẹ́ bíi àgbègbè ààbò ti Ìpínlẹ̀ etíkun', 'ààlà e...",C,The primary role of an exclusive economic zone (ààlà etíkun) in in...,B,✔️ [0]
396,Kínni ìtumọ̀ àwọn ènìyàn tí òfin ilẹ̀ òkèèrè dámọ̀?,international_law,['Ó túmọ̀ sí wí pé àwọn Ìpínlẹ̀ nìkan ni wọ́n rí gẹ́gẹ́ bíi olùtẹ̀...,B,Àsàyàn tó pé kúrò nínú àwọn tó wà níbẹ̀ ni [2] nítorí pé ìtumọ̀ àw...,B,✔️ [1]
397,Kí ni ìlànà ìdánimọ̀ tó jẹ́ mọ́ òfin?,international_law,"['Ìdánimọ̀ jẹ́ ìpinnu ti Ìpínlẹ̀.', 'Ìdánimọ̀ nílò ìfọwọ́sowọ́pọ̀ ...",A,Ìlànà ìdánimọ̀ tó jẹ́ mọ́ òfin ní ilẹ̀ àjọṣepọ̀ àgbáyé ṣàkóso pé ì...,A,✔️ [1]
398,Ta ni 'alátakò ní gbogboògbà'?,international_law,['Ìpínlẹ\u200c tí ó máa ń tako òfin tí ó wà nínú ìbéèrè lẹ\u200cyì...,C,Àlátakò ní gbogboògbà nínú òfin àṣà gbogbo agbáyé (customary inter...,C,✔️ [1]


EvaluationResult(score=60.25, results=<list of 400 results>)

In [18]:
from pathlib import Path
# Persist the evaluation results of the baseline and optimized programs, and
# the optimized program itself, under results/afrimmlu/{base,optimized,programs}/.
model_name = "openai-gpt-4-1-mini"
lang = "yor"

output_dir = Path("results/afrimmlu")
base_dir, optimized_dir, programs_dir = (
    output_dir / subdir for subdir in ("base", "optimized", "programs")
)

# Create every target directory up front; re-running the cell is harmless
# because existing directories are accepted.
for target_dir in (base_dir, optimized_dir, programs_dir):
    target_dir.mkdir(parents=True, exist_ok=True)

# All three artifacts share one file name; any "/" in the model identifier is
# replaced so it cannot introduce an extra path component.
artifact_name = f"{model_name.replace('/', '-')}_{lang}.json"
unopt_path = base_dir / artifact_name
opt_path = optimized_dir / artifact_name
program_path = programs_dir / artifact_name

# Evaluate the baseline first, then the optimized program, each with its own
# JSON destination passed via save_as_json.
evaluate(program, save_as_json=unopt_path)
evaluate(optimized_program, save_as_json=opt_path)

# NOTE(review): save_program=False presumably stores only the program state
# (not the full program) in the .json file -- confirm against the installed
# DSPy version.
optimized_program.save(program_path, save_program=False)

Average Metric: 229.00 / 400 (57.2%): 100%|██████████| 400/400 [00:00<00:00, 675.35it/s]

2025/12/13 20:47:35 INFO dspy.evaluate.evaluate: Average Metric: 229 / 400 (57.2%)





Unnamed: 0,question,subject,choices,example_answer,reasoning,pred_answer,metric
0,Nǹkan tí ó pàdánù láti ní nǹkan mìíràn ni à ń pè ni?,high_school_microeconomics,"[owó ìdókówò., Ilẹ̀., iye owó., iye àǹfààní]",D,"Nǹkan tí ó pàdánù láti ní nǹkan mìíràn, nínú ẹ̀kọ́ orí ètò-ọrọ ajé...",D,✔️ [1]
1,Ọjà iṣẹ́ ṣíṣe wà ní ìwọ̀ntúnwọ̀nsì báyìí. Èwo nínú àwọn wọ̀nyí ni...,high_school_microeconomics,['Ọ̀pọ̀lọpọ̀ àwọn akẹ́kọ̀ọ́ ló kẹ́kọ̀ọ́ jáde pẹ\u200clú ìmọ̀ọ́ṣe t...,B,"Ní ọjà iṣẹ́ tí ó wà ní ìwọ̀ntúnwọ̀nsì, owó oṣù á pọ̀ sí i tí ìbéèr...",B,✔️ [1]
2,"Nígbàtí iye ẹlastísítì owó fún ìbéèrè tóbi ju 1 lọ, ìbéèrè náà jẹ́",high_school_microeconomics,"[Kò jẹ mọ‌ ìráraràn, ìráraràn., ìráraràn., ìráraràn ọ̀kọ̀ọ̀kan]",B,"Nípa ọrọ-aje, iye ẹlastísítì owó fún ìbéèrè (price elasticity of d...",B,✔️ [1]
3,Èwo nínú àwọn àbùdá wọ̀nyí ni kò jẹ́ ti ọjà ìfigagbága?,high_school_microeconomics,"[Wíwọ ọjà náà láì sanwó., Ìyàtọ̀ nínú àwọn ọjà, Àgbése ìbéèrè tí ó...",B,"Ọjà ìfigagbága ni wọ́n mọ̀ sí ""perfect competition"" ní economics, ...",A,✔️ [0]
4,Tí àwọn ilé iṣẹ́ bá fẹ́ wọ ọjà tí ó jẹ́ ìfigagbága fún anìkanjọpọ́...,high_school_microeconomics,['Ìbéèrè fún àwọn ilé-iṣẹ\u200c tí ó wà tẹ\u200clẹ\u200c sún lọ sí...,D,Ní àkókò tí àwọn ilé-iṣẹ́ tuntun bá wọ ọjà tí ó jẹ́ ìfigagbága fún...,D,✔️ [1]
...,...,...,...,...,...,...,...
395,Kínni ojúṣe ààlà etíkun?,international_law,"['ààlà etíkun ń ṣiṣẹ́ bíi àgbègbè ààbò ti Ìpínlẹ̀ etíkun', 'ààlà e...",C,Ààlà etíkun (territorial sea) jẹ́ agbègbè omi tó wà lẹ́bàà etíkun ...,A,✔️ [0]
396,Kínni ìtumọ̀ àwọn ènìyàn tí òfin ilẹ̀ òkèèrè dámọ̀?,international_law,['Ó túmọ̀ sí wí pé àwọn Ìpínlẹ̀ nìkan ni wọ́n rí gẹ́gẹ́ bíi olùtẹ̀...,B,Ìtumọ̀ àwọn ènìyàn tí òfin ilẹ̀ òkèèrè dámọ̀ ni pé àwọn ènìyàn tàb...,B,✔️ [1]
397,Kí ni ìlànà ìdánimọ̀ tó jẹ́ mọ́ òfin?,international_law,"['Ìdánimọ̀ jẹ́ ìpinnu ti Ìpínlẹ̀.', 'Ìdánimọ̀ nílò ìfọwọ́sowọ́pọ̀ ...",A,Ìlànà ìdánimọ̀ tó jẹ́ mọ́ òfin ní àgbáyé túmọ̀ sí ìmúlò àwọn òfin ...,A,✔️ [1]
398,Ta ni 'alátakò ní gbogboògbà'?,international_law,['Ìpínlẹ\u200c tí ó máa ń tako òfin tí ó wà nínú ìbéèrè lẹ\u200cyì...,C,"Nípa ìtúmọ̀ ìtàn ìjọba àti òfin àgbáyé, 'alátakò ní gbogboògbà' (p...",C,✔️ [1]


Average Metric: 252.00 / 400 (63.0%): 100%|██████████| 400/400 [00:00<00:00, 671.83it/s]

2025/12/13 20:47:36 INFO dspy.evaluate.evaluate: Average Metric: 252 / 400 (63.0%)





Unnamed: 0,question,subject,choices,example_answer,reasoning,pred_answer,metric
0,Nǹkan tí ó pàdánù láti ní nǹkan mìíràn ni à ń pè ni?,high_school_microeconomics,"[owó ìdókówò., Ilẹ̀., iye owó., iye àǹfààní]",D,The question asks in Yoruba what is referred to as the loss from f...,D,✔️ [1]
1,Ọjà iṣẹ́ ṣíṣe wà ní ìwọ̀ntúnwọ̀nsì báyìí. Èwo nínú àwọn wọ̀nyí ni...,high_school_microeconomics,['Ọ̀pọ̀lọpọ̀ àwọn akẹ́kọ̀ọ́ ló kẹ́kọ̀ọ́ jáde pẹ\u200clú ìmọ̀ọ́ṣe t...,B,The question concerns factors that could lead to an increase in wa...,B,✔️ [1]
2,"Nígbàtí iye ẹlastísítì owó fún ìbéèrè tóbi ju 1 lọ, ìbéèrè náà jẹ́",high_school_microeconomics,"[Kò jẹ mọ‌ ìráraràn, ìráraràn., ìráraràn., ìráraràn ọ̀kọ̀ọ̀kan]",B,"Nípa ìmọ̀ ẹ̀kọ́ ọrọ ajé, ìyelórí owó fún ìbéèrè tó tóbi ju 1 lọ tú...",B,✔️ [1]
3,Èwo nínú àwọn àbùdá wọ̀nyí ni kò jẹ́ ti ọjà ìfigagbága?,high_school_microeconomics,"[Wíwọ ọjà náà láì sanwó., Ìyàtọ̀ nínú àwọn ọjà, Àgbése ìbéèrè tí ó...",B,"In microeconomics, a perfectly competitive market has the followin...",B,✔️ [1]
4,Tí àwọn ilé iṣẹ́ bá fẹ́ wọ ọjà tí ó jẹ́ ìfigagbága fún anìkanjọpọ́...,high_school_microeconomics,['Ìbéèrè fún àwọn ilé-iṣẹ\u200c tí ó wà tẹ\u200clẹ\u200c sún lọ sí...,D,"If firms want to enter a perfectly competitive market, the demand ...",D,✔️ [1]
...,...,...,...,...,...,...,...
395,Kínni ojúṣe ààlà etíkun?,international_law,"['ààlà etíkun ń ṣiṣẹ́ bíi àgbègbè ààbò ti Ìpínlẹ̀ etíkun', 'ààlà e...",C,The coastal boundary (ààlà etíkun) primarily serves as a protectiv...,A,✔️ [0]
396,Kínni ìtumọ̀ àwọn ènìyàn tí òfin ilẹ̀ òkèèrè dámọ̀?,international_law,['Ó túmọ̀ sí wí pé àwọn Ìpínlẹ̀ nìkan ni wọ́n rí gẹ́gẹ́ bíi olùtẹ̀...,B,"The correct understanding of ""persons recognized by international ...",B,✔️ [1]
397,Kí ni ìlànà ìdánimọ̀ tó jẹ́ mọ́ òfin?,international_law,"['Ìdánimọ̀ jẹ́ ìpinnu ti Ìpínlẹ̀.', 'Ìdánimọ̀ nílò ìfọwọ́sowọ́pọ̀ ...",A,"Option 1 is the best fit among the choices, as recognition in inte...",A,✔️ [1]
398,Ta ni 'alátakò ní gbogboògbà'?,international_law,['Ìpínlẹ\u200c tí ó máa ń tako òfin tí ó wà nínú ìbéèrè lẹ\u200cyì...,C,"Ní òfin àṣà orílẹ̀-èdè, ""alátakò ní gbogboògbà"" túmọ̀ sí ìpínlẹ̀ t...",C,✔️ [1]
