In [1]:
from datasets import load_dataset
import random
import string

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Pool of letter labels ("A" through "Z") used to tag answer options; its
# length caps how many options a single question can have.
ALPHABET = string.ascii_uppercase

# Text of the extra option added to every question alongside the correct
# answer and the distractors.
REFUSE_CHOICE = "Insufficient information to answer the question"

In [2]:
def randomize_choices(ideal: str, distractors: list[str]) -> tuple[list[str], str, str]:
    # Combines correct answer, "Insufficient information" option, and distractors
    choices = [ideal, REFUSE_CHOICE, *distractors]
    
    # Creates letter choices (A, B, C, etc)
    n_choices = len(choices)
    if n_choices > len(ALPHABET):
        raise ValueError("Too many choices")

    # Randomizes the order
    perm = list(range(n_choices))
    random.shuffle(perm)
    shuffled_choices = [
        f"({letter}) {choices[sigma_i]}"
        for letter, sigma_i in zip(ALPHABET, perm, strict=False)
    ]

    # Returns the correct answer letter and "unsure" option letter
    answer = ALPHABET[perm.index(0)]
    unsure = ALPHABET[perm.index(1)]

    return shuffled_choices, answer, unsure

In [4]:
def format_as_multiple_choice(question_data):
    """Build a multiple-choice record from one dataset entry.

    Args:
        question_data: Mapping indexed by the keys ``'question'``,
            ``'ideal'``, ``'distractors'`` and ``'sources'``.

    Returns:
        A dict with the original ``'question'`` and ``'sources'`` values plus
        ``'choices'`` (the lettered, shuffled options), ``'correct_answer'``
        (letter of the ideal answer) and ``'unsure_option'`` (letter of the
        refusal option), all as produced by ``randomize_choices``.
    """
    # Shuffle first; the letters are only known once the order is fixed.
    lettered_options, answer_letter, unsure_letter = randomize_choices(
        ideal=question_data["ideal"],
        distractors=question_data["distractors"],
    )

    record = dict(
        question=question_data["question"],
        choices=lettered_options,
        correct_answer=answer_letter,
        unsure_option=unsure_letter,
    )
    record["sources"] = question_data["sources"]
    return record


In [5]:
# Fetch the "LitQA2" configuration of the "futurehouse/lab-bench" dataset and
# keep its "train" split.
train_eval = load_dataset(
    path="futurehouse/lab-bench",
    name="LitQA2",
)["train"]

In [6]:
# Smoke-test the helpers: turn the first record of the loaded split into a
# lettered multiple-choice question and print each part of it.
example = format_as_multiple_choice(train_eval[0])

print("Sample Multiple Choice Question:")
print(f"Question: {example['question']}", end="\n\n")
# One option per line, directly under the "Choices:" header.
print("Choices:", *example["choices"], sep="\n")
print()
print(f"Correct Answer: {example['correct_answer']}")
print(f"'Unsure' Option: {example['unsure_option']}")
print(f"Sources: {example['sources']}")

Sample Multiple Choice Question:
Question: Acinetobacter lwoffii has been evolved in the lab to be resistant to which of these antibiotics?

Choices:
(A) ciproflaxin
(B) ampicillin
(C) gentamicin
(D) meropenem
(E) Insufficient information to answer the question

Correct Answer: A
'Unsure' Option: E
Sources: ['https://doi.org/10.1128/msphere.00109-24']
