### [Demo] Apply different prompt templates in a dataset

TL;DR: This demo creates a list of prompt templates using unitxt. We then create new dataset tasks, formatted with the new prompts, to be evaluated.

In [None]:
import json
import os
import sys
import warnings

from lm_eval.prompts.prompt_template_utils import build_prompts_variations_str_template

# Install a global "ignore" filter: no warnings are shown from this point on.
warnings.filterwarnings(action="ignore")

In [None]:
# The notebook's working directory is the starting point.
current_dir = os.getcwd()
# Its parent directory is treated as the lm-eval root; put that root first on
# the import path.
# NOTE(review): the `lm_eval` import in the first cell runs before this
# insertion, so only later imports benefit from it; confirm the order is intended.
parent_of_current_dir = os.path.join(current_dir, "../")
LM_EVAL_ROOT_PATH = os.path.abspath(parent_of_current_dir)
sys.path.insert(0, LM_EVAL_ROOT_PATH)

Install the unitxt package

In [None]:
# !pip install unitxt

1. Create template variations of a multiple-choice QA template that can be applied to a given dataset. An initial template is required.

In [None]:
# Baseline MMLU prompt; {topic}, {question} and {choices} are its placeholders.
multi_qa_template = (
    "The following are multiple choice questions (with answers) about {topic}.\n"
    "{question}.\n"
    "Answers: \n"
    "{choices}.\n"
    "Answer:"
)

# Ask the helper for five variations derived from the baseline prompt.
templates = build_prompts_variations_str_template(
    template_str=multi_qa_template,
    dataset_name="mmlu",
    templates_folder="assets",
    num_variations=5,
)

In [None]:
# Load the raw template variations from the assets folder.
# NOTE(review): the JSON file is presumably written by
# build_prompts_variations_str_template in the previous cell; confirm that the
# two folder settings point at the same location.
hf_dataset_name = "cais/mmlu"
templates_folder = f"{LM_EVAL_ROOT_PATH}/examples/assets"

os.makedirs(templates_folder, exist_ok=True)
# Keep the part after the Hub namespace ("cais/mmlu" -> "mmlu"). A name without
# a namespace is used as-is rather than collapsing to an empty string, which
# would produce the file name "_templates.json".
dataset_name = hf_dataset_name.split("/", 1)[-1]
# Explicit encoding so the read does not depend on the platform locale.
with open(f"{templates_folder}/{dataset_name}_templates.json", encoding="utf-8") as file:
    raw_templates = json.load(file)

raw_templates

**Unitxt utils**

In [None]:
from unitxt import add_to_catalog, get_from_catalog
from unitxt.blocks import LoadHF, Set, TaskCard
from unitxt.splitters import RenameSplits
from unitxt.templates import MultipleChoiceTemplate, TemplatesList


def write_card_yaml(filename, data) -> None:
    """Dump ``data`` as YAML into ``filename``, creating or overwriting the file.

    Args:
        filename: Path of the YAML file to write.
        data: Object to serialize; keys are written in their existing order
            because ``sort_keys`` is disabled.
    """
    # Local import: yaml is only loaded when this helper is called.
    import yaml

    with open(filename, "w") as yaml_file:
        yaml.dump(data, stream=yaml_file, sort_keys=False)

In [None]:
# Wrap each raw prompt string in a unitxt template object and add it to the
# unitxt local catalog under its own handle.

prompt_templates = {"mmlu_with_topic": raw_templates}
template_handles = []

for group_name, prompt_variations in prompt_templates.items():
    for variation_idx, prompt_format in enumerate(prompt_variations):
        template_handle = f"templates.qa.multiple_choice.{group_name}.pt_variation_{variation_idx}"
        template = MultipleChoiceTemplate(
            input_format=prompt_format,
            target_field="answer",
            choices_separator="\n",
            postprocessors=["processors.first_character"],
        )
        # Record the handle first, then register the template under it.
        template_handles.append(template_handle)
        add_to_catalog(template, template_handle, overwrite=True)

# Register all handles together as one list artifact in the catalog.
templates_list_catalog_name = "templates.qa.multiple_choice.mmlu_with_topic.all_pt_variations"
all_pt_variations = TemplatesList(template_handles)
add_to_catalog(artifact=all_pt_variations, name=templates_list_catalog_name, overwrite=True)

In [None]:
# Read the registered list back from the catalog and collect one printable
# name per template it contains.
_templates_list: TemplatesList = get_from_catalog(templates_list_catalog_name)  # catalog_path=my_unitxt_catalog

template_list = [entry.get_pretty_print_name() for entry in _templates_list.items]

template_list

In [None]:
# MMLU subtask names (57 entries, in alphabetical order).
dataset_name = "mmlu"

# fmt: off
subtasks = [
    "abstract_algebra", "anatomy", "astronomy", "business_ethics",
    "clinical_knowledge", "college_biology", "college_chemistry",
    "college_computer_science", "college_mathematics", "college_medicine",
    "college_physics", "computer_security", "conceptual_physics",
    "econometrics", "electrical_engineering", "elementary_mathematics",
    "formal_logic", "global_facts", "high_school_biology",
    "high_school_chemistry", "high_school_computer_science",
    "high_school_european_history", "high_school_geography",
    "high_school_government_and_politics", "high_school_macroeconomics",
    "high_school_mathematics", "high_school_microeconomics",
    "high_school_physics", "high_school_psychology",
    "high_school_statistics", "high_school_us_history",
    "high_school_world_history", "human_aging", "human_sexuality",
    "international_law", "jurisprudence", "logical_fallacies",
    "machine_learning", "management", "marketing", "medical_genetics",
    "miscellaneous", "moral_disputes", "moral_scenarios", "nutrition",
    "philosophy", "prehistory", "professional_accounting",
    "professional_law", "professional_medicine",
    "professional_psychology", "public_relations", "security_studies",
    "sociology", "us_foreign_policy", "virology", "world_religions",
]
# fmt: on

**Set Up Your Custom LM-Eval Unitxt Tasks Directory**

reference: https://www.unitxt.ai/en/latest/docs/lm_eval.html


In [None]:
# Directory that holds the YAML data cards to be used with lm eval; creating
# it again when it already exists is not an error.
pt_variations_tasks_dir = f"{LM_EVAL_ROOT_PATH}/examples/pt_variations_tasks"
os.makedirs(pt_variations_tasks_dir, exist_ok=True)

After creating the tasks directory, run the following code to save the Unitxt configuration file in your tasks directory:

In [None]:
# Shell command: a Python one-liner imports lm_eval's unitxt `task` module and
# prints a "class: !function <path>" line in which "task.py" in the module's
# file path is replaced by "task.Unitxt". The output is redirected into
# ./pt_variations_tasks/unitxt, a path relative to the current directory.
# NOTE(review): `import os.path` inside the one-liner is not used by it.
!python -c 'from lm_eval.tasks.unitxt import task; import os.path; \
    print("class: !function " + task.__file__.replace("task.py", "task.Unitxt"))' > ./pt_variations_tasks/unitxt

You will now have a unitxt file in your `./pt_variations_tasks` directory that defines the integration with your local virtual environment. This step should be performed once. Note that when changing virtual environments, you will need to update it using the code above.

**Create my recipe, with the template list**

In [None]:
# Build the recipe for the first subtask in `subtasks`. The card handle must
# not carry trailing whitespace: it is embedded directly in the
# comma-separated recipe string below.
card = f"cards.{dataset_name}.{subtasks[0]}"
template = template_list

data = {
    "task": "mmlu_variation_example",
    "include": "unitxt",
    # Uses the third template in the list; raises IndexError if fewer than three exist.
    "recipe": f"card={card},template={template[2]}",
}

# Write the task definition into the tasks directory as a YAML file.
write_card_yaml(f"{LM_EVAL_ROOT_PATH}/examples/pt_variations_tasks/{dataset_name}_variation_example.yaml", data)

Execute your newly constructed task with your selected model:

In [None]:
# Shell command: run lm_eval on the `mmlu_variation_example` task with the `hf`
# model backend and pretrained=google/flan-t5-base, on CPU, with --limit 10.
# --include_path is relative, so `pt_variations_tasks` is resolved from the
# directory this command runs in.
!lm_eval --model hf \
    --model_args pretrained=google/flan-t5-base --limit 10\
    --device cpu --tasks mmlu_variation_example --include_path pt_variations_tasks