In [1]:
import os

# Move to the parent of the directory the kernel started in, so the relative
# paths used by later cells resolve from there. The starting directory is
# recorded on the first run only: re-running this cell then lands in the same
# place instead of climbing one more level each time.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_START_DIR))

In [2]:
import json
import pandas as pd
import yaml
import time
from pathlib import Path

from transformers import AutoTokenizer
from math_evaluator import MathEval

In [3]:
def get_task_instruction_math(question, question_prompt_template=None, step_by_step=False, tokenizer=None, apply_chat_template=False, system_msg=None):
    """Build the prompt text for a single math question.

    Args:
        question: Question text to embed in the prompt.
        question_prompt_template: Optional ``str.format`` template. The question
            is supplied both positionally and as the keyword ``question``, so
            ``{}``, ``{0}`` and ``{question}`` placeholders all work. When given,
            it replaces the built-in prompt and ``step_by_step`` is ignored.
        step_by_step: With no template, selects the built-in prompt variant that
            also asks the model to think step by step.
        tokenizer: Optional object exposing
            ``apply_chat_template(messages, tokenize=False, add_generation_prompt=True)``.
        apply_chat_template: Wrap the prompt in the tokenizer's chat template.
            Has no effect when ``tokenizer`` is None.
        system_msg: Optional system message placed before the user turn when the
            chat template is applied.

    Returns:
        The prompt, or whatever ``tokenizer.apply_chat_template`` returns for it
        when chat templating is applied.
    """
    if question_prompt_template is not None:
        # Passing the question by position and by name keeps `{}` templates
        # working and also accepts `{question}` templates.
        prompt = question_prompt_template.format(question, question=question)
    else:
        if step_by_step:
            preamble = 'Please answer the following math question. You should think step by step to solve it.\n\n'
        else:
            preamble = 'Please answer the following math question. '
        prompt = (
            preamble
            + 'Provide your final answer in the format \\boxed{YOUR_ANSWER}.\n\n'
            + f'Question:\n{question}\n\n'
        )

    if tokenizer is not None and apply_chat_template:
        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})
        messages.append({"role": "user", "content": prompt})
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    return prompt

In [None]:
class Args:
    """Fixed settings for this notebook run, kept together as class attributes."""

    # Dataset name; also forms the prefix of the run directory name.
    dataset_name = "imobench"
    # Split name; also forms the prefix of the exported prompts CSV file name.
    split = "answerbench_subset"
    # Handed to MathEval (presumably the k values for pass@k-style metrics - confirm).
    k_list = [1, 4, 8, 32]
    # Handed to MathEval (None presumably means "use every example" - confirm).
    subset_num = None
    # Toggle for the step-by-step prompt variant.
    step_by_step_prompt = True
    # Handed to MathEval as n_threads.
    n_threads = 1


args = Args()

# Run directory of the model being processed; change the path to target another
# run. Previously used directories lived under runs/default/ with these suffixes:
#   llama-3.2-1b-inst, gemma-3-1b-it, qwen-1.5b-inst, llama-3.1-8b-inst, qwen2.5-math-7b
job_dir = Path(f"runs/v1/{args.dataset_name}.qwen2.5-math-7b")

# The run directory is not created here: it must already exist and contain
# tokenization.yaml, otherwise the open() below raises FileNotFoundError.
prompt_csv_path = f'{job_dir}/{args.split}.prompts.csv'

# Pin the encoding so parsing the YAML does not depend on the platform's
# locale default.
with open(job_dir / "tokenization.yaml", "r", encoding="utf-8") as f:
    sampler_config = yaml.safe_load(f)
sampler_config

FileNotFoundError: [Errno 2] No such file or directory: 'runs/v1/imobench.qwen2.5-math-1.5b/tokenization.yaml'

In [None]:
# Load a tokenizer only when the config enables chat templating and also says
# which tokenizer to load; otherwise prompts stay as plain text.
tokenizer = None
if sampler_config['apply_chat_template'] and "tokenizer" in sampler_config:
    tokenizer = AutoTokenizer.from_pretrained(**sampler_config['tokenizer'])

# NOTE(review): `eval` shadows the Python builtin for the rest of the kernel
# session. The name is kept because other cells may refer to it; consider
# renaming it everywhere at once.
eval = MathEval(args.dataset_name,
                args.split,
                args.k_list,
                args.subset_num,
                # Taken from the config cell so changing the flag there takes effect.
                step_by_step_prompt=args.step_by_step_prompt,
                n_threads=args.n_threads,
                data_root_dir="../datasets")

# Build one prompt per example. The prompt is also stored back on the example
# dict under 'prompt'.
processed_prompt_data = []
for e in eval.examples:
    question = e["Question"]
    e['prompt'] = get_task_instruction_math(
        question,
        sampler_config.get("question_prompt_template"),
        step_by_step=args.step_by_step_prompt,
        tokenizer=tokenizer,
        # Has no effect when `tokenizer` is None.
        apply_chat_template=True,
        system_msg=sampler_config.get("system_msg"),
    )
    # The example id is written to both question_id and prompt_id.
    processed_prompt_data.append(
        (e['id'], e['id'], question, e['answer'], e['prompt'])
    )

prompt_df = pd.DataFrame(data=processed_prompt_data, columns=['question_id', 'prompt_id', 'question', 'answer', 'prompt'])
prompt_df.to_csv(prompt_csv_path, index=False)

Num eval samples is  30
