In [1]:
import os

# Step up one directory so the relative paths used by this notebook
# (e.g. "runs/default/...") resolve from the parent of the notebook's folder.
# The guard makes the cell idempotent: a bare os.chdir("..") would climb one
# level further every time the cell is re-run in the same kernel.
if "_CWD_MOVED_TO_PARENT" not in globals():
    os.chdir("..")
    _CWD_MOVED_TO_PARENT = True

In [2]:
import json
import pandas as pd
import yaml
import time
from pathlib import Path

from transformers import AutoTokenizer
from math_evaluator import MathEval

In [3]:
def get_task_instruction_math(question, question_prompt_template=None, step_by_step=False, tokenizer=None, apply_chat_template=False):
    """Build the prompt for one math question.

    Args:
        question: The question text to embed in the prompt.
        question_prompt_template: Optional ``str.format`` template with a single
            positional placeholder for the question. When given, it replaces
            the built-in instructions and ``step_by_step`` has no effect.
        step_by_step: Selects the built-in instruction variant that also asks
            the model to think step by step.
        tokenizer: Optional object exposing ``apply_chat_template``.
        apply_chat_template: When true and ``tokenizer`` is not None, the prompt
            is wrapped as a single user message and rendered through the
            tokenizer's chat template.

    Returns:
        The plain prompt string, or whatever ``tokenizer.apply_chat_template``
        returns when the chat template is applied.
    """
    if question_prompt_template is not None:
        text = question_prompt_template.format(question)
    else:
        # The two built-in variants differ only in their opening sentence.
        if step_by_step:
            opening = 'Please answer the following math question. You should think step by step to solve it.\n\n'
        else:
            opening = 'Please answer the following math question. '
        text = (
            opening
            + 'Provide your final answer in the format \\boxed{YOUR_ANSWER}.\n\n'
            + f'Question:\n{question}\n\n'
        )

    # Without a tokenizer (or when not requested) the raw text is the prompt.
    if tokenizer is None or not apply_chat_template:
        return text

    # Single-turn conversation; no system message is included.
    messages = [{"role": "user", "content": text}]
    # date_string is forwarded to the template as an extra variable --
    # presumably to pin the date for templates that embed one; TODO confirm.
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        date_string="26 Jul 2024",
    )
    
class Args:
    """Static run configuration, stored as plain class attributes."""

    # Dataset name and split; both are interpolated into the run paths and
    # passed to MathEval.
    dataset_name = "math500"
    split = "test"

    # Passed positionally to MathEval (presumably the k values for pass@k
    # and an optional cap on the number of examples -- TODO confirm).
    k_list = [1, 4, 8, 32]
    subset_num = None

    # Whether prompts should ask for step-by-step reasoning.
    step_by_step_prompt = True

    # Forwarded to MathEval as n_threads.
    n_threads = 1


args = Args()

In [None]:
# Model tag of the run to prepare. Other tags that have been used here:
# "llama-3.2-1b-inst", "gemma-3-1b-it", "qwen-1.5b-inst".
model_tag = "llama-3.1-8b-inst"

# Run directory for this dataset/model pair. It must already exist, because
# its `tokenization.yaml` is read just below.
job_dir = Path(f"runs/default/{args.dataset_name}.{model_tag}")

# Path of the prompt CSV for this split inside the run directory.
prompt_csv_path = f'{job_dir}/{args.split}.prompts.csv'

# Decode the YAML explicitly as UTF-8 instead of the platform's locale
# encoding, so non-ASCII characters in the prompt template load identically
# on every machine.
with open(f"{job_dir}/tokenization.yaml", "r", encoding="utf-8") as f:
    sampler_config = yaml.safe_load(f)
sampler_config

{'tokenizer': {'pretrained_model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct',
  'trust_remote_code': True},
 'question_prompt_template': 'Can you solve the following math problem? {} Put your final answer within \\boxed{{}}.'}

In [8]:
# Load a tokenizer only when the run config declares one; otherwise prompts
# stay plain text because get_task_instruction_math skips the chat template
# when tokenizer is None.
# NOTE(review): the config's kwargs are forwarded verbatim to from_pretrained
# (including trust_remote_code if present) -- only use trusted config files.
if "tokenizer" in sampler_config:
    tokenizer = AutoTokenizer.from_pretrained(**sampler_config['tokenizer'])
else:
    tokenizer = None

# NOTE(review): `eval` shadows the Python builtin; the name is kept so that
# any later cell referring to it keeps working.
eval = MathEval(args.dataset_name,
                args.split,
                args.k_list,
                args.subset_num,
                # Honour the configured flag instead of a hard-coded True.
                step_by_step_prompt=args.step_by_step_prompt,
                n_threads=args.n_threads)

# Loop-invariant: None when the config has no custom template.
question_prompt_template = sampler_config.get("question_prompt_template")

processed_prompt_data = []
for example in eval.examples:
    # The prompt is also stored on the example itself, as before.
    example['prompt'] = get_task_instruction_math(
        example["Question"],
        question_prompt_template,
        step_by_step=args.step_by_step_prompt,
        tokenizer=tokenizer,
        apply_chat_template=True,
    )
    # The example id is used for both question_id and prompt_id.
    processed_prompt_data.append(
        (example['id'], example['id'], example['Question'], example['answer'], example['prompt'])
    )

prompt_df = pd.DataFrame(
    data=processed_prompt_data,
    columns=['question_id', 'prompt_id', 'question', 'answer', 'prompt'],
)
prompt_df.to_csv(prompt_csv_path, index=False)

Num eval samples is  500
