In [None]:
from typing import List,Callable
import os
from pathlib import Path

import argparse
import json
import logging
import sys
from statistics import mean

from tqdm import tqdm
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from xopen import xopen

from drgrpo_grader import r1_zero_reward_fn
from datetime import datetime



In [None]:
# Expose only GPUs 2-5 to this process.
# NOTE(review): the LLM(...) call further down passes no tensor_parallel_size —
# confirm whether all four visible devices are actually meant to be used.
os.environ.update(CUDA_VISIBLE_DEVICES="2,3,4,5")


In [None]:
def preprocess_MATH_dataset(filepath, prompt_key="problem", answer_key="solution"):
	"""Read a JSON-lines file and split it into parallel prompt/answer lists.

	Args:
		filepath: Path of the JSON-lines file; it is opened with ``xopen`` in
			text mode.
		prompt_key: Key of each record whose value is collected as the prompt.
		answer_key: Key of each record whose value is collected as the answer.

	Returns:
		A ``(prompts, answers)`` tuple of two lists of equal length, in file order.

	Raises:
		json.JSONDecodeError: If a non-blank line is not valid JSON.
		KeyError: If a record lacks ``prompt_key`` or ``answer_key``.
	"""
	prompts = []
	answers = []

	with xopen(filepath, 'r') as f:
		for line in f:
			line = line.strip()
			if not line:
				# Blank lines (e.g. a stray newline at the end of the file)
				# are not records; json.loads("") would raise on them.
				continue
			record = json.loads(line)
			prompts.append(record[prompt_key])
			answers.append(record[answer_key])

	return prompts, answers
def get_data_hour_str() -> str:
	"""Return the current local day-of-month and time formatted as ``DD_HHMM``."""
	timestamp = datetime.now()
	return f"{timestamp:%d_%H%M}"

In [None]:
# JSON-lines file with the MATH validation split; the path is relative, so it
# resolves against the notebook's working directory.
MATH_validation_path = "../data/MATH/validation.jsonl"


In [None]:
# Prompt template with a single `{question}` placeholder (fill it with
# str.format). The template ends on an opened <think> tag, so the model's
# completion starts inside the reasoning section.
r1_zero_prompt = (
    "A conversation between User and Assistant. "
    "The User asks a question, and the Assistant solves it. "
    "The Assistant first thinks about the reasoning process in the mind "
    "and then provides the User with the answer. "
    "The reasoning process is enclosed within <think> </think> "
    "and answer is enclosed within <answer> </answer> tags, respectively, "
    "i.e., <think> reasoning process here </think> "
    "<answer> answer here </answer>.\n"
    "User: {question}\n"
    "Assistant: <think>"
)

In [None]:
def evaluate_vllm(vllm_model: "LLM", reward_fn: Callable[[str, str], dict[str, float]], prompts: List[str], eval_sampling_params: "SamplingParams", output_path=None, solutions=None) -> None:
	"""Generate completions for ``prompts``, score them, and write both to disk.

	Two files are produced:

	* ``output_path``: JSON lines, one ``{"prompt": ..., "predict": ...}``
	  record per generation (only the first candidate of each request is kept).
	* ``<stem>_reward.json`` next to ``output_path``: a JSON list with the
	  value returned by ``reward_fn`` for every generation. It is written only
	  when ``solutions`` is provided.

	Args:
		vllm_model: Object exposing ``generate(prompts, sampling_params)``.
		reward_fn: Called as ``reward_fn(generated_text, solution)``; its
			return value must be JSON-serialisable.
		prompts: Prompts to generate from.
		eval_sampling_params: Sampling parameters forwarded to ``generate``.
		output_path: Destination of the generations file (``str`` or ``Path``).
			Defaults to ``../data/output/<DD_HHMM>.jsonl``.
		solutions: Reference answers aligned index-by-index with ``prompts``.
			When ``None``, generations are saved but no rewards are computed.

	Raises:
		ValueError: If ``solutions`` is given with a different length than
			``prompts``.
	"""
	# Fail before the expensive generation step if the inputs cannot be aligned.
	if solutions is not None and len(solutions) != len(prompts):
		raise ValueError(
			f"got {len(prompts)} prompts but {len(solutions)} solutions"
		)

	if output_path is None:
		output_path = Path("../data/output") / (get_data_hour_str() + ".jsonl")
	else:
		output_path = Path(output_path)
	output_path.parent.mkdir(parents=True, exist_ok=True)

	outputs = vllm_model.generate(prompts, eval_sampling_params)

	reward_results = []
	with open(output_path, 'w', encoding="utf-8") as f:
		# Assumes `outputs` come back in the same order as `prompts`, so that
		# idx also indexes `solutions` — confirm against the generate() docs.
		for idx, output in enumerate(outputs):
			generated_text = output.outputs[0].text
			record = {"prompt": output.prompt, "predict": generated_text}
			f.write(json.dumps(record) + '\n')
			if solutions is not None:
				reward_results.append(reward_fn(generated_text, solutions[idx]))

	if solutions is None:
		return

	# Derive the sibling name from the stem rather than a string replace, so a
	# path without a ".jsonl" suffix can never overwrite the generations file.
	reward_path = output_path.with_name(output_path.stem + "_reward.json")
	with open(reward_path, 'w', encoding="utf-8") as f:
		json.dump(reward_results, f, indent=4, ensure_ascii=False)
    

In [None]:
# Load the model, build the prompts for the MATH validation split, and evaluate.
llm = LLM("../models/Qwen2.5-Math-1.5B")
questions, solutions = preprocess_MATH_dataset(MATH_validation_path)

# Wrap every raw problem in the r1_zero template; the question is passed as a
# format argument, so braces inside the problem text are left untouched.
prompts = [r1_zero_prompt.format(question=question) for question in questions]

# Stop at the closing answer tag instead of the first newline, which would cut
# off any multi-line reasoning. The stop string is kept in the output so the
# saved response still contains </answer>.
# NOTE(review): r1_zero_reward_fn presumably checks for these tags — confirm.
sampling_params = SamplingParams(
    temperature=1.0,
    top_p=1.0,
    max_tokens=1024,
    stop=["</answer>"],
    include_stop_str_in_output=True,
)

# Keyword arguments keep each value bound to the intended parameter; the former
# positional call passed `prompts` as reward_fn and supplied output_path twice.
evaluate_vllm(
    vllm_model=llm,
    reward_fn=r1_zero_reward_fn,
    prompts=prompts,
    eval_sampling_params=sampling_params,
    solutions=solutions,
    output_path="../data/output/math_qwen2.5_1.5b_" + get_data_hour_str() + ".jsonl",
)