In [1]:
import os
# Restrict this process to GPU index 2.
# NOTE(review): presumably this has to run before the vLLM import further down
# initialises CUDA -- confirm before moving this cell.
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

# The cells below use paths relative to the directory that contains "vrevals"
# ("./vrevals", "vrevals/runs/..."), i.e. the parent of the notebook's folder.
# Only step up when we are not already there, so re-running this cell (or starting
# the kernel from the project root) does not climb one level too far.
if not os.path.isdir("vrevals"):
    os.chdir("..")

In [2]:
import sys
# Make the modules under ./vrevals importable (the `sampler` package and
# `math_evaluator` are imported from there in a later cell).
# The membership check keeps the cell idempotent: re-running it no longer appends
# a duplicate entry to sys.path each time.
if "./vrevals" not in sys.path:
    sys.path.append("./vrevals")

In [3]:
import json
import pandas as pd
import yaml
import time
from pathlib import Path

from transformers import AutoTokenizer

In [4]:
from sampler.chat_completion_sampler import ChatCompletionSampler, DivFirstSampler
from sampler.vllm_sampler import VLLMSampler
from math_evaluator import MathEval

INFO 11-22 16:09:11 [__init__.py:216] Automatically detected platform cuda.


In [5]:
def get_task_instruction_math(question, question_prompt_template=None, step_by_step=False, tokenizer=None, apply_chat_template=False):
    """Build the prompt for a single math question.

    Args:
        question: Question text to embed in the prompt.
        question_prompt_template: Optional ``str.format`` template; it is formatted
            with the question as its only positional argument. When given, the
            built-in instructions (and therefore ``step_by_step``) are not used.
        step_by_step: With no template, selects the built-in instruction that also
            asks the model to think step by step.
        tokenizer: Optional object exposing ``apply_chat_template``.
        apply_chat_template: When truthy and a tokenizer is supplied, the prompt is
            wrapped as a single user message and rendered through the tokenizer's
            chat template (``tokenize=False``, ``add_generation_prompt=True``).

    Returns:
        The plain prompt string, or whatever ``tokenizer.apply_chat_template``
        returns when the chat template is applied.
    """
    if question_prompt_template is None:
        # Built-in instructions: only the opening sentence differs between modes.
        if step_by_step:
            opening = 'Please answer the following math question. You should think step by step to solve it.\n\n'
        else:
            opening = 'Please answer the following math question. '
        user_text = (
            opening
            + 'Provide your final answer in the format \\boxed{YOUR_ANSWER}.\n\n'
            + f'Question:\n{question}\n\n'
        )
    else:
        user_text = question_prompt_template.format(question)

    # Without both a tokenizer and the flag, the raw text is the final prompt.
    if tokenizer is None or not apply_chat_template:
        return user_text

    chat = [{"role": "user", "content": user_text}]
    return tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    
class Args:
    """Static settings for this run, stored as plain class attributes."""

    # Benchmark name and split; both are interpolated into the run-directory and
    # prompt-CSV paths built in a later cell.
    dataset_name = "gsm8k"
    split = "test"

    # In the visible cells, k_list / subset_num / n_threads are referenced only by
    # the commented-out MathEval setup, and step_by_step_prompt is not read at all.
    k_list = [1, 4, 8, 32]
    subset_num = None
    step_by_step_prompt = True
    n_threads = 1


# Single shared settings object used by the cells below.
args = Args()

In [6]:
# Output folder for this dataset/model pairing; the paths below are built from it.
job_dir = Path(f"vrevals/runs/default/{args.dataset_name}.qwen-1.5b-inst")

try:
    # exist_ok=True means an already-present folder is not an error, so the message
    # below is printed for a fresh and for a pre-existing directory alike.
    job_dir.mkdir(parents=True, exist_ok=True)
    print(f"Directory '{job_dir}' and its parent directories created successfully.")
except OSError as mkdir_error:
    # Report the failure and carry on instead of aborting the cell.
    print(f"Error creating directory: {mkdir_error}")

# Kept as plain strings: later cells extend them with f-strings.
prompt_csv_path = f'{job_dir}/{args.split}.prompts.csv'
sampler_config_dir = f'{job_dir}/distilled-50.direct/sample_2'

# Read the sampler/tokenizer settings for the selected sample directory.
config_yaml_path = f"{sampler_config_dir}/sampler_config.yaml"
with open(config_yaml_path, "r") as config_file:
    sampler_config = yaml.safe_load(config_file)

# Bare expression so the notebook renders the loaded config.
sampler_config

Directory 'vrevals/runs/default/gsm8k.qwen-1.5b-inst' and its parent directories created successfully.


{'tokenizer': {'pretrained_model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct',
  'trust_remote_code': True},
 'question_prompt_template': 'Can you solve the following math problem? {} Put your final answer within \\boxed{{}}.',
 'sampler': {'class': 'VLLMSampler',
  'model_name': 'nnheui/thinking_distilled-qwen2.5-1.5b-instruct-gsm8k',
  'revision': 'step_50',
  'temperature': 0.7,
  'top_p': 1.0,
  'top_k': -1,
  'max_tokens': 6000}}

In [7]:
# NOTE: this cell is disabled. When uncommented it builds the prompt for every
# evaluation example (chat template applied through the configured tokenizer) and
# writes the result to `prompt_csv_path` -- the CSV that the next cell reads back.
# Re-enable it to regenerate that file.
# NOTE(review): as written it would rebind the builtin name `eval`; consider
# renaming that variable if the cell is re-enabled.
# if "tokenizer" in sampler_config:
#     tokenizer = AutoTokenizer.from_pretrained(**sampler_config['tokenizer'])
# else:
#     tokenizer = None
    
# eval = MathEval(args.dataset_name, 
#                 args.split, 
#                 args.k_list, 
#                 args.subset_num, 
#                 step_by_step_prompt=True,
#                 n_threads=args.n_threads)

# processed_prompt_data = []
# for e in eval.examples:
#     question = e["Question"]
#     e['prompt'] = get_task_instruction_math(question, 
#                                                sampler_config.get("question_prompt_template"),
#                                                tokenizer=tokenizer,
#                                                apply_chat_template=True,
#                                                step_by_step=True,)
#     processed_prompt_data.append(
#         (e['id'], e['id'], e['Question'], e['answer'], e['prompt'])
#     )
# prompt_df = pd.DataFrame(data=processed_prompt_data, columns=['question_id', 'prompt_id', 'question', 'answer', 'prompt'])
# prompt_df.to_csv(prompt_csv_path, index=False)

In [8]:
# Load the previously written prompts and print the first one as a sanity check.
prompt_df = pd.read_csv(prompt_csv_path)
first_prompt = prompt_df['prompt'][0]
print(first_prompt)

# Reload the sampler settings from the same YAML file that was read earlier.
sampler_yaml_path = f"{sampler_config_dir}/sampler_config.yaml"
with open(sampler_yaml_path, "r") as yaml_file:
    sampler_config = yaml.safe_load(yaml_file)

# Bare expression so the notebook renders the config.
sampler_config

<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
Can you solve the following math problem? Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? Put your final answer within \boxed{}.<|im_end|>
<|im_start|>assistant



{'tokenizer': {'pretrained_model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct',
  'trust_remote_code': True},
 'question_prompt_template': 'Can you solve the following math problem? {} Put your final answer within \\boxed{{}}.',
 'sampler': {'class': 'VLLMSampler',
  'model_name': 'nnheui/thinking_distilled-qwen2.5-1.5b-instruct-gsm8k',
  'revision': 'step_50',
  'temperature': 0.7,
  'top_p': 1.0,
  'top_k': -1,
  'max_tokens': 6000}}

In [9]:
# Extract tokenizer config and sampler config
tokenizer_config = sampler_config.get("tokenizer", {})
sampler_config_section = sampler_config.get("sampler", {})

# Dynamically load the sampler class
sampler_class_name = sampler_config_section.get("class", "ChatCompletionSampler")
sampler_classes = {
    "ChatCompletionSampler": ChatCompletionSampler,
    "DivFirstSampler": DivFirstSampler,
    "VLLMSampler": VLLMSampler,
}
# SamplerClass = sampler_classes[sampler_class_name]
SamplerClass = VLLMSampler

# Remove keys that are not arguments to SamplerClass.__init__
init_args = {
    # "api_key_name": "VLLM_TOKEN",
    # "base_url": f"http://localhost:{port}/v1",
}
for k, v in sampler_config_section.items():
    if k == "class":
        continue
    # Renaming config keys to match the argument names where needed
    if k == "model_name":
        # init_args["model"] = v
        init_args["model_name_or_path"] = v
    # elif k == "api_key_name": 
    #     continue
    else:
        init_args[k] = v
    
print(init_args)


{'model_name_or_path': 'nnheui/thinking_distilled-qwen2.5-1.5b-instruct-gsm8k', 'revision': 'step_50', 'temperature': 0.7, 'top_p': 1.0, 'top_k': -1, 'max_tokens': 6000}


In [10]:
# Create the sampler
# Instantiates the class selected above with the kwargs derived from the YAML
# "sampler" section. With VLLMSampler the captured log below shows a vLLM engine
# being initialised (weights loaded, CUDA graphs captured), so this cell is slow.
sampler = SamplerClass(**init_args)

INFO 11-22 16:09:21 [utils.py:233] non-default args: {'disable_log_stats': True, 'revision': 'step_50', 'model': 'nnheui/thinking_distilled-qwen2.5-1.5b-instruct-gsm8k'}


INFO 11-22 16:09:22 [model.py:547] Resolved architecture: Qwen2ForCausalLM


`torch_dtype` is deprecated! Use `dtype` instead!


INFO 11-22 16:09:22 [model.py:1510] Using max model len 32768
INFO 11-22 16:09:23 [scheduler.py:205] Chunked prefill is enabled with max_num_batched_tokens=8192.
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:24 [core.py:644] Waiting for init message from front-end.
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:24 [core.py:77] Initializing a V1 LLM engine (v0.11.0) with config: model='nnheui/thinking_distilled-qwen2.5-1.5b-instruct-gsm8k', speculative_config=None, tokenizer='nnheui/thinking_distilled-qwen2.5-1.5b-instruct-gsm8k', skip_tokenizer_init=False, tokenizer_mode=auto, revision=step_50, tokenizer_revision=step_50, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backen

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:30 [default_loader.py:267] Loading weights took 1.16 seconds
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:30 [gpu_model_runner.py:2653] Model loading took 2.8876 GiB and 1.832769 seconds
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:36 [backends.py:548] Using cache directory: /home/grads/hnn5071/.cache/vllm/torch_compile_cache/12371887f6/rank_0_0/backbone for vLLM's torch.compile
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:36 [backends.py:559] Dynamo bytecode transform time: 5.54 s
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:38 [backends.py:164] Directly load the compiled graph(s) for dynamic shape from the cache, took 1.603 s
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:39 [monitor.py:34] torch.compile takes 5.54 s in total
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:40 [gpu_worker.py:298] Available KV cache memory: 38.44 GiB
[1;36m(EngineCo

Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 67/67 [00:02<00:00, 26.97it/s]
Capturing CUDA graphs (decode, FULL): 100%|██████████| 35/35 [00:01<00:00, 23.48it/s]


[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:45 [gpu_model_runner.py:3480] Graph capturing finished in 5 secs, took 0.62 GiB
[1;36m(EngineCore_DP0 pid=3996937)[0;0m INFO 11-22 16:09:45 [core.py:210] init engine (profile, create kv cache, warmup model) took 15.09 seconds
INFO 11-22 16:09:47 [llm.py:306] Supported_tasks: ['generate']


In [11]:
# sampler_config['sampler']['max_tokens'] = 8000
# sampler.max_tokens = sampler_config['sampler']['max_tokens']

In [12]:
# Append "<think>\n" to every prompt, i.e. directly after the assistant header that
# the stored chat-templated prompt ends with (see the printed example).
prompts = prompt_df['prompt'].map(lambda prompt_text: prompt_text + "<think>\n")
print(prompts[0])

<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
Can you solve the following math problem? Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? Put your final answer within \boxed{}.<|im_end|>
<|im_start|>assistant
<think>



In [13]:
# Run generation for all prompts in a single call.
# NOTE(review): the positional 1 is presumably the number of completions per
# prompt (each result exposes `.choices` below) -- confirm against the sampler's
# `complete` signature.
response = sampler.complete(prompts, 1)

Adding requests:   0%|          | 0/1319 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1319 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s…

In [14]:
# Flatten the sampler output into one tuple per returned choice, pairing each result
# with the prompt row at the same position. The fourth field (predicted answer) is
# left as None here; the full sampler config is attached to every record.
generations = [
    (
        prompt_row['question_id'],
        prompt_row['prompt_id'],
        choice.response_text,
        None,
        prompt_row['answer'],
        sampler_config,
    )
    for result, (_, prompt_row) in zip(response, prompt_df.iterrows())
    for choice in result.choices
]

In [15]:
# Column names follow the field order of the tuples collected in `generations`.
generation_columns = [
    'question_id',
    'prompt_id',
    'response',
    'pred_answer',
    'gt_answer',
    'sampler_config',
]
gen_df = pd.DataFrame(generations, columns=generation_columns)

In [16]:
# Show the collected generations; pandas abbreviates the rendered table for large
# frames (note the "..." row in the output).
gen_df

Unnamed: 0,question_id,prompt_id,response,pred_answer,gt_answer,sampler_config
0,0,0,"Okay, so I've got this math problem that Janet...",,18,{'tokenizer': {'pretrained_model_name_or_path'...
1,1,1,"Okay, so I have this math problem here: ""A rob...",,3,{'tokenizer': {'pretrained_model_name_or_path'...
2,2,2,"Okay, so I have this math problem to solve, an...",,70000,{'tokenizer': {'pretrained_model_name_or_path'...
3,3,3,"Okay, so James is running 3 sprints each day, ...",,540,{'tokenizer': {'pretrained_model_name_or_path'...
4,4,4,"Okay, so I've got this math problem here about...",,20,{'tokenizer': {'pretrained_model_name_or_path'...
...,...,...,...,...,...,...
1314,1314,1314,"Okay, let's see. So, I have this math problem ...",,8,{'tokenizer': {'pretrained_model_name_or_path'...
1315,1315,1315,"Okay, let me try to figure out this math probl...",,5,{'tokenizer': {'pretrained_model_name_or_path'...
1316,1316,1316,"Okay, so Mark needs a new radiator for his car...",,230,{'tokenizer': {'pretrained_model_name_or_path'...
1317,1317,1317,"Okay, so I've got this math problem here: Farm...",,5,{'tokenizer': {'pretrained_model_name_or_path'...


In [17]:
# Timestamp the output file so repeated runs are written to distinct files.
# strftime yields zero-padded fields and avoids ':' and ',' in the file name
# (':' is not a valid file-name character on Windows). Including the year and
# seconds keeps names sortable and prevents two runs started within the same
# minute from overwriting each other. The "generations." prefix and ".csv"
# suffix are unchanged.
t = time.localtime()
run_stamp = time.strftime("%Y-%m-%d_%H-%M-%S", t)
generation_csv_name = f'generations.{run_stamp}.csv'

# Written next to the sampler_config.yaml that produced these generations.
gen_df.to_csv(f"{sampler_config_dir}/{generation_csv_name}", index=False)