In [None]:
from transformers import AutoTokenizer

# Local checkpoint directory; the tokenizer below is read from this path.
model_path = "/root/projects/ascend_r1_turtorial/output/qwen-3b-r1-coundown"

# Tokenizer stored alongside the checkpoint; it also supplies the chat
# template used by generate_r1_prompt.
tokenizer = AutoTokenizer.from_pretrained(model_path)

def generate_r1_prompt(numbers, target):
    """
    Render the chat prompt for one R1 Countdown puzzle.

    The conversation consists of a user turn that states the task and a
    partially written assistant turn that ends with an opening `<think>` tag.
    It is rendered through the module-level `tokenizer`'s chat template.

    Args:
        numbers (list[int]): Numbers interpolated into the task description.
        target (int): Target value interpolated into the task description.

    Returns:
        Whatever `tokenizer.apply_chat_template` returns when called with
        `tokenize=False`, i.e. the rendered prompt text rather than token
        ids (presumably a `str` -- confirm against the tokenizer in use).
    """
    user_turn = {
        "role": "user",
        "content": f"使用给定的数字 {numbers}，创建一个等于 {target} 的方程。你可以使用基本算术运算（+、-、*、/）一次或多次，但每个数字只能使用一次。在 <think> </think> 标签中展示你的思考过程，并在 <answer> </answer> 标签中返回最终方程，例如 <answer> (1 + 2) / 3 </answer>。在 <think> 标签中逐步思考。",
    }
    # Ending the assistant turn on `<think>` nudges the model to start reasoning.
    assistant_turn = {
        "role": "assistant",
        "content": "让我们逐步解决这个问题。\n<think>",
    }
    conversation = [user_turn, assistant_turn]

    # continue_final_message=True asks the template to leave the last
    # (assistant) message open so generation continues from it.
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        continue_final_message=True,
    )

# Render a sample prompt: numbers 19, 5 and 3 with target value 7.
text = generate_r1_prompt(numbers=[19, 5, 3], target=7)
print(text)

import torch

from transformers import AutoModelForCausalLM
# NOTE(review): the device is hard-coded to CUDA index 3, so this presumably
# needs a machine exposing at least four CUDA devices -- confirm or adjust.
device = "cuda:3"

# Load the causal LM from the same checkpoint directory as the tokenizer,
# with bfloat16 weights, handing the device string to `device_map`.
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map=device, torch_dtype=torch.bfloat16
)

# Tokenize the single prompt as a batch of one and move the tensors to the
# device the model lives on.
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate at most 1024 new tokens for the prompt.
generated_ids = model.generate(**model_inputs, max_new_tokens=1024)

# For every sequence, drop the leading positions that correspond to the
# prompt so that only the remainder is decoded.
generated_ids = [
    full_sequence[len(prompt_sequence):]
    for prompt_sequence, full_sequence in zip(model_inputs.input_ids, generated_ids)
]

# Decode the batch without special tokens and keep the only entry.
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)