In [1]:
import tqdm
from modelscope import AutoModelForCausalLM, AutoTokenizer
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_name = 'Qwen/Qwen2.5-0.5B-Instruct'

# "auto" for both dtype and device_map leaves precision and device placement
# to the loading library instead of pinning them here.
model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", device_map="auto")

# Tokenizer that matches the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm


Downloading Model from https://www.modelscope.cn to directory: /home/logan/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2025-10-01 00:44:24,633 - modelscope - INFO - Target directory already exists, skipping creation.


Downloading Model from https://www.modelscope.cn to directory: /home/logan/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2025-10-01 00:44:26,232 - modelscope - INFO - Target directory already exists, skipping creation.


In [2]:
from datasets import load_dataset
# GSM8K, "main" configuration; the later cells index the result by split name
# ('train' / 'test').
data = load_dataset(path="gsm8k", name="main")

In [3]:
from functools import partial

def create_prompt_formats(sample):
    """Build the supervised fine-tuning text for a single example.

    The example's ``answer`` field is expected to hold free-form reasoning
    followed by ``####`` and the final answer. The text is assembled from four
    sections joined by blank lines: a fixed intro, a fixed instruction that
    shows the expected ``<reasoning>``/``<answer>`` layout, the question
    (omitted when empty), and the target response.

    Args:
        sample: Mapping with ``'question'`` and ``'answer'`` string entries.

    Returns:
        The same ``sample`` object, with a new ``'formatted_prompt'`` entry
        added in place.

    Raises:
        ValueError: If ``sample['answer']`` does not contain the ``####``
            delimiter.
    """
    INTRO = "Instruct: Below is an instruction that describes a task. Write a response that appropriately completes the request."
    # The braces below are literal placeholders shown to the model; this is a
    # plain string, not an f-string, so nothing is substituted into it.
    INSTRUCTION_KEY = 'Input: According to the following questions, please give detailed reasoning steps and answers in the following format: \n<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>'
    RESPONSE_KEY = 'Output:'
    ANSWER_DELIMITER = '####'

    question = sample['question']
    input_context = f"{question}" if question else None

    # Split on the LAST delimiter only, so a stray '####' inside the reasoning
    # text no longer breaks the two-way unpacking.
    reasoning, delimiter, answer = sample['answer'].rpartition(ANSWER_DELIMITER)
    if not delimiter:
        raise ValueError(
            f"answer does not contain the {ANSWER_DELIMITER!r} delimiter: {sample['answer']!r}"
        )
    reasoning = reasoning.strip()
    answer = answer.strip()

    response = f"{RESPONSE_KEY}\n<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>"

    # Drop empty sections (e.g. a missing question) before joining.
    parts = [part for part in (INTRO, INSTRUCTION_KEY, input_context, response) if part]
    sample["formatted_prompt"] = "\n\n".join(parts)

    return sample

def preprocess_batch(batch, tokenizer):
    """Tokenize the ``formatted_prompt`` entries of a batch.

    Args:
        batch: Mapping whose ``"formatted_prompt"`` entry holds the texts.
        tokenizer: Callable invoked as ``tokenizer(texts, truncation=True)``.

    Returns:
        Whatever the tokenizer returns for the batch.
    """
    texts = batch["formatted_prompt"]
    encoded = tokenizer(texts, truncation=True)
    return encoded

def preprocess_dataset(tokenizer, dataset):
    """Format and tokenize a dataset in two ``map`` passes.

    The first pass applies ``create_prompt_formats`` per example. The second
    pass applies ``preprocess_batch`` in batched mode with ``tokenizer`` bound,
    and removes the ``question`` and ``answer`` columns.

    Args:
        tokenizer: Tokenizer forwarded to ``preprocess_batch``.
        dataset: Object exposing a ``map`` method (the notebook passes the
            splits returned by ``load_dataset``).

    Returns:
        The result of the second ``map`` call.
    """
    # Pass 1: add the "formatted_prompt" entry to every example.
    with_prompts = dataset.map(create_prompt_formats)

    # Pass 2: tokenize in batches; the raw text columns are dropped here.
    tokenize_batch = partial(preprocess_batch, tokenizer=tokenizer)
    return with_prompts.map(
        tokenize_batch,
        batched=True,
        remove_columns=['question', 'answer'],
    )


In [4]:
# Run the same format-and-tokenize pipeline over both splits.
train_dataset = preprocess_dataset(tokenizer=tokenizer, dataset=data['train'])
test_dataset = preprocess_dataset(tokenizer=tokenizer, dataset=data['test'])

# Last expression of the cell: display both datasets.
(train_dataset, test_dataset)

(Dataset({
     features: ['formatted_prompt', 'input_ids', 'attention_mask'],
     num_rows: 7473
 }),
 Dataset({
     features: ['formatted_prompt', 'input_ids', 'attention_mask'],
     num_rows: 1319
 }))

In [5]:
import wandb

# Start a Weights & Biases run under the "LongCoT_Math_Inference" project.
# The training configuration in a later cell sets report_to='wandb'.
wandb.init(project="LongCoT_Math_Inference")

[34m[1mwandb[0m: Currently logged in as: [33mlogan-zh-cai[0m ([33mlogan-cai[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
from peft import LoraConfig
from trl.trainer import SFTTrainer, SFTConfig

# Output location for checkpoints / the saved model, and the run name.
output_dir = 'outputs/Long-CoT-Math-Inference-Finetuning'
run_name = 'Qwen2.5-0.5B-Long-CoT-gsm8k'

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# LoRA adapter configuration targeting the four attention projections.
peft_config = LoraConfig(
    task_type='CAUSAL_LM',
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.01,
    bias='none',
)

sft_config = SFTConfig(
    # --- bookkeeping: output, logging, checkpointing ---
    output_dir=output_dir,
    overwrite_output_dir=True,
    run_name=run_name,
    report_to='wandb',
    logging_steps=1,
    save_steps=100,
    # --- optimizer and learning-rate schedule ---
    learning_rate=1e-4,
    lr_scheduler_type='cosine',
    warmup_ratio=0.1,
    weight_decay=0.1,
    adam_beta1=0.9,
    adam_beta2=0.99,
    # --- batching: 2 samples x 2 accumulation steps per device ---
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    max_length=512,
    num_train_epochs=1,
    # --- precision / memory ---
    bf16=True,
    gradient_checkpointing=True,
    # --- periodic evaluation ---
    do_eval=True,
    eval_strategy="steps",
    eval_steps=50,
    per_device_eval_batch_size=2,
)

trainer = SFTTrainer(
    model=model,
    args=sft_config,
    peft_config=peft_config,
    processing_class=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train, then save the resulting model.
trainer.train()
trainer.save_model()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151645}.


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
50,1.1038,1.304907,1.220567,52212.0,0.693677
100,0.4119,0.959633,0.530438,103026.0,0.806221
150,0.3684,0.891864,0.531519,154464.0,0.812313
200,0.269,0.879994,0.517749,206532.0,0.814471
250,0.3285,0.90715,0.489435,258985.0,0.814356
300,0.3717,0.888174,0.50508,308837.0,0.815616
350,0.3454,0.884377,0.501222,360783.0,0.816004
400,0.2224,0.888815,0.498862,412421.0,0.816821
450,0.2843,0.884527,0.504984,463493.0,0.816444
500,0.2502,0.878726,0.496081,515863.0,0.81696


In [15]:
from numpy import dtype
from peft import PeftModel
import torch

# Load a fresh copy of the base checkpoint and its tokenizer.
base_model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Attach the adapter weights from the training checkpoint directory, for
# inference only (is_trainable=False).
# NOTE(review): `dtype` is passed through as an extra keyword argument here;
# confirm that PeftModel.from_pretrained actually honours it.
ft_model = PeftModel.from_pretrained(
    base_model,
    './outputs/Long-CoT-Math-Inference-Finetuning/checkpoint-1869',
    is_trainable=False,
    dtype=torch.bfloat16,
)

Downloading Model from https://www.modelscope.cn to directory: /home/logan/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2025-10-01 02:02:24,730 - modelscope - INFO - Target directory already exists, skipping creation.


Downloading Model from https://www.modelscope.cn to directory: /home/logan/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2025-10-01 02:02:26,908 - modelscope - INFO - Target directory already exists, skipping creation.


In [18]:
# Inference prompt: the same intro / instruction / question layout that
# create_prompt_formats produces, ending at "Output:\n" so the model writes
# the <reasoning>/<answer> sections itself.
prompt = "Instruct: Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\nInput: According to the following questions, please give detailed reasoning steps and answers in the following format: \n<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>\n\nJanet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?\n\nOutput:\n"

toks = tokenizer(prompt, return_tensors="pt")
# Send the inputs to the device the model actually lives on. The model was
# loaded with device_map="auto", so a hard-coded "cuda" fails on CPU-only
# hosts and can mismatch the model's placement.
res = ft_model.generate(**toks.to(ft_model.device),
                        max_new_tokens=512).to("cpu")
# Decode the full sequence (prompt + completion), dropping special tokens.
res_a = tokenizer.batch_decode(res, skip_special_tokens=True)
print(res_a[0])

Instruct: Below is an instruction that describes a task. Write a response that appropriately completes the request.

Input: According to the following questions, please give detailed reasoning steps and answers in the following format: 
<reasoning>
{reasoning}
</reasoning>
<answer>
{answer}
</answer>

Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?

Output:
<reasoning>
She eats 3 + 4 = <<3+4=7>>7 muffins every day.
So she sells 16 - 7 = <<16-7=9>>9 eggs every day.
Thus, she makes 9 * 2 = $<<9*2=18>>18 every day from selling eggs.
</reasoning>
<answer>
18
</answer>


In [9]:
# Show the formatted training text (prompt plus reference response) of the
# first test example.
first_test_prompt = test_dataset[0]['formatted_prompt']
print(first_test_prompt)

Instruct: Below is an instruction that describes a task. Write a response that appropriately completes the request.

Input: According to the following questions, please give detailed reasoning steps and answers in the following format: 
<reasoning>
{reasoning}
</reasoning>
<answer>
{answer}
</answer>

Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?

Output:
<reasoning>
Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.
She makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.
</reasoning>
<answer>
18
</answer>
