In [1]:
import tqdm
from modelscope import AutoModelForCausalLM, AutoTokenizer
model_name = 'Qwen/Qwen2.5-0.5B-Instruct'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm


Downloading Model from https://www.modelscope.cn to directory: /home/logan/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2025-10-09 17:47:51,622 - modelscope - INFO - Target directory already exists, skipping creation.


Downloading Model from https://www.modelscope.cn to directory: /home/logan/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2025-10-09 17:47:54,186 - modelscope - INFO - Target directory already exists, skipping creation.


In [3]:
from datasets import load_dataset
data = load_dataset("gsm8k", "main")

'(ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')), '(Request ID: b51c0e55-7ece-4e65-a001-a55c9440f49c)')' thrown while requesting HEAD https://huggingface.co/datasets/gsm8k/resolve/main/README.md
Retrying in 1s [Retry 1/5].
'(ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')), '(Request ID: 788c8c0f-dae1-40be-8ca2-ca5e81e6d888)')' thrown while requesting HEAD https://huggingface.co/datasets/gsm8k/resolve/main/README.md
Retrying in 2s [Retry 2/5].
'(ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')), '(Request ID: 5e243b60-039e-473f-8e5b-ac784c250eb8)')' thrown while requesting HEAD https://huggingface.co/datasets/gsm8k/resolve/main/README.md
Retrying in 4s [Retry 3/5].
'(ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')), '(Request ID: cff0bb33-ebea-4ac8-ad3a-33451263327a)')' thrown while requesting HEAD https://huggingfa

In [4]:
from functools import partial

def create_prompt_formats(sample):
    SYSTEM_PROMPT = """You are Qwen, created by Alibaba Cloud. According to the question, please provide the user with detailed reasoning steps and answer in the following format:\n<reasoning>\n...\n</reasoning>\n<answer>\n...\n</answer>\n"""

    reasoning_steps, answer = [p.strip() for p in sample["answer"].split("####")]

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": sample["question"] + "\n"},
        {"role": "assistant", "content": f"<reasoning>\n{reasoning_steps}\n</reasoning>\n<answer>\n{answer}\n</answer>\n"}
        ]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False,
        )

    sample["formatted_prompt"] = text

    return sample

def preprocess_batch(batch, tokenizer):
    return tokenizer(batch["formatted_prompt"], truncation=True)

def preprocess_dataset(tokenizer, dataset):
    dataset = dataset.map(create_prompt_formats)

    _preprocessing_function = partial(preprocess_batch, tokenizer=tokenizer)
    dataset = dataset.map(
        _preprocessing_function,
        batched=True,
        remove_columns=['question', 'answer']
    )
    return dataset


In [5]:
train_dataset = preprocess_dataset(tokenizer, data['train'])
test_dataset = preprocess_dataset(tokenizer, data['test'])
train_dataset, test_dataset

Map: 100%|██████████| 7473/7473 [00:00<00:00, 15649.47 examples/s]
Map: 100%|██████████| 7473/7473 [00:00<00:00, 10277.80 examples/s]
Map: 100%|██████████| 1319/1319 [00:00<00:00, 16708.46 examples/s]
Map: 100%|██████████| 1319/1319 [00:00<00:00, 9844.38 examples/s] 


(Dataset({
     features: ['formatted_prompt', 'input_ids', 'attention_mask'],
     num_rows: 7473
 }),
 Dataset({
     features: ['formatted_prompt', 'input_ids', 'attention_mask'],
     num_rows: 1319
 }))

In [6]:
import wandb

wandb.init(project="LongCoT_Math_Inference")

[34m[1mwandb[0m: Currently logged in as: [33mlogan-zh-cai[0m ([33mlogan-cai[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
from peft import LoraConfig
from trl.trainer import SFTTrainer, SFTConfig

output_dir = 'outputs/Long-CoT-Math-Inference-Finetuning'
run_name = 'Qwen2.5-0.5B-Long-CoT-gsm8k'

tokenizer.pad_token = tokenizer.eos_token

peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    bias='none',
    lora_dropout=0.01,
    task_type='CAUSAL_LM'
)

sft_config = SFTConfig(
    output_dir=output_dir,
    run_name=run_name,
    learning_rate=1e-4,
    adam_beta1=0.9,
    adam_beta2=0.99,
    weight_decay=0.1,
    warmup_ratio=0.1,
    lr_scheduler_type='cosine',
    logging_steps=1,
    bf16=True,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    max_length=512,
    num_train_epochs=1,
    save_steps=100,
    do_eval=True,
    eval_strategy="steps",
    eval_steps=50,
    per_device_eval_batch_size=2,
    gradient_checkpointing=True,
    overwrite_output_dir=True,
    report_to='wandb',
)

trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    peft_config=peft_config,
    args=sft_config
)

trainer.train()
trainer.save_model()

Truncating train dataset: 100%|██████████| 7473/7473 [00:00<00:00, 464707.17 examples/s]
Truncating eval dataset: 100%|██████████| 1319/1319 [00:00<00:00, 236695.63 examples/s]
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151645}.


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
50,1.2123,1.428616,1.121758,51412.0,0.702637
100,0.4224,0.993301,0.577937,101426.0,0.799841
150,0.3709,0.908663,0.531967,152072.0,0.809796
200,0.2712,0.900655,0.527968,203340.0,0.811724
250,0.3421,0.914902,0.503196,254997.0,0.811708
300,0.3948,0.902828,0.517793,304049.0,0.812669
350,0.3548,0.90048,0.510114,355207.0,0.812872
400,0.2226,0.907529,0.507379,406049.0,0.813435
450,0.2979,0.894996,0.517039,456321.0,0.81392
500,0.2539,0.895167,0.505356,507899.0,0.814151


In [8]:
from numpy import dtype
from peft import PeftModel
import torch

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype="auto",
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

ft_model = PeftModel.from_pretrained(
    base_model,
    './outputs/Long-CoT-Math-Inference-Finetuning/checkpoint-1869',
    dtype=torch.bfloat16,
    is_trainable=False
)

Downloading Model from https://www.modelscope.cn to directory: /home/logan/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2025-10-09 18:29:32,389 - modelscope - INFO - Target directory already exists, skipping creation.


Downloading Model from https://www.modelscope.cn to directory: /home/logan/.cache/modelscope/hub/models/Qwen/Qwen2.5-0.5B-Instruct


2025-10-09 18:29:34,620 - modelscope - INFO - Target directory already exists, skipping creation.


In [9]:
sample = data['test'][0]

SYSTEM_PROMPT = """You are Qwen, created by Alibaba Cloud. According to the question, please provide the user with detailed reasoning steps and answer in the following format:\n<reasoning>\n...\n</reasoning>\n<answer>\n...\n</answer>\n"""

messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": sample["question"]},
        ]

text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        )

model_inputs = tokenizer([text], return_tensors='pt').to(ft_model.device)

generated_ids = ft_model.generate(**model_inputs, max_new_tokens=512)

text = tokenizer.batch_decode(generated_ids, skip_special_tokens=False)[0]

print(text)

<|im_start|>system
You are Qwen, created by Alibaba Cloud. According to the question, please provide the user with detailed reasoning steps and answer in the following format:
<reasoning>
...
</reasoning>
<answer>
...
</answer>
<|im_end|>
<|im_start|>user
Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?<|im_end|>
<|im_start|>assistant
<reasoning>
She has a total of 7 days because there are 365 / 52 = <<365/52=7>>7 days in a year.
So, she has 7 * 16 = <<7*16=112>>112 eggs.
She eats 3 + 4 = <<3+4=7>>7 eggs each day.
So, she gives away 112 - 7 = <<112-7=95>>95 eggs.
She makes 95 x 2 = $<<95*2=190>>190 from selling the eggs at the farmers' market.
</reasoning>
<answer>
190
</answer>
<|im_end|>
