In [None]:
from datasets import load_dataset, Dataset
from trl import SFTConfig, SFTTrainer
from transformers import AutoTokenizer

# Local path of the base model checkpoint to fine-tune.
model_name = "/data/xxx/LLMs/Qwen/Qwen2.5-0.5B-Instruct"
# Directory that receives the training outputs (passed to SFTConfig below).
output_dir = "/data/xxx/tigerHandle_c4/chap44_sft_distillation_output"

import pandas as pd

In [4]:
# Load the tokenizer stored alongside the base model.
# NOTE(review): `tokenizer` is not referenced by any later cell visible here — confirm it is still needed.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the train split of the distillation SFT dataset from local disk.
dataset = load_dataset("/data/zhuantai/LLMs/liucong___chinese-deep_seek-r1-distill-data-110k-sft", split="train")

# Convert with the dataset's own `to_pandas()` rather than `pd.DataFrame(dataset)`,
# which has to iterate the dataset row by row to build the frame.
data = dataset.to_pandas()

In [5]:
# Sum the three per-sample length columns into a single `seq_len` column.
prompt_len = data['prompt_tokens_len']
reasoning_len = data['reasoning_content_tokens_len']
answer_len = data['content_tokens_len']
data['seq_len'] = prompt_len + reasoning_len + answer_len

In [6]:
# Keep only the samples whose combined length is below 1000.
# The filtered frame is rebound to `data` instead of using `inplace=True`,
# so the cell stays a plain expression and re-running it is harmless.
data = data.query('seq_len < 1000')

In [7]:
# Convert the filtered frame back into a `datasets.Dataset`.
# `preserve_index=False` makes sure the pandas index left over from row
# filtering is never stored as an extra `__index_level_0__` column.
dataset = Dataset.from_pandas(data, preserve_index=False)

In [8]:
# Convert samples into Qwen's ChatML text format.
def convert_to_chatml(samples):
    """Render a batch of instruction/output pairs as ChatML strings.

    Args:
        samples: Column-oriented batch; ``samples['instruction']`` and
            ``samples['output']`` are indexed in parallel.

    Returns:
        list[str]: One ChatML-formatted text per entry of
        ``samples['instruction']``, in the same order.

    Raises:
        IndexError: If ``samples['output']`` has fewer entries than
            ``samples['instruction']``.
    """
    # The system turn is the same for every sample, so build it once.
    system_turn = "<|im_start|>system\nYou are a helpful assistant\nRespond in the following format:\n<think>\n...\n</think>\n...\n<|im_end|>\n"

    def render(idx):
        # Assemble system + user + assistant turns for the sample at `idx`.
        user_turn = f"<|im_start|>user\n{samples['instruction'][idx]}<|im_end|>\n"
        assistant_turn = f"<|im_start|>assistant\n{samples['output'][idx]}<|im_end|>\n"
        return system_turn + user_turn + assistant_turn

    return [render(idx) for idx in range(len(samples['instruction']))]

In [9]:
# Training hyperparameters for supervised fine-tuning.
training_args = SFTConfig(
    output_dir=output_dir,
    # Maximum token length of a single sample (input + output) during preprocessing.
    max_seq_length=2048,
    num_train_epochs=2,
    # One sample per device with 4 accumulation steps between optimizer updates.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    bf16=True,
    # Number of steps between log entries.
    logging_steps=500,
    save_steps=10000,
)

In [None]:
# Build the trainer from the model path, the prepared dataset and the
# training arguments.
trainer = SFTTrainer(
    model=model_name,
    train_dataset=dataset,
    # Formats samples as Qwen ChatML text.
    formatting_func=convert_to_chatml,
    args=training_args,
)

# Start fine-tuning.
trainer.train()

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Map:   0%|          | 0/42379 [00:00<?, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
500,1.6746
1000,1.613
1500,1.547
2000,1.5686
2500,1.532
3000,1.5074
3500,1.5144
4000,1.5028
4500,1.5013
5000,1.4741
