# Supervised Fine-Tuning (SFT)


In [18]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, TrainerCallback
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import load_dataset, Dataset


## 1.数据处理

In [1]:
from datasets import load_dataset

# Only the first 500 rows of the train split are requested, keeping the demo small.
test_dataset = load_dataset(
    path="YeungNLP/firefly-train-1.1M",
    split="train[:500]",
)

Repo card metadata block was not found. Setting CardData to empty.


In [2]:
# Show the dataset summary (column names and row count) as the cell output.
test_dataset

Dataset({
    features: ['kind', 'input', 'target'],
    num_rows: 500
})

### 1.1目标格式

要把数据处理成与基础模型（foundation model，即 Qwen2.5-0.5B-Instruct）一致的输入格式。

In [3]:
from transformers import AutoTokenizer

# This tokenizer provides the chat template that format_prompt renders with.
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",
)

def format_prompt(example, system_prompt="你是一个由Shaoyj微调的非常强大的人工智能助手."):
    """Render one dataset row as a single chat-formatted training string.

    Args:
        example: Mapping with an ``"input"`` entry (used as the user turn) and
            a ``"target"`` entry (used as the assistant turn).
        system_prompt: Content of the system turn. The default is the prompt
            this notebook originally hard-coded, so existing calls such as
            ``Dataset.map(format_prompt)`` produce the same text as before.

    Returns:
        A dict ``{"text": rendered}`` where ``rendered`` is the string returned
        by the module-level ``tokenizer``'s ``apply_chat_template``.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": example["input"]},
        {"role": "assistant", "content": example["target"]},
    ]
    # tokenize=False asks for the rendered string rather than token ids.
    rendered = tokenizer.apply_chat_template(messages, tokenize=False)
    return {"text": rendered}

# Apply format_prompt to every row and drop the original columns from the result.
dataset = test_dataset.map(
    function=format_prompt,
    remove_columns=test_dataset.column_names,
)
dataset

Dataset({
    features: ['text'],
    num_rows: 500
})

In [4]:
# Inspect the first rendered sample to check the chat-template markup.
dataset[0]

{'text': '<|im_start|>system\n你是一个由Shaoyj微调的非常强大的人工智能助手.<|im_end|>\n<|im_start|>user\n自然语言推理：\n前提：家里人心甘情愿地养他,还有几家想让他做女婿的\n假设：他是被家里人收养的孤儿<|im_end|>\n<|im_start|>assistant\n中立<|im_end|>\n'}

## 2.加载模型

In [5]:
import accelerate
from transformers import AutoModelForCausalLM

# Reload the tokenizer for the same checkpoint and switch it to left padding.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
tokenizer.padding_side = "left"

# device_map="auto" leaves device placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",
    device_map="auto",
)

## 3.配置

### 3.1LoRA Configuration

In [8]:
from peft import LoraConfig, get_peft_model

peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Only modules with these names are targeted by the adapter.
    target_modules=["k_proj", "q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Rebinds `model` to the wrapped model; re-running this cell would pass the
# already-wrapped model back into get_peft_model.
model = get_peft_model(model, peft_config)

### 3.2训练配置


In [9]:
from transformers import TrainingArguments

output_dir = "./results"

training_args = TrainingArguments(
    output_dir=output_dir,
    # Batching: 2 samples per device, accumulated over 4 steps.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Optimizer and learning-rate schedule.
    optim="adamw_torch",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    # Run length: per the TrainingArguments docs, a positive max_steps
    # overrides num_train_epochs.
    num_train_epochs=1,
    max_steps=20,
    # Logging and checkpoint cadence, in optimizer steps.
    logging_steps=10,
    save_steps=15,
    # Precision and memory settings.
    fp16=True,
    gradient_checkpointing=True,
)

## 4.训练

In [15]:
from trl import SFTTrainer

# `model` has already been wrapped by get_peft_model(model, peft_config) in the
# LoRA configuration cell, so the adapter config is not passed to the trainer a
# second time; handing over both a wrapped model and a peft_config is redundant
# and is not accepted uniformly across TRL releases.
# NOTE(review): no tokenizer is passed, so the trainer resolves one on its own;
# the padding_side set on `tokenizer` earlier presumably has no effect on
# training — confirm against the installed TRL version.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)



Adding EOS to train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

In [16]:
# Run the training loop, then persist trainer.model under the results directory.
trainer.train()
trainer.model.save_pretrained(save_directory="./results/final-result")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


Step,Training Loss
10,3.9213
20,3.6108


## 5.回调函数

In [None]:
class TrainingMonitorCallback(TrainerCallback):
    """Custom callback that prints training progress to stdout."""

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Print step, loss and learning rate for log entries that carry a loss.

        Entries without a ``"loss"`` key (or with ``logs`` missing/empty) are
        ignored. A missing ``"learning_rate"`` is printed as ``n/a`` instead of
        being formatted as a number, which would raise ``TypeError``.
        """
        if not logs:
            return
        loss = logs.get("loss")
        if loss is None:
            return
        learning_rate = logs.get("learning_rate")
        # Only apply the numeric format when a learning rate was actually logged.
        lr_text = f"{learning_rate:.2e}" if learning_rate is not None else "n/a"
        print(f"Step {state.global_step}: Loss = {loss:.4f}, LR = {lr_text}")

    def on_epoch_end(self, args, state, control, **kwargs):
        """Print a completion message containing ``state.epoch``."""
        print(f"Epoch {state.epoch} 完成")

# Create the callback instances.
# NOTE(review): in the visible cells this list is built after trainer.train()
# and is not passed to the SFTTrainer constructor, so the callback does not
# appear to be attached to any training run — confirm intended usage.
callbacks = [TrainingMonitorCallback()]