In [None]:
# !pip install unsloth

In [None]:
from datasets import load_dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import TrainingArguments
from trl import SFTTrainer
import torch


# Alpaca-style prompt template. It has three positional `{}` slots that are
# filled, in order, with the instruction, the input and the response.
# The template is assembled line by line so that every newline (including the
# one that follows the response slot) is explicit.
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with\n"
    "an input that provides further context. Write a response that appropriately\n"
    "completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}\n"
)



# Number of leading examples to keep for a quick demo run (for example 1000).
# Leave as None to use the complete dataset.
DEMO_SUBSET_SIZE = None

# Load the "train" split of the yahma/alpaca-cleaned dataset.
dataset_full = load_dataset("yahma/alpaca-cleaned", split="train")

if DEMO_SUBSET_SIZE is None:
    # Default: use every example.
    dataset = dataset_full
else:
    # Keep only the first DEMO_SUBSET_SIZE examples; the size is clamped so a
    # value larger than the dataset does not index past its end.
    dataset = dataset_full.select(range(min(DEMO_SUBSET_SIZE, len(dataset_full))))

print(f"Dataset loaded. Number of examples: {len(dataset)}")

In [None]:
# Loading options for the teacher model.
load_in_4bit = True    # use 4-bit quantization to save GPU memory
dtype = None           # None requests automatic dtype detection
max_seq_length = 2048  # also passed to the trainer later in the notebook

# Load the teacher model together with its tokenizer.
teacher_model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
    model_name="unsloth/Qwen2.5-7B",
)


# End-of-sequence marker that the prompt formatter appends to each example.
EOS_TOKEN = tokenizer.eos_token
if EOS_TOKEN is None:
    # Some tokenizers come without a preset EOS token, so one is assigned by
    # hand. The value below is an example; check what Qwen2.5 recommends.
    tokenizer.eos_token = "<|endoftext|>"
    EOS_TOKEN = tokenizer.eos_token
    print(f"EOS_TOKEN was None, set to: {EOS_TOKEN}")


def formatting_prompts_func(examples):
    """Render a batch of examples into training strings.

    Args:
        examples: Mapping with the keys "instruction", "input" and "output",
            each holding a sequence with one entry per example.

    Returns:
        A dict with the single key "text" whose value is a list with one
        string per example: the `alpaca_prompt` template filled with the
        instruction, input and output (in that order), followed by
        `EOS_TOKEN`.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    return {
        "text": [
            # Template slots are positional: instruction, input, output.
            alpaca_prompt.format(task, context, answer) + EOS_TOKEN
            for task, context, answer in rows
        ],
    }


# Run the prompt formatter over the dataset in batches; the formatter returns
# the rendered prompts under the "text" key.
dataset = dataset.map(function=formatting_prompts_func, batched=True)
print("Dataset formatted.")


In [None]:
# Configure LoRA on the teacher model.
# NOTE(review): this statement rebinds `teacher_model`, so re-running the cell
# without reloading the base model would pass an already-wrapped model back
# in. Confirm that is acceptable, or reload the model first.
teacher_model = FastLanguageModel.get_peft_model(
    teacher_model,
    # Names of the projection modules that receive adapters.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # Adapter hyperparameters.
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    # Memory and reproducibility settings.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)
print("Teacher model configured with LoRA.")
teacher_model.print_trainable_parameters()  # print trainable-parameter info



In [None]:
print("\nTraining and saving the teacher model...")

# Decide the mixed-precision mode once: bf16 when supported, otherwise fp16.
_use_bf16 = is_bfloat16_supported()

# Training hyperparameters for the teacher fine-tune.
training_args_teacher = TrainingArguments(
    # Output location (a dedicated directory for the teacher run) and logging.
    output_dir="outputs_teacher_finetune",
    report_to="none",
    logging_steps=1,
    seed=3407,
    # Batching: effective batch size = 2 * 4 = 8.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Run length is set by step count; `num_train_epochs=1` would be the
    # epoch-based alternative.
    # NOTE(review): with max_steps=1 and warmup_ratio=0.1 the only optimizer
    # step probably falls inside the warmup phase, where a linear schedule may
    # still have a learning rate of ~0. Check the logged learning rate; if it
    # is 0 the saved adapter is unchanged from its initialisation. Raise
    # max_steps for a real fine-tune.
    max_steps=1,
    # Optimizer and learning-rate schedule.
    optim="adamw_8bit",
    learning_rate=2e-4,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    # Precision flags are mutually exclusive.
    bf16=_use_bf16,
    fp16=not _use_bf16,
)

# Build the SFTTrainer for the teacher model.
# NOTE(review): newer TRL releases move `dataset_text_field`,
# `max_seq_length` and `packing` into SFTConfig and rename `tokenizer`.
# Confirm the installed TRL/Unsloth versions still accept them here.
trainer_teacher = SFTTrainer(
    args=training_args_teacher,
    model=teacher_model,
    tokenizer=tokenizer,
    # Training data: the formatted prompts live in the "text" column.
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=False,
    dataset_num_proc=2,  # adjust to the number of CPU cores available
)

print("Starting teacher model fine-tuning...")
trainer_teacher.train()
print("Teacher model fine-tuning completed.")


In [None]:
# Save the fine-tuned teacher model (LoRA weights) and the tokenizer into the
# same directory. The directory name is kept consistent with the accompanying
# documentation.
save_directory_teacher = "qwen_teacher_finetune"
for _artifact in (teacher_model, tokenizer):
    # Model first, then tokenizer.
    _artifact.save_pretrained(save_directory_teacher)
print(f"Finetuned teacher model and tokenizer saved to '{save_directory_teacher}'.")

print("\nTeacher model training and saving process finished.")