<a href="https://www.kaggle.com/code/mengaidev/open-deep-diver-sft?scriptVersionId=261757714" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Installation

In [None]:
%%capture
import os
os.environ["UNSLOTH_VLLM_STANDBY"] = "1" # [NEW] Extra 30% context lengths!
os.environ["WANDB_DISABLED"] = "true"
!pip install --upgrade -qqq uv
try: import numpy; get_numpy = f"numpy=={numpy.__version__}"
except: get_numpy = "numpy"
try: import subprocess; is_t4 = "Tesla T4" in str(subprocess.check_output(["nvidia-smi"]))
except: is_t4 = False
get_vllm, get_triton = ("vllm==0.10.1", "triton==3.2.0") if is_t4 else ("vllm", "triton")
!uv pip install -qqq --upgrade     unsloth {get_vllm} {get_numpy} torchvision bitsandbytes xformers
!uv pip install -qqq {get_triton}
!uv pip install "huggingface_hub>=0.34.0" "datasets>=3.4.1,<4.0.
!uv pip install transformers==4.55.4
!uv pip install --no-deps trl==0.22.2

In [None]:
from unsloth import FastLanguageModel
import torch
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import Dataset
import json

# Load the Qwen2.5-7B-Instruct checkpoint from the Kaggle input directory
# with 4-bit quantized weights. Returns both the model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="/kaggle/input/qwen2.5/transformers/7b-instruct/1",
    max_seq_length=2048,
    dtype=None,  # no explicit dtype; presumably auto-selected by Unsloth — confirm
    load_in_4bit=True,
)

# Wrap the base model with LoRA adapters on the attention (q/k/v/o) and
# MLP (gate/up/down) projection layers.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    # Adapter shape
    r=16,  # LoRA rank
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=lora_target_modules,
    # Optional LoRA variants are left disabled
    use_rslora=False,
    loftq_config=None,
    # Memory / reproducibility
    use_gradient_checkpointing=True,
    random_state=3407,
)

# Dataset loading and preprocessing
def load_data(file_path):
    """Read a JSONL file and turn it into a ``Dataset`` of prompt/answer pairs.

    Each non-blank line is parsed as one JSON object. The prompt is built
    from the object's ``question`` field, prefixed with its ``context`` field
    when that field is present and truthy. The ``answer`` field becomes the
    target output.

    Args:
        file_path: Path to a UTF-8 encoded JSONL file.

    Returns:
        A ``Dataset`` whose rows have the keys ``"input"`` and ``"output"``.
    """
    rows = []
    with open(file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            # Blank (whitespace-only) lines carry no record.
            if not raw_line.strip():
                continue

            record = json.loads(raw_line)

            # Only include the context section when it exists and is non-empty.
            has_context = "context" in record and record["context"]
            prompt = (
                f"上下文: {record['context']}\n问题: {record['question']}"
                if has_context
                else f"问题: {record['question']}"
            )

            rows.append({"input": prompt, "output": record["answer"]})
    return Dataset.from_list(rows)

# Build the training dataset from the WebPuzzle JSONL file.
dataset = load_data("/kaggle/input/openwebpuzzle/webpuzzle_dataset.jsonl")

# Formatting function: renders examples with the tokenizer's chat template
def formatting_prompts(examples):
    """Convert a batch of input/output pairs into chat-formatted strings.

    Args:
        examples: A batch mapping with parallel ``"input"`` and ``"output"``
            sequences.

    Returns:
        A dict with a single ``"text"`` key holding one rendered string per
        example, in the same order as the inputs.
    """

    def _render(prompt, reply):
        # One user turn followed by the assistant's answer; no generation
        # prompt is appended because the assistant reply is already present.
        conversation = [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": reply},
        ]
        return tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )

    return {
        "text": [
            _render(prompt, reply)
            for prompt, reply in zip(examples["input"], examples["output"])
        ]
    }

# Apply the chat formatting in batches; formatting_prompts returns the
# rendered strings under the "text" key.
dataset = dataset.map(formatting_prompts, batched=True)

# Training configuration: three epochs, per-device batch of 2 with 4
# gradient-accumulation steps.
# Train in bf16 when the GPU supports it, otherwise fall back to fp16.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    # Run length, checkpoints and logging
    output_dir="./qwen2.5-7b-sft-output",
    num_train_epochs=3,
    save_strategy="epoch",
    logging_steps=10,
    report_to=[],  # empty list: no reporting integrations requested
    seed=3407,
    # Batching
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Optimizer and schedule
    optim="adamw_8bit",
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    # Mixed precision: exactly one of the two flags is enabled
    fp16=not use_bf16,
    bf16=use_bf16,
)

# Build the supervised fine-tuning trainer over the formatted "text" column.
# NOTE(review): tokenizer / dataset_text_field / max_seq_length / packing are
# passed straight to SFTTrainer; with the trl==0.22.2 pin from the install
# cell this presumably relies on Unsloth's backwards-compatibility patching
# of SFTTrainer — confirm before removing the unsloth import.
# NOTE(review): max_seq_length is 1024 here while the model was loaded with
# max_seq_length=2048 — confirm the shorter training length is intentional.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=1024,
    args=training_args,
    packing=False,  # sequence packing is turned off
)

# Run the fine-tuning loop.
trainer.train()

# Save the LoRA adapter and the tokenizer into the same output directory.
adapter_dir = "qwen2.5-7b-lora-adapter"
for artifact in (model, tokenizer):
    artifact.save_pretrained(adapter_dir)

