In [None]:
import torch

# Clear the CUDA cache (skipped entirely when CUDA is not available).
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
from unsloth import FastLanguageModel

In [None]:
# Loading options shared by the cells below: all three are passed to
# FastLanguageModel.from_pretrained, and max_seq_length is also passed to
# SFTTrainer.
load_in_4bit = False    # 4-bit loading switched off
dtype = None            # presumably lets the loader pick the dtype - confirm in Unsloth docs
max_seq_length = 4096   # sequence length handed to the loader and the trainer

In [None]:
# Load the model and its tokenizer from a local checkpoint directory.
# Checkpoints used by earlier revisions of this cell (kept for reference):
#   /root/autodl-tmp/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
#   /root/autodl-tmp/dataset/model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="/root/autodl-tmp/dataset/new_model/qwen-mdpo1",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

In [None]:
# Prompt template for the training examples. The three "{}" slots are
# positional and are filled via str.format in the order:
# instruction, input, output (see formatting_prompts_func).
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}
"""  # placeholders are positional (not named); the template ends with a newline

In [None]:
def formatting_prompts_func(examples):
    """Render a batch of records into complete training prompts.

    Args:
        examples: Column-oriented batch mapping the keys "instruction",
            "input" and "output" to equally indexed sequences (this function
            is used with ``Dataset.map(..., batched=True)``).

    Returns:
        A dict with the single key "text" holding one string per record:
        ``train_prompt_style`` filled with the record's three fields,
        followed by ``EOS_TOKEN``.

    Note:
        ``train_prompt_style`` and ``EOS_TOKEN`` are looked up in module scope
        at call time, so both must be defined before this function is called.
    """
    records = zip(examples["instruction"], examples["input"], examples["output"])
    rendered = [
        train_prompt_style.format(task, context, answer) + EOS_TOKEN
        for task, context, answer in records
    ]
    return {"text": rendered}

In [None]:
# End-of-sequence token that formatting_prompts_func appends to every
# training example; echoed as the cell output for a quick visual check.
EOS_TOKEN = tokenizer.eos_token
EOS_TOKEN

In [None]:
from datasets import load_dataset

# Load the training split from a local JSON file.
# NOTE(review): the original comment assumed JSONL while the file extension is
# .json - confirm the actual on-disk format.
raw_dataset = load_dataset(
    "json",
    data_files="/root/autodl-tmp/dataset/new_data/train/sft_data_with_cot.json",
    split="train",
)

# Apply the prompt formatter in batches and drop the original columns, so the
# resulting dataset only carries the "text" column the formatter returns.
dataset = raw_dataset.map(
    formatting_prompts_func,
    batched=True,
    remove_columns=raw_dataset.column_names,
)

In [None]:
# Preview the first formatted training example.
dataset[0]["text"]

In [None]:
# Print the number of entries in the dataset.
print(f"数据集包含 {dataset.num_rows} 条数据")

In [None]:
# Projection layers that receive LoRA adapters: attention (q/k/v/o) and
# MLP (gate/up/down).
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the loaded model with LoRA adapters.
# NOTE: this rebinds `model`; re-running the cell without reloading the model
# would pass the already wrapped model back into get_peft_model.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=lora_target_modules,
    r=32,
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

In [None]:
# Build the supervised fine-tuning (SFT) trainer.

# Mixed precision: use bf16 when supported, otherwise fall back to fp16.
bf16_supported = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="/root/outputs",
    seed=3407,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    num_train_epochs=3,  # alternative for a short run: max_steps=60
    warmup_steps=5,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    optim="adamw_8bit",
    weight_decay=0.01,
    bf16=bf16_supported,
    fp16=not bf16_supported,
    logging_steps=5,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # column produced by formatting_prompts_func
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

In [None]:
# Run training; the value returned by trainer.train() is kept for inspection.
trainer_stats = trainer.train()

In [None]:
# Show the value returned by trainer.train() as the cell output.
trainer_stats

In [None]:
# Persist the fine-tuned model and tokenizer with save_pretrained.
# NOTE(review): `model` is the PEFT-wrapped model here, so this presumably
# writes only the adapter weights - confirm.
new_model_local = "/root/qwen-sft"
model.save_pretrained(new_model_local)
tokenizer.save_pretrained(new_model_local)

# Additionally export with save_method="merged_16bit".
# NOTE(review): this target is a relative path (resolved against the current
# working directory), unlike the absolute path above - confirm it is intended.
model.save_pretrained_merged(
    "new_model/qwen-sft",
    tokenizer,
    save_method="merged_16bit",
)