# Unsloth 的微调样例代码

## 环境配置

- 系统：`Ubuntu 20.04`
- CUDA：12.4，驱动版本：550.54.15

### 安装Conda
```bash
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh
source ~/.bashrc
```

### 创建虚拟环境、添加一些镜像列表
```bash
# conda环境的镜像
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
conda config --add channels https://mirrors.bfsu.edu.cn/anaconda/pkgs/free/
conda config --add channels https://mirrors.bfsu.edu.cn/anaconda/pkgs/main/
conda config --add channels https://mirrors.bfsu.edu.cn/anaconda/cloud/conda-forge/
# pip镜像（以下每条命令都会覆盖上一条的设置，任选其一执行即可）
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
pip config set global.index-url http://mirrors.cloud.tencent.com/pypi/simple
pip config set global.index-url http://pypi.douban.com/simple/
pip config set global.index-url https://mirrors.163.com/pypi/simple/
# 创建虚拟环境
conda create -n <环境名称> python=3.12
# 激活你创建的环境
conda activate <环境名称>
```

### 安装运行库
```bash
# pytorch安装
pip install --upgrade torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124 --trusted-host download.pytorch.org
# 其他的依赖安装
pip install --upgrade torch==2.6.0 transformers BitsandBytes accelerate nltk numpy pandas tensorboardX evaluate scikit-learn sentence-transformers tiktoken deepspeed SentencePiece unsloth qwen-vl-utils[decord] nvitop trl -i https://mirrors.aliyun.com/pypi/simple/
# 如果使用 Jupyter Notebook，还需要安装以下两个包（<环境名称> 与上面创建的环境保持一致）
conda install -n <环境名称> ipykernel --update-deps --force-reinstall
conda install -n <环境名称> IProgress
```

## 配置GPU并导入依赖包

In [1]:
import os, re, argparse
os.environ["CUDA_VISIBLE_DEVICES"] = "5" # 这里替换成你想要的GPU序号
import torch
from tqdm.auto import tqdm
import numpy as np
from trl import SFTConfig, SFTTrainer
from transformers import (
    TrainingArguments,
    LogitsProcessorList,
    InfNanRemoveLogitsProcessor,
    PreTrainedTokenizer,
    PreTrainedTokenizerBase,
    PreTrainedTokenizerFast,
    AutoTokenizer,
    AutoConfig,
    AutoModelForCausalLM,
    PreTrainedModel,
    PretrainedConfig,
    BitsAndBytesConfig,
    GenerationConfig,
    Seq2SeqTrainingArguments,
    Trainer,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
)
from transformers.trainer import TRAINER_STATE_NAME
from transformers.utils.versions import require_version
from transformers.trainer_utils import SaveStrategy
from transformers.tokenization_utils import PaddingStrategy
from peft import PeftModel, LoraConfig, get_peft_model, TaskType
from peft.utils import (
    CONFIG_NAME,
    SAFETENSORS_WEIGHTS_NAME,
    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
)

## 加载模型模板

In [2]:
from datasets import Dataset, IterableDataset
from dataclasses import dataclass
# from 模板 import get_template_and_model_path
# from public_apis.file_rw import *
from typing import Union, Optional, List, Dict, Any, Literal, Sequence, Tuple

# Seed constant (not referenced in the visible part of this file).
RANDOM_SEED = 42
# Value written over the prompt positions of `labels` in
# `process_tokens_tokenizer_functions`; presumably matches the loss function's
# ignore index so those positions do not contribute to the loss -- TODO confirm.
IGNORE_INDEX=-100

# Prompt templates keyed by model family. Every template contains a
# `{user_query}` placeholder that is filled in with `str.format`.
TEMPLATE_DICT={
    'llama2':"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. \n{user_query}\nAssistant:  ",
    'llama3':"<|start_header_id|>system<|end_header_id|>\nYou are a helpful assistant. <|eot_id|>\n\n<|start_header_id|>user<|end_header_id|>\n{user_query}<|eot_id|>\n\n<|start_header_id|>assistant<|end_header_id|>\n",
    'qwen2.5':"<|im_start|>system\nYou are a helpful assistant. <|im_end|>\n<|im_start|>user\n{user_query}<|im_end|>\n<|im_start|>assistant\n",
    'gpt2':"{user_query}\n"
}

# Root directory that the model folders in MODEL_NAME_DICT are joined onto.
MODEL_BASE_PATH='/data'

# MODEL_BASE_PATH='/data/lzy/models/LLM'

# Short model alias -> local model directory (built with os.path.join on MODEL_BASE_PATH).
MODEL_NAME_DICT={
    'llama2-7b':os.path.join(MODEL_BASE_PATH,'llama','llama-2-7b-hf'),
    'llama2-13b':os.path.join(MODEL_BASE_PATH,'llama','llama-2-13b-hf'),
    'llama3.1-8b-i':os.path.join(MODEL_BASE_PATH,'llama','llama3.1-8b-instruct'),
    'llama3.1-8b':os.path.join(MODEL_BASE_PATH,'llama','llama3.1-8b'),
    'llama3.2-1b-i':os.path.join(MODEL_BASE_PATH,'llama','llama3.2-1B-Instruct'),
    'llama3.2-3b-i':os.path.join(MODEL_BASE_PATH,'llama','llama3.2-3B-Instruct'),
    'qwen2.5-0.5b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-0.5B-Instruct'),
    'qwen2.5-1.5b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-1.5B-Instruct'),
    'qwen2.5-3b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-3B-Instruct'),
    'qwen2.5-7b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-7B-Instruct'),
    'qwen2.5-14b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-14B-Instruct'),
    'qwen2.5-coder-0.5b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-Coder-0.5B-Instruct'),
    'qwen2.5-coder-1.5b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-Coder-1.5B-Instruct'),
    'qwen2.5-coder-3b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-Coder-3B-Instruct'),
    'qwen2.5-coder-7b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-Coder-7B-Instruct'),
    'qwen2.5-coder-14b-i':os.path.join(MODEL_BASE_PATH,'Qwen','Qwen2.5-Coder-14B-Instruct'),
    # The GPT-2 entries are disabled, so the 'gpt2' template above has no matching alias here.
    # 'gpt2l':os.path.join(MODEL_BASE_PATH,'openai-community','gpt2-large'),
    # 'gpt2':os.path.join(MODEL_BASE_PATH,'openai-community','gpt2')
}

def get_template_and_model_path(**arguments):
    """Resolve a model alias to its local path and prompt template.

    Keyword Args:
        model_name: Alias listed in ``MODEL_NAME_DICT``; converted with
            ``str()``. Defaults to ``'llama3.1-8b-i'``.

    Returns:
        A ``(model_name, model_name_or_path, template)`` tuple.

    Raises:
        KeyError: If the alias is not a key of ``MODEL_NAME_DICT``.
    """
    model_name = str(arguments.get('model_name', 'llama3.1-8b-i'))
    model_name_or_path = MODEL_NAME_DICT[model_name]
    # The first family prefix that matches wins; every other alias uses the
    # Llama 3 template.
    template_key = next(
        (
            family
            for family in ('qwen2.5', 'llama2', 'gpt2')
            if model_name.startswith(family)
        ),
        'llama3',
    )
    return model_name, model_name_or_path, TEMPLATE_DICT[template_key]


# Custom error type: a guard against malformed dataset records that also keeps
# the call sites simple.
class CustomDatasetTokenizerError(Exception):
    """Raised when a dataset record has none of the supported key layouts.

    The exception takes no constructor arguments; the fixed explanation is
    stored on ``message`` and is what ``str()`` returns.
    """

    def __init__(self):
        super().__init__()
        # The message lists the three accepted key combinations.
        self.message = (
            "数据集元素的输入键值必须是以下组合："
            "\n\t\"prompt\",\"query\",\"response\""
            "\n\t\"instruction\",\"input\",\"output\","
            "\n\t\"prompt\",\"completion\""
        )

    def __str__(self):
        return self.message
# `prompt_template` must contain a placeholder named `user_query`.
# This function serves hand-rolled fine-tuning (feeding the `Trainer` or
# `Seq2SeqTrainer` from `transformers` directly): it turns one record loaded
# from JSON into a tokenized training sample in a single step.
def process_tokens_tokenizer_functions(example: dict[str, str],tokenizer: Union[PreTrainedTokenizer,PreTrainedTokenizerBase,PreTrainedTokenizerFast,AutoTokenizer],prompt_template: str,train_mode: Literal["sft", "pt"] = "sft"):
    """Tokenize one dataset record into ``input_ids``/``attention_mask``/``labels``.

    Supported key layouts, checked in this order:
    ``prompt`` + ``query`` + ``response``; ``instruction`` + ``input`` +
    ``output``; ``prompt`` + ``completion``.

    Args:
        example: One dataset record.
        tokenizer: Object exposing ``encode(text)`` (returning a list of token
            ids) and ``eos_token_id``.
        prompt_template: Template with a ``{user_query}`` placeholder.
        train_mode: ``"sft"`` overwrites the prompt part of ``labels`` with
            ``IGNORE_INDEX``; ``"pt"`` leaves every position as a label.

    Returns:
        A dict with ``input_ids``, ``attention_mask`` and ``labels`` (all the
        same length), plus ``qid`` when the record carries ``qid`` or ``ID``.

    Raises:
        CustomDatasetTokenizerError: If no supported key layout is present.
    """
    fields = example.keys()
    if "prompt" in fields and "query" in fields and "response" in fields:
        user_query = "{}\n{}\n".format(example["prompt"], example["query"])
        completion = example["response"]
    elif "instruction" in fields and "input" in fields and "output" in fields:
        user_query = "{}\n{}\n".format(example["instruction"], example["input"])
        completion = example["output"]
    elif "prompt" in fields and "completion" in fields:
        user_query = "{}\n".format(example["prompt"])
        completion = example["completion"]
    else:
        raise CustomDatasetTokenizerError()
    prompt = prompt_template.format(user_query=user_query)

    # Full training sequence: prompt followed by the completion.
    input_id = tokenizer.encode("{}{}".format(prompt, completion))
    # Make sure the sample ends with the end-of-sequence token. An empty
    # encoding no longer raises IndexError, and a tokenizer without an EOS
    # token no longer gets `None` appended as a token id.
    eos_token_id = tokenizer.eos_token_id
    if eos_token_id is not None and (not input_id or input_id[-1] != eos_token_id):
        input_id.append(eos_token_id)

    # NOTE(review): the prompt is tokenized on its own to count prompt tokens;
    # this assumes the count equals the prompt's share of the joint encoding --
    # confirm for the tokenizer in use.
    input_id_before_label = tokenizer.encode(prompt)
    label = input_id.copy()
    # SFT computes the loss on the completion only; "pt" uses every position.
    if train_mode == "sft":
        # Clamp so `labels` can never grow longer than `input_ids`.
        masked_length = min(len(input_id_before_label), len(label))
        label[:masked_length] = [IGNORE_INDEX] * masked_length

    model_inputs = {
        "input_ids": input_id,
        "attention_mask": [1] * len(input_id),
        "labels": label,
    }
    # Carry the record identifier through when one is available.
    if "qid" in fields:
        model_inputs["qid"] = example["qid"]
    elif "ID" in fields:
        model_inputs["qid"] = example["ID"]
    return model_inputs
# Convert a raw record straight into the format `trl` accepts (plain text is
# enough; the remaining processing is left to the trainer).
def process_dataset_functions(example: dict[str, str]):
    """Map one raw record onto prompt/completion/text columns.

    Supported key layouts, checked in this order:
    ``prompt`` + ``query`` + ``response``; ``instruction`` + ``input`` +
    ``output``; ``prompt`` + ``completion``.

    Args:
        example: One dataset record.

    Returns:
        A dict with ``prompt``, ``completion``, ``text`` (an
        instruction/response rendering of both) and ``ground_truth`` (same
        value as ``completion``).

    Raises:
        CustomDatasetTokenizerError: If no supported key layout is present.
    """
    available = example.keys()
    # (prompt-side keys, completion key); the first fully present layout wins.
    layouts = (
        (("prompt", "query"), "response"),
        (("instruction", "input"), "output"),
        (("prompt",), "completion"),
    )
    for prompt_keys, completion_key in layouts:
        if all(key in available for key in (*prompt_keys, completion_key)):
            break
    else:
        raise CustomDatasetTokenizerError()

    # Each prompt-side field contributes its own newline-terminated line.
    prompt = "".join("{}\n".format(example[key]) for key in prompt_keys)
    completion = example[completion_key]
    text = "### Instruction:\n {}\n ### Response:\n {}\n".format(prompt, completion)
    return {
        "prompt": prompt,
        "completion": completion,
        "text": text,
        "ground_truth": completion,
    }
# Regular expressions used when searching for checkpoints.
# CHECKPOINT_FOLD matches a checkpoint directory name such as `checkpoint-1000`.
CHECKPOINT_FOLD = re.compile(r"(?:checkpoint\-\d+)")
# FINALMODEL_NAME matches saved weight file names: `model`, optionally prefixed
# with `adapter_` or `pytorch_`, optionally followed by `-<digits>`, with a
# `.safetensors` or `.bin` extension (e.g. `adapter_model.safetensors`).
FINALMODEL_NAME = re.compile(r"(?:(?:(?:adapter|pytorch)_)?model(?:\-\d+)?\.(?:safetensors|bin))")
# Checkpoint lookup helper: whole-name pattern check.
def checkout_format(string: str, pattern: re.Pattern):
    """Tell whether ``pattern`` matches the whole of ``string``.

    ``Pattern.fullmatch`` is used instead of comparing ``findall`` results:
    ``findall`` returns the groups rather than the matched text when the
    pattern contains capturing groups, which made the whole-string comparison
    fail for such patterns.

    Args:
        string: Candidate file or directory name.
        pattern: Compiled regular expression.

    Returns:
        ``True`` if ``string`` is non-empty and matches ``pattern`` in full,
        ``False`` otherwise.
    """
    # An empty name never counts as a match, even for patterns that can match
    # the empty string.
    return bool(string) and pattern.fullmatch(string) is not None
# Checkpoint lookup helper: sort key for checkpoint directory names.
def checkpoint_sort_func(checkpoint_dirname: str):
    """Return the integer in the last non-blank ``-``-separated piece.

    Args:
        checkpoint_dirname: Name such as ``checkpoint-1000``.

    Returns:
        The trailing piece converted with ``int``.

    Raises:
        IndexError: If the name has no non-blank piece.
        ValueError: If the last non-blank piece is not an integer.
    """
    stripped_pieces = (piece.strip() for piece in checkpoint_dirname.split("-"))
    # Blank pieces (e.g. from a trailing "-") are ignored.
    non_blank = list(filter(None, stripped_pieces))
    return int(non_blank[-1])
# Checkpoint lookup helper: find the directory that holds saved weights.
def findout_checkpoint(path: str):
    """Locate the directory containing saved model weights under ``path``.

    ``path`` itself is returned when it directly contains a file whose name
    matches ``FINALMODEL_NAME``. Otherwise its ``checkpoint-<N>`` entries are
    searched recursively, highest ``N`` first.

    Args:
        path: Directory to inspect.

    Returns:
        The first directory found that contains a weight file, or ``None``
        when ``path`` is not a directory or nothing is found.
    """
    # A missing path or a regular file cannot hold checkpoints; checking for a
    # directory (not mere existence) keeps `os.listdir` from raising on files.
    if not os.path.isdir(path):
        return None
    items = os.listdir(path)
    if any(checkout_format(n, FINALMODEL_NAME) for n in items):
        return path
    check_dirs = [n for n in items if checkout_format(n, CHECKPOINT_FOLD)]
    if not check_dirs:
        return None
    # Newest checkpoint (largest step number) first.
    check_dirs.sort(key=checkpoint_sort_func, reverse=True)
    for f in check_dirs:
        result = findout_checkpoint(os.path.join(path, f))
        if result is not None:
            return result
    return None

In [3]:
# Notebook cell: choose the base model, declare the run configuration, then
# load the tokenizer and model and attach a LoRA adapter.
model_name, model_name_or_path, template = get_template_and_model_path(
    model_name="llama3.1-8b-i"
)

# Directory scanned further down for previously saved LoRA weights.
output_dir = "results/sft_trl_none_unsloth"

config_dicts = dict(
    ignore_pad_token_for_loss=True,
    lora_config_dicts=dict(
        lora_rank=8, lora_alpha=32, lora_dropout=0.1, additional_target=None
    ),
)

training_config_dict = dict(
    data_path="/data1/SG_KBQA/SG_KBQA/gen_dataset/webqsp/train_examples.json",
    output_dir=output_dir,
    overwrite_output_dir=False,
    do_train=True,
    lr_scheduler_type="cosine",
    learning_rate=1e-4,
    num_train_epochs=20,
    save_steps=1000,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    fp16=True,
    bf16=False,
    logging_dir=os.path.join(output_dir, "logs"),
    logging_steps=1000,
    ddp_find_unused_parameters=False,
    resume_from_checkpoint=False,
    warmup_ratio=0,
    gradient_checkpointing=False,
)
eval_config_dict = dict(
    data_path="/data1/SG_KBQA/SG_KBQA/gen_dataset/webqsp/dev_examples.json",
    max_new_tokens=2048,
)


# Generation settings: values from `eval_config_dict` when present, otherwise
# the defaults given here.
generating_args = {
    "val_size": eval_config_dict.get("val_size", 0),
    "streaming": eval_config_dict.get("streaming", False),
    "buffer_size": eval_config_dict.get("buffer_size", 16384),
    "do_sample": eval_config_dict.get("do_sample", True),
    "temperature": eval_config_dict.get("temperature", 1),
    "top_p": eval_config_dict.get("top_p", 0.7),
    "top_k": eval_config_dict.get("top_k", 50),
    "num_beams": eval_config_dict.get(
        "num_beams", 8
    ),  # `None` during training; greater than 0 at inference time
    "max_new_tokens": eval_config_dict.get("max_new_tokens", 1024),
    "repetition_penalty": eval_config_dict.get("repetition_penalty", 1.0),
    "length_penalty": eval_config_dict.get("length_penalty", 1.0),
    # "num_beam_groups":4,
    # "diversity_penalty":0.2
}

# LoRA is applied to the four attention projection modules named below.
lora_config_dicts = config_dicts["lora_config_dicts"]
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=lora_config_dicts["lora_rank"],
    lora_alpha=lora_config_dicts["lora_alpha"],
    lora_dropout=lora_config_dicts["lora_dropout"],
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    modules_to_save=lora_config_dicts.get("additional_target", None),
)
prompt_template = template

model_path = model_name_or_path
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="cuda",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    # `token` replaces the deprecated `use_auth_token` argument.
    token=True,
)
model.config.use_cache = False
model = get_peft_model(model, lora_config, "default")
# Pick up LoRA weights from an earlier run if any are found under `output_dir`.
loadpath = findout_checkpoint(output_dir)
if loadpath is not None:
    print("从`{}`加载lora头".format(loadpath))
    model.load_adapter(loadpath, "default")



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

## 加载数据集并训练

In [4]:
# Notebook cell: load the training split, convert it to prompt/completion
# text columns and fine-tune with `SFTTrainer`.
from datasets import load_dataset

dataset = load_dataset("/data1/SG_KBQA/SG_KBQA/gen_dataset/webqsp", split="train")
print(dataset[0])
# Drop the source columns inside the same `map` call. Removing them afterwards
# with `remove_columns(dataset.column_names)` would also delete the freshly
# generated `prompt`/`completion` columns whenever the source dataset already
# has columns with those names.
dataset_processed = dataset.map(
    process_dataset_functions,
    remove_columns=dataset.column_names,
)
print(dataset_processed[0])

trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=dataset_processed,
    args=SFTConfig(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        num_train_epochs=20,  # number of passes over the training set
        save_steps=1000,
        save_strategy=SaveStrategy.STEPS,
        # max_steps = 60,
        learning_rate=1e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        # NOTE(review): checkpoints are written to "outputs", while the adapter
        # lookup earlier in this notebook scans "results/sft_trl_none_unsloth"
        # -- confirm which directory is intended.
        output_dir="outputs",
        report_to="none",  # Use this for WandB etc
        activation_offloading=True,
        completion_only_loss=True,
        # NOTE(review): newer TRL releases may name this option `max_length`
        # -- confirm against the installed TRL version.
        max_seq_length=2048,
        dataset_num_proc=24,
        packing=True,  # Can make training 5x faster for short sequences.
        padding_free=True,
    ),
)
trainer.train()
trainer.save_model()

{'qid': 'WebQTrn-3581', 'instruction': 'Please translate the following question into logical form using the provided relations and entities.', 'input': 'Question: where is university of auburn\nCandidate relations with their corresponding domain class [D], name [N], range class [R]:\n[D] education.education [N] education.education.institution [R] education.educational_institution\n[D] people.person [N] people.person.education [R] education.education\n[D] location.mailing_address [N] location.mailing_address.citytown [R] location.location\n[D] common.topic [N] common.topic.notable_types [R] type.type\n[D] people.person [N] people.person.place_of_birth [R] location.location\n[D] organization.organization [N] organization.organization.headquarters [R] location.mailing_address\n[D] location.location [N] location.location.containedby [R] location.location\n[D] location.mailing_address [N] location.mailing_address.country [R] location.country\n[D] location.mailing_address [N] location.mailin

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[2025-07-09 10:54:26,506] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/lzy/miniconda3/envs/kbqa/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/home/lzy/miniconda3/envs/kbqa/compiler_compat/ld: cannot find -lcufile: No such file or directory
collect2: error: ld returned 1 exit status


[2025-07-09 10:54:27,531] [INFO] [logging.py:107:log_dist] [Rank -1] [TorchCheckpointEngine] Initialized with serialization = False


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
1,1.0302
2,1.0573
3,1.0053
4,0.9854
5,0.9643
6,0.9698
7,0.9525
8,0.8954
9,0.8895
10,0.8772
