# 整理微调模型的代码(LLM)

## 配置文件

In [1]:
"""
LoRA training script for a MacBook Air M4, using the user-specified parameter configuration.
Works around compatibility problems on the MPS device.
"""
import os

# Set the environment variables before Transformers is imported.
os.environ.update(
    {
        "ACCELERATE_MIXED_PRECISION": "no",
        "ACCELERATE_USE_FP16": "false",
        "ACCELERATE_USE_BF16": "false",
        "TOKENIZERS_PARALLELISM": "false",
        "PYTORCH_ENABLE_MPS_FALLBACK": "1",
    }
)

# 导入需要使用到的库
import sys
import json
import warnings
import logging
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset, load_from_disk
import transformers
from peft import PeftModel
from colorama import Fore, Style

from tqdm import tqdm
from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    GenerationConfig
)
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_kbit_training
)

'NoneType' object has no attribute 'cadam32bit_grad_fp32'


## 使用到的方法

In [3]:
def generate_training_data(data_point):
    """
    Convert one instruction/input/output record into tokenized training features.

    The prompt is tokenized on its own to count its tokens; the prompt plus the
    expected answer is then tokenized, truncated and padded to ``CUTOFF_LEN``.
    Labels are a copy of the input ids in which the prompt tokens and all
    padding positions are replaced by -100, so that only the answer tokens
    remain as training targets.

    Args:
        data_point: mapping with the keys "instruction", "input" and "output".

    Returns:
        dict with "input_ids", "labels" and "attention_mask" lists. If anything
        fails while processing the record, the error is printed and a
        placeholder record of length ``CUTOFF_LEN`` with every label set to
        -100 is returned instead of raising.

    Relies on the module-level ``tokenizer`` and ``CUTOFF_LEN``.
    """
    try:
        # Build the full input prompt.
        # NOTE(review): this uses the [INST]/<<SYS>> template while evaluate()
        # in this file builds an <|im_start|> prompt - confirm which template
        # the model being fine-tuned actually expects.
        prompt = f"""[INST] <<SYS>>
You are a helpful assistant and good at writing Tang poem. 你是一個樂於助人的助手且擅長寫唐詩。
<</SYS>>

{data_point["instruction"]}
{data_point["input"]}
[/INST]"""

        # Count the tokens taken by the user prompt alone.
        prompt_tokenized = tokenizer(
            prompt,
            truncation=True,  # cut off text that exceeds max_length
            max_length=CUTOFF_LEN,
            padding=False,
            return_tensors=None
        )
        len_user_prompt_tokens = len(prompt_tokenized['input_ids'])

        # Tokenize prompt and expected answer together.
        full_text = prompt + " " + data_point["output"] + "</s>"
        full_tokenized = tokenizer(
            full_text,
            truncation=True,
            max_length=CUTOFF_LEN,
            padding="max_length",
            return_tensors=None
        )

        input_ids = full_tokenized['input_ids']
        attention_mask = full_tokenized["attention_mask"]

        # Build the labels. Padding positions (attention_mask == 0) are masked
        # so they are not used as targets, and the first
        # len_user_prompt_tokens real tokens are masked so the prompt is not
        # used as a target either. Counting only real tokens keeps the prompt
        # mask aligned regardless of which side the padding was added on.
        labels = []
        prompt_tokens_left = len_user_prompt_tokens
        for token_id, mask_bit in zip(input_ids, attention_mask):
            if mask_bit == 0:
                labels.append(-100)
            elif prompt_tokens_left > 0:
                labels.append(-100)
                prompt_tokens_left -= 1
            else:
                labels.append(token_id)

        return {
            "input_ids": input_ids,
            "labels": labels,
            "attention_mask": attention_mask,
        }
    except Exception as e:
        # Deliberate best-effort: report the bad record and hand back a
        # placeholder whose labels are all -100 rather than aborting the run.
        print(f"数据处理错误: {e}")
        return {
            "input_ids": [0] * CUTOFF_LEN,
            "labels": [-100] * CUTOFF_LEN,
            "attention_mask": [1] * CUTOFF_LEN,
        }


In [None]:
def evaluate(instruction, generation_config, max_len, input_text="", verbose=True):
    """
    Generate a response for an instruction using a Qwen-style chat prompt.

    Args:
        instruction: the task description placed in the user turn.
        generation_config: generation settings forwarded to ``model.generate``.
        max_len: maximum number of new tokens to generate.
        input_text: optional extra input appended after the instruction.
        verbose: when true, print the cleaned response.

    Returns:
        The decoded text found after the assistant marker and before the
        first end-of-turn marker, stripped of surrounding whitespace.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.
    """
    prompt = (
        "<|im_start|>system\n你是一位擅长写唐诗的中文助手。\n<|im_end|>\n"
        f"<|im_start|>user\n{instruction}\n{input_text}\n<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    # Read from the tokenizer result; the original subscripted the builtin
    # `input`, which raises TypeError.
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            # Forward the tokenizer's mask explicitly instead of leaving
            # generate() to infer it from the input ids.
            attention_mask=inputs.get("attention_mask"),
            generation_config=generation_config,
            max_new_tokens=max_len,
            return_dict_in_generate=True,
            output_scores=True
        )

    # Special tokens are kept in the decoded text because the chat markers
    # are used below to cut out the response.
    output = tokenizer.decode(generation_output.sequences[0], skip_special_tokens=False)
    # Clean the output: keep what follows the assistant marker and drop
    # everything from the end-of-turn marker onwards.
    if "<|im_start|>assistant" in output:
        output = output.split("<|im_start|>assistant")[1]
    if "<|im_end|>" in output:
        output = output.split("<|im_end|>")[0]
    output = output.strip()

    if verbose:
        print(output)
    return output