In [None]:
!uv pip install torch transformers peft datasets pandas accelerate

In [1]:
import pandas as pd
import torch
import numpy as np
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification, 
    TrainingArguments, 
    Trainer,
    DataCollatorWithPadding
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.model_selection import train_test_split
from datasets import Dataset

# --- Configuration ---
MODEL_NAME = "zai-org/chatglm3-6b"  # Replace with your local model path if you have one
MAX_LENGTH = 2048 # Token budget per example (used as the tokenizer truncation length); lower it if GPU memory is tight
OUTPUT_DIR = "./chatglm3_kaggle_output"  # Where checkpoints and the final model are written

# --- 1. 数据处理 ---
def prepare_dataset(csv_path, tokenizer):
    """Load the preference CSV and return a tokenized train/validation split.

    The CSV is expected to provide the columns ``prompt``, ``response_a``,
    ``response_b``, ``winner_model_a`` and ``winner_model_b``. Each row is
    turned into a single comparison prompt and a class label:

    * 0 -- response A wins (``winner_model_a == 1``)
    * 1 -- response B wins (``winner_model_b == 1``)
    * 2 -- anything else (tie)

    Args:
        csv_path: Path of the CSV file to read.
        tokenizer: Tokenizer callable used to encode the prompt text.

    Returns:
        The result of ``Dataset.train_test_split``: a mapping with ``"train"``
        and ``"test"`` splits (90% / 10%). Each split holds only the tokenizer
        outputs plus the ``label`` column.
    """
    df = pd.read_csv(csv_path).fillna("")

    # Label encoding; the first matching condition wins, so A takes priority
    # over B exactly like an if/elif chain.
    df['label'] = np.select(
        [df['winner_model_a'] == 1, df['winner_model_b'] == 1],
        [0, 1],
        default=2,  # Tie
    )

    # Build the comparison prompt. Kept short so the model focuses on the
    # comparison itself.
    df['text'] = [
        (
            f"<|user|>\nPlease evaluate these two responses:\n\n"
            f"[Prompt]: {prompt}\n\n"
            f"[Response A]: {response_a}\n\n"
            f"[Response B]: {response_b}\n\n"
            f"Which is better?\n<|assistant|>"
        )
        for prompt, response_a, response_b in zip(
            df['prompt'], df['response_a'], df['response_b']
        )
    ]

    # Convert to a Hugging Face Dataset. preserve_index=False keeps the pandas
    # index from being carried along as an extra column.
    raw_dataset = Dataset.from_pandas(df[['text', 'label']], preserve_index=False)

    def preprocess_function(examples):
        return tokenizer(
            examples['text'],
            truncation=True,
            max_length=MAX_LENGTH,
            padding=False,  # Padding is done per batch by the data collator
        )

    # Drop the raw "text" column once it has been tokenized. The Trainer in
    # this file runs with remove_unused_columns=False, so any string column
    # left here would be handed to the padding collator, which cannot turn
    # strings into tensors.
    tokenized_dataset = raw_dataset.map(
        preprocess_function,
        batched=True,
        remove_columns=["text"],
    )

    # Hold out 10% for validation; a fixed seed makes the split reproducible.
    return tokenized_dataset.train_test_split(test_size=0.1, seed=42)

# --- 2. 评估指标 ---
# Trainer 会自动调用这个函数来计算准确率
def compute_metrics(eval_pred):
    """Compute classification accuracy from a ``(logits, labels)`` pair.

    Args:
        eval_pred: Two-element iterable; the first item holds per-class
            scores (classes on the last axis), the second the true labels.

    Returns:
        A dict with the single key ``"accuracy"``: the fraction of examples
        whose arg-max class equals the label.
    """
    scores, gold = eval_pred
    # The predicted class is the index of the highest score on the last axis.
    predicted = np.argmax(scores, axis=-1)
    hits = np.equal(predicted, gold)
    return {"accuracy": np.mean(hits)}

def main():
    """Fine-tune the sequence-classification model with LoRA and save it.

    Steps: load the tokenizer, build the tokenized dataset from ``train.csv``,
    load the base model with a 3-class head, wrap it with a LoRA adapter,
    train with the Hugging Face ``Trainer`` and write the result to
    ``OUTPUT_DIR``.
    """
    # Local import: only needed for the library-version checks further down.
    import inspect

    # --- Tokenizer ---
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

    # Padding needs a pad token; fall back to EOS when the tokenizer has none.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # --- Data ---
    dataset = prepare_dataset("train.csv", tokenizer)

    # --- Model ---
    # num_labels=3 corresponds to: A wins, B wins, tie.
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=3,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map="auto",  # Let the library place the weights on available devices
    )

    # The classification head needs to know which token id is padding.
    model.config.pad_token_id = tokenizer.pad_token_id

    # --- LoRA (PEFT) ---
    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=8,  # Adapter rank; larger means more trainable parameters
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=["query_key_value"],  # Attention projection to adapt
    )

    model = get_peft_model(model, peft_config)

    # The base model is loaded in float16 and training below uses fp16 mixed
    # precision. The gradient scaler refuses to unscale float16 gradients, so
    # every trainable parameter is upcast to float32; the frozen base weights
    # stay in float16.
    for param in model.parameters():
        if param.requires_grad:
            param.data = param.data.float()

    model.print_trainable_parameters()  # Sanity check that only a small subset is trainable

    # --- Trainer configuration ---
    training_kwargs = dict(
        output_dir=OUTPUT_DIR,
        learning_rate=2e-4,
        per_device_train_batch_size=2,  # Lower this if GPU memory runs out
        per_device_eval_batch_size=2,
        gradient_accumulation_steps=4,  # Emulates a larger batch within the same memory
        num_train_epochs=1,
        weight_decay=0.01,
        eval_steps=100,
        save_strategy="steps",
        save_steps=100,
        logging_steps=10,
        fp16=True,  # Mixed-precision training
        report_to="none",  # Disable external experiment trackers
        remove_unused_columns=False,  # Keep the Trainer from dropping dataset columns
        label_names=["labels"],  # Name of the label entry in each batch
    )
    # Newer transformers releases call this argument "eval_strategy"; older
    # ones call it "evaluation_strategy". Use whichever the installed one accepts.
    training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
    if "eval_strategy" in training_arg_names:
        training_kwargs["eval_strategy"] = "steps"
    else:
        training_kwargs["evaluation_strategy"] = "steps"
    training_args = TrainingArguments(**training_kwargs)

    trainer_kwargs = dict(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
        compute_metrics=compute_metrics,
    )
    # Same story for the Trainer: "tokenizer" was renamed "processing_class".
    trainer_arg_names = inspect.signature(Trainer.__init__).parameters
    if "processing_class" in trainer_arg_names:
        trainer_kwargs["processing_class"] = tokenizer
    else:
        trainer_kwargs["tokenizer"] = tokenizer
    trainer = Trainer(**trainer_kwargs)

    # --- Train ---
    trainer.train()

    # --- Save ---
    trainer.save_model(OUTPUT_DIR)
    print("Training finished and model saved.")

# Run the training pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

KeyboardInterrupt: 