In [None]:
# =======================================
# 0. Environment & PyTorch CUDA memory-fragmentation settings
# =======================================
import os
# Configure the CUDA caching allocator via PYTORCH_CUDA_ALLOC_CONF with
# max_split_size_mb:128 and garbage_collection_threshold:0.6.
# Original intent: break up large memory blocks and garbage-collect more
# eagerly before allocating.
# NOTE(review): this is set ahead of `import torch`, presumably so the
# allocator picks it up — keep the ordering unless confirmed otherwise.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,garbage_collection_threshold:0.6"
import torch
# Start from a clean slate: drop any cached CUDA memory.
torch.cuda.empty_cache()

# =======================================
# 1. Package imports & path settings
# =======================================
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model
)

# File paths
TRAIN_CSV  = "/content/drive/My Drive/final/train.csv"         # your train.csv
MODEL_PATH = "deepseek-ai/deepseek-llm-7b-base"
OUTPUT_DIR = "/content/drive/My Drive/final/deepseek-7b-lora-cls"

# Hyperparameters
MAX_LEN     = 128     # max sequence length (passed to the tokenizer as max_length)
BATCH_SIZE  = 40     # per-device training batch size
ACCUM_STEPS = 80      # gradient accumulation steps
# NOTE(review): BATCH_SIZE * ACCUM_STEPS = 3200 samples per optimizer step
# per device — confirm this effective batch size is intended.
LR          = 1e-5
EPOCHS      = 3

# Mount Google Drive. Originally marked "(optional)", but TRAIN_CSV and
# OUTPUT_DIR both live under /content/drive, so the mount is needed for
# those paths to resolve. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# =======================================
# 2. Read the CSV & parse JSON lists
# =======================================
# Load the training table from TRAIN_CSV into a DataFrame.
df = pd.read_csv(TRAIN_CSV)

def extract_first(x):
    """Return the first element of a JSON-encoded list, or ``x`` unchanged.

    Parsing is best-effort: any value that cannot be decoded into a
    non-empty list is passed through untouched.

    Args:
        x: A cell value. Expected to be a JSON string such as
            ``'["hello", "world"]'``, but any object is accepted.

    Returns:
        The first element of the decoded list when ``x`` decodes to a
        non-empty JSON list. Otherwise ``x`` itself — this covers malformed
        JSON, non-string input (e.g. ``None`` or NaN), JSON values that are
        not lists, and empty lists.
    """
    try:
        arr = json.loads(x)
    except (TypeError, ValueError, RecursionError):
        # Only data-driven failures are tolerated here:
        #   TypeError      - x is not str/bytes/bytearray (e.g. NaN, None)
        #   ValueError     - malformed JSON (JSONDecodeError subclasses it)
        #   RecursionError - pathologically deep nesting
        # KeyboardInterrupt/SystemExit and genuine bugs now propagate.
        return x
    if isinstance(arr, list) and arr:
        return arr[0]
    return x

# Unwrap every JSON-encoded text column down to its first list element.
for col in ("prompt", "response_a", "response_b"):
    df[col] = df[col].apply(extract_first)

# Fold the three winner flag columns into a single class id:
# 0 = model A, 1 = model B, 2 = tie.
label_weights = {"winner_model_a": 0, "winner_model_b": 1, "winner_tie": 2}
df["label"] = sum(
    df[flag] * weight for flag, weight in label_weights.items()
).astype(int)

# Keep only the columns used downstream.
df = df[["prompt", "response_a", "response_b", "label"]]

# Hold out 5% for validation, stratified on the label, with a fixed seed.
train_df, val_df = train_test_split(
    df,
    test_size=0.05,
    random_state=42,
    stratify=df["label"],
)

# =======================================
# 3. Tokenizer & Dataset
# =======================================
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Reuse the EOS token as the padding token; preprocess() pads every
# example to max_length, so a pad token has to be defined.
tokenizer.pad_token = tokenizer.eos_token

def preprocess(batch):
    """Turn a batch of (prompt, response A, response B) rows into model inputs.

    Each row is rendered into one text containing the prompt followed by
    both responses, then tokenized with the module-level ``tokenizer``,
    padded/truncated to ``MAX_LEN``.

    Args:
        batch: Mapping with the keys ``"prompt"``, ``"response_a"``,
            ``"response_b"`` and ``"label"``, each holding one value per row.

    Returns:
        The tokenizer output with an added ``"labels"`` entry copied from
        ``batch["label"]``.
    """
    rows = zip(batch["prompt"], batch["response_a"], batch["response_b"])

    rendered = []
    for prompt, answer_a, answer_b in rows:
        rendered.append(
            f"Prompt:\n{prompt}\n\nModel A:\n{answer_a}\n\n--------\n\nModel B:\n{answer_b}"
        )

    encoded = tokenizer(
        rendered,
        padding="max_length",
        truncation=True,
        max_length=MAX_LEN,
    )
    encoded["labels"] = batch["label"]
    return encoded

# Convert both splits to Dataset objects and tokenize them in batched mode.
train_ds, val_ds = (
    Dataset.from_pandas(frame).map(preprocess, batched=True)
    for frame in (train_df, val_df)
)

# =======================================
# 4. Quantized loading + automatic offload
# =======================================
# 8-bit bitsandbytes quantization with the fp32 CPU-offload flag enabled.
bnb_conf = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)
# Load the base checkpoint as a sequence classifier with 3 output labels
# (matching the 0/1/2 label column); device_map="auto" leaves module
# placement to the library.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH,
    num_labels=3,
    quantization_config=bnb_conf,
    device_map="auto",
    low_cpu_mem_usage=True
)
# Keep the model's padding id in sync with the tokenizer's pad token.
model.config.pad_token_id = tokenizer.pad_token_id

# =======================================
# 5. Prepare for k-bit training (required)
# =======================================
model = prepare_model_for_kbit_training(model)

# =======================================
# 6. Attach the LoRA adapter
# =======================================
# LoRA with rank 8, alpha 32 and dropout 0.1, targeting the q_proj and
# v_proj modules, configured for a sequence-classification task.
lora_conf = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj","v_proj"],
    bias="none",
    task_type="SEQ_CLS"
)
model = get_peft_model(model, lora_conf)
# NOTE(review): prepare_model_for_kbit_training may already turn on
# gradient checkpointing by default — confirm whether this call is redundant.
model.gradient_checkpointing_enable()

# Report how many parameters are trainable versus the total parameter count.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total = sum(size for size, _ in param_sizes)
trainable = sum(size for size, needs_grad in param_sizes if needs_grad)
print(f"📝 可訓練參數: {trainable}/{total} ({trainable/total:.2%})")

# =======================================
# 7. Training configuration (full FP32, mixed precision disabled)
# =======================================
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=ACCUM_STEPS,
    learning_rate=LR,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    logging_steps=10,
    # Evaluate on a step schedule (every 5 steps) ...
    eval_strategy="steps",
    eval_steps=5,
    # ... but write checkpoints once per epoch.
    save_strategy="epoch",
    # Both mixed-precision modes are switched off.
    fp16=False,
    bf16=False,
    report_to="none"
)

def compute_metrics(p):
    """Compute classification accuracy for the Trainer's evaluation loop.

    Args:
        p: Object exposing ``predictions`` (the class logits, or a tuple
            whose first item is the logits) and ``label_ids`` (the gold
            class ids).

    Returns:
        ``{"accuracy": value}`` where ``value`` is a plain ``float``: the
        fraction of rows whose arg-max class equals the label.
    """
    logits = p.predictions
    # When a model returns extra outputs besides the logits, predictions
    # arrives as a tuple with the logits first; argmax on the tuple itself
    # would raise.
    if isinstance(logits, tuple):
        logits = logits[0]
    preds = logits.argmax(-1)
    return {"accuracy": float((preds == p.label_ids).mean())}

# Wire the model, arguments, tokenized splits and accuracy metric together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics
)

# =======================================
# 8. Train & save
# =======================================
trainer.train()
# Save the model and the tokenizer side by side in OUTPUT_DIR.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print("✅ LoRA 微調完成，模型已保存至", OUTPUT_DIR)
