In [None]:
# ===============================================================
# 0. 單卡環境 & 套件
# ===============================================================
import os, torch, gc, warnings
# Single-GPU setup: both variables are written before the first CUDA call
# below, so device 0 is the only one this process is told to use.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "0",
    "NCCL_P2P_DISABLE": "1",
})
torch.cuda.set_device(0)

# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings(action="ignore")

# Install (quietly) the Hugging Face stack used below. The leading "!" is an
# IPython shell escape, so this line only works inside a notebook.
# NOTE(review): requires network access at run time -- confirm the runtime
# allows it, otherwise the pre-installed package versions are what get used.
!pip -q install "transformers>=4.41.2" "accelerate>=0.27.2" peft datasets sentencepiece

# ===============================================================
# 1. 讀資料
# ===============================================================
import pandas as pd, numpy as np
from datasets import Dataset

# Load the competition's training and test tables.
train_df = pd.read_csv("/kaggle/input/llm-classification-finetuning/train.csv")
test_df = pd.read_csv("/kaggle/input/llm-classification-finetuning/test.csv")

# Fold the three winner indicator columns into a single integer class id by
# weighting each column with its class index: A -> 0, B -> 1, tie -> 2.
_class_index = {"winner_model_a": 0, "winner_model_b": 1, "winner_tie": 2}
train_df["label"] = sum(
    train_df[column] * index for column, index in _class_index.items()
).astype(int)

# Keep only the three text fields and the target.
train_df = train_df.loc[:, ["prompt", "response_a", "response_b", "label"]]

# ===============================================================
# 2. Tokenizer & Arrow Dataset
# ===============================================================
from transformers import AutoTokenizer
# Per-example token budget; set to 512 to cover more of each conversation.
MAX_LEN = 512

# Root folder of the local DeBERTa checkpoint -- change this to your own path.
MODEL_PATH = "/kaggle/input/deberta-xs/transformers/default/1/deberta-xs"

# Load the tokenizer strictly from local files.
tok = AutoTokenizer.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
)

def _segment_budgets(lengths, budget):
    """Return per-segment token caps whose sum never exceeds ``budget``.

    Segments shorter than an equal share keep their full length and the room
    they leave unused is handed to the longer segments, so a segment is only
    cut when every longer-or-equal segment is cut to (about) the same size.

    Args:
        lengths: token count of each segment, in segment order.
        budget: total number of tokens available for all segments together.

    Returns:
        A list of caps, aligned with ``lengths``.
    """
    caps = [0] * len(lengths)
    remaining = max(budget, 0)
    # Visit segments from shortest to longest so leftovers flow to long ones.
    order = sorted(range(len(lengths)), key=lengths.__getitem__)
    for rank, idx in enumerate(order):
        share = remaining // (len(order) - rank)
        caps[idx] = min(lengths[idx], share)
        remaining -= caps[idx]
    return caps


def tok_fn(batch):
    """Tokenize a batch of (prompt, response_a, response_b) rows.

    Every row becomes ``[CLS] prompt [SEP] response_a [SEP] response_b [SEP]``
    padded to exactly ``MAX_LEN`` tokens.

    The three fields are truncated *individually* under a shared budget.
    Simply right-truncating the concatenated text would remove response_b
    first, leaving the classifier nothing to compare against whenever the
    prompt or response_a is long.

    Args:
        batch: mapping with ``"prompt"``, ``"response_a"`` and
            ``"response_b"`` entries, each a sequence of strings of equal
            length (the shape ``Dataset.map(..., batched=True)`` passes in).

    Returns:
        Dict with ``"input_ids"`` and ``"attention_mask"`` (plus an all-zero
        ``"token_type_ids"`` when the tokenizer lists it as a model input);
        every value is a list of ``MAX_LEN``-long integer lists.
    """
    fields = ("prompt", "response_a", "response_b")
    # Tokenize each field on its own: no special tokens and no truncation yet,
    # because the cut points are decided per row below.
    per_field_ids = [
        tok(list(batch[name]), add_special_tokens=False,
            truncation=False)["input_ids"]
        for name in fields
    ]
    cls_id, sep_id, pad_id = tok.cls_token_id, tok.sep_token_id, tok.pad_token_id
    # Room left for content after one [CLS] and one [SEP] per field.
    budget = MAX_LEN - (len(fields) + 1)

    input_ids, attention_mask = [], []
    for segments in zip(*per_field_ids):
        caps = _segment_budgets([len(seg) for seg in segments], budget)
        ids = [cls_id]
        for seg, cap in zip(segments, caps):
            ids.extend(seg[:cap])
            ids.append(sep_id)
        n_pad = MAX_LEN - len(ids)
        input_ids.append(ids + [pad_id] * n_pad)
        attention_mask.append([1] * len(ids) + [0] * n_pad)

    encoded = {"input_ids": input_ids, "attention_mask": attention_mask}
    # Keep the output columns compatible with what a plain tokenizer call
    # returns for tokenizers that emit segment ids (single segment -> zeros).
    if "token_type_ids" in tok.model_input_names:
        encoded["token_type_ids"] = [[0] * len(row) for row in input_ids]
    return encoded

# Tokenize the whole training table with 4 worker processes, then hold out
# 5 % of the rows (fixed seed) as the "test" split.
_encoded_train = Dataset.from_pandas(train_df)
_encoded_train = _encoded_train.map(tok_fn, batched=True, num_proc=4)
ds = _encoded_train.train_test_split(test_size=0.05, seed=42)

# Expose only the model inputs and the target, returned as torch tensors.
_tensor_columns = ["input_ids", "attention_mask", "label"]
for split_name in ("train", "test"):
    ds[split_name].set_format("torch", _tensor_columns)

# ===============================================================
# 3. DeBERTa-xsmall + LoRA (FP32)
# ===============================================================
from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model

# Three-class sequence classifier loaded from the local checkpoint and moved
# to the GPU.
base = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH,
    num_labels=3,
    local_files_only=True,
)
base = base.to("cuda")
# Recompute activations in the backward pass instead of storing them.
base.gradient_checkpointing_enable()

# LoRA adapters (rank 8, alpha 16, dropout 0.05, no bias training) attached
# to the modules named "query_proj" and "value_proj".
_lora_cfg = LoraConfig(
    task_type="SEQ_CLS",
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query_proj", "value_proj"],
)
model = get_peft_model(base, _lora_cfg)

# ===============================================================
# 4. Trainer
# ===============================================================
from transformers import TrainingArguments, Trainer

_training_options = dict(
    output_dir="deberta-xs-lora",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,   # effective batch size of 16
    learning_rate=1e-4,
    num_train_epochs=4,
    max_grad_norm=0.0,                # clipping turned off (avoids fp16 unscale)
    logging_steps=20,
    eval_strategy="no",               # no evaluation during training
    save_strategy="no",               # no checkpoints written
    report_to="none",                 # no external experiment tracker
)
args = TrainingArguments(**_training_options)


def metric(p):
    """Return accuracy: the share of rows whose arg-max class equals the label."""
    predicted = p.predictions.argmax(1)
    return {"acc": (predicted == p.label_ids).mean()}


trainer = Trainer(
    model=model,
    args=args,
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
    compute_metrics=metric,
)
trainer.train()

# ===============================================================
# 5. 推論 — DeBERTa (N,3)
# ===============================================================
def deberta_logits(df):
    """Score ``df`` with the fine-tuned classifier and return probabilities.

    Despite the name, the return value is the softmax of the model outputs,
    not the raw logits.

    Args:
        df: DataFrame providing ``prompt``, ``response_a`` and ``response_b``
            columns; other columns are ignored.

    Returns:
        NumPy array with one row per input row, soft-maxed over the last axis.
    """
    text_columns = ["prompt", "response_a", "response_b"]
    encoded = Dataset.from_pandas(df[text_columns]).map(tok_fn, batched=True)
    # Only the model inputs are exposed; there is no label at inference time.
    encoded.set_format("torch", ["input_ids", "attention_mask"])
    raw_scores = trainer.predict(encoded).predictions
    probabilities = torch.tensor(raw_scores).softmax(dim=-1)
    return probabilities.cpu().numpy()

# Softmax outputs of the fine-tuned DeBERTa classifier for every test row.
disc_probs = deberta_logits(test_df)           # (N,3)

# ===============================================================
# 6. 推論 — PairRM (N,2)
# ===============================================================
from transformers import AutoModelForSequenceClassification
RM_PATH = "/kaggle/input/pairrm-0.4b/transformers/default/1/pairrm-0.4b"

# Both the tokenizer and the model are read from local files only.
# NOTE(review): the ensemble step consumes this model's output as two
# probabilities per row -- confirm the checkpoint really loads as a 2-logit
# sequence-classification head and is not re-initialised on load.
_offline = {"local_files_only": True}
rm_tok = AutoTokenizer.from_pretrained(RM_PATH, **_offline)
rm = AutoModelForSequenceClassification.from_pretrained(
    RM_PATH,
    device_map="auto",
    **_offline,
)

def rm_pair_prob(p,a,b):
    """Score one (prompt, response A, response B) triple with the reward model.

    The three texts are joined into a single string using the tokenizer's EOS
    token as separator, truncated to 2048 tokens, and run through ``rm``
    without gradient tracking.

    Returns:
        NumPy array holding the softmax (over the last axis) of the squeezed
        model logits.
    """
    txt = f"{p} {rm_tok.eos_token}{a}{rm_tok.eos_token}{b}"
    encoded = rm_tok(txt, return_tensors="pt", truncation=True, max_length=2048)
    encoded = encoded.to(rm.device)
    with torch.no_grad():
        logits = rm(**encoded).logits
    return logits.squeeze().softmax(dim=-1).cpu().numpy()

# Score the test rows one at a time and stack the per-row results.
_test_triples = zip(
    test_df["prompt"], test_df["response_a"], test_df["response_b"])
rm_probs = np.vstack(
    [rm_pair_prob(*triple) for triple in _test_triples])          # (N,2)

# ===============================================================
# 7. Ensemble (連續 Tie 機率)
# ===============================================================
alpha = 0.55                                          # weight of the 3-way classifier

# Fail loudly on a shape mismatch instead of letting NumPy broadcast a
# wrongly sized reward-model output into the A/B columns.
if disc_probs.ndim != 2 or disc_probs.shape[1] != 3:
    raise ValueError(f"disc_probs must be (N, 3), got {disc_probs.shape}")
if rm_probs.shape != (disc_probs.shape[0], 2):
    raise ValueError(
        f"rm_probs must be ({disc_probs.shape[0]}, 2), got {rm_probs.shape}")

# The reward model only ranks A against B, so its two probabilities sum to 1,
# whereas the classifier's A/B columns sum to (1 - p_tie). Scale the pairwise
# probabilities by the non-tie mass before blending; the two terms are then
# on the same scale and the classifier's tie probability is carried through
# unchanged instead of being shrunk by the renormalisation.
non_tie_mass = 1.0 - disc_probs[:, 2:3]                # (N,1)
prob        = disc_probs.copy()                        # (N,3)
prob[:, :2] = alpha*disc_probs[:, :2] + (1-alpha)*non_tie_mass*rm_probs
prob        = prob / prob.sum(axis=1, keepdims=True)   # guards against float drift

# ===============================================================
# 8. 輸出 submission.csv
# ===============================================================
# Assemble the submission: the test ids followed by one probability column
# per class, taken from the matching column of ``prob``.
_class_columns = ("winner_model_a", "winner_model_b", "winner_tie")
sub = pd.DataFrame({"id": test_df["id"]})
for column_index, column_name in enumerate(_class_columns):
    sub[column_name] = prob[:, column_index]

sub.to_csv("/kaggle/working/submission.csv", index=False)
print("✅ submission.csv 生成完成！")