In [None]:
# First remove previously installed packages that would conflict with the pinned
# versions installed below (it does not matter if some of them are not installed;
# `|| true` keeps the cell from failing in that case).
!pip uninstall -y transformers trl peft accelerate datasets pyarrow fsspec gcsfs || true
!pip uninstall -y cudf-cu12 pylibcudf-cu12 dask-cudf-cu12 cuml-cu12 cugraph-cu12 bigframes || true

# Optional: purge the pip cache so that stale wheels are not picked up again.
!pip cache purge

In [None]:
# Install the exact, mutually pinned versions of the data and training stack.
!pip install -U \
  "pyarrow==21.0.0" "fsspec==2024.5.0" "gcsfs==2024.5.0" \
  "transformers==4.56.2" "trl==0.23.0" "peft==0.12.0" "accelerate==1.4.0" "datasets==4.3.0"

In [None]:
# Sanity check: import the freshly installed libraries and print their versions so
# they can be compared against the pins used in the install cell.
import transformers, datasets, peft, accelerate, importlib
from trl import DPOTrainer

print("transformers:", transformers.__version__)  # expected 4.56.2 (pinned in the install cell)
print("trl:", __import__("trl").__version__)      # expected 0.23.0
print("peft:", peft.__version__)                  # expected 0.12.0 (pinned in the install cell)
print("accelerate:", accelerate.__version__)      # expected 1.4.0
print("datasets:", datasets.__version__)          # expected 4.3.0
print("✅ DPOTrainer 可匯入")

In [None]:
import os, json
# Create the data directory; exist_ok=True keeps the cell safe to re-run.
os.makedirs("/content/data", exist_ok=True)

# Toy preference data: each record holds a prompt, the preferred ("chosen") answer
# and the dispreferred ("rejected") answer.
rows = [
  {
    "prompt": "Explain cross-validation in one sentence.",
    "chosen": "Cross-validation splits data into folds to estimate generalization reliably.",
    "rejected": "Cross-validation makes the model overfit less by training on the test set."
  },
  {
    "prompt": "Give a short tip to avoid overfitting.",
    "chosen": "Use regularization and early stopping, and validate on held-out data.",
    "rejected": "Always train longer with higher learning rate."
  },
  {
    "prompt": "Difference between classification and regression?",
    "chosen": "Classification predicts discrete labels, while regression predicts continuous values.",
    "rejected": "Both predict numbers only."
  },
  {
    "prompt": "What is overfitting?",
    "chosen": "When a model memorizes training noise and performs poorly on new data.",
    "rejected": "When a model trains too fast and accuracy is always zero."
  },
]

# Write one JSON object per line (JSONL). The encoding is fixed to UTF-8 because
# ensure_ascii=False lets json.dumps emit non-ASCII characters verbatim; relying on
# the platform default encoding could fail or corrupt such text.
with open("/content/data/pref_dataset.jsonl", "w", encoding="utf-8") as f:
    for r in rows:
        f.write(json.dumps(r, ensure_ascii=False) + "\n")

# Quick check: show the first three lines of the generated JSONL file.
!head -n 3 /content/data/pref_dataset.jsonl

In [None]:
from datasets import load_dataset

# Load the JSONL preference file with the generic "json" loader, exposing it as the "train" split.
ds = load_dataset("json", data_files={"train": "/content/data/pref_dataset.jsonl"})
train_ds = ds["train"]
# Display the first record as a sanity check.
train_ds[0]

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"

# trust_remote_code is intentionally NOT enabled: this checkpoint is expected to load
# with the model/tokenizer classes that ship with transformers, so there is no reason
# to allow code from the model repository to be executed on this machine.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Padding is needed for batching; fall back to the EOS token when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,          # FP32 is the most stable choice on CPU
    device_map={"": "cpu"},             # place the whole model on the CPU
)

# Keep the model config in agreement with the tokenizer's special-token ids.
model.config.pad_token_id = tokenizer.pad_token_id
model.config.eos_token_id = tokenizer.eos_token_id

print("Loaded!")

In [None]:
INSTR = "You are a helpful assistant. Answer briefly."

def format_prompt(p):
    """Wrap a raw user prompt in the Instruction / Input / Response template.

    Args:
        p: The user question to embed in the "Input" section.

    Returns:
        The full prompt text, ending right after the "### Response:" header line
        so that the model's continuation is the answer.
    """
    sections = [
        "### Instruction:\n" + INSTR,
        f"### Input:\n{p}",
        "### Response:\n",
    ]
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections)

def map_to_dpo(batch):
    """Convert a batch of raw preference rows into the columns used for DPO training.

    Args:
        batch: Mapping with "prompt", "chosen" and "rejected" entries, each a
            sequence with one item per example.

    Returns:
        A dict with the same three keys; every prompt is passed through
        format_prompt, while "chosen" and "rejected" are handed back unchanged.
    """
    # Only the prompt needs templating; the answers are passed to the trainer as raw text.
    templated_prompts = list(map(format_prompt, batch["prompt"]))
    return dict(
        prompt=templated_prompts,
        chosen=batch["chosen"],
        rejected=batch["rejected"],
    )

# Apply the prompt template to the whole training set. With batched=True the mapping
# function receives a dict of column lists; remove_columns drops the original columns
# so that only the columns returned by map_to_dpo remain.
train_dpo = train_ds.map(map_to_dpo, batched=True, remove_columns=train_ds.column_names)
# Display the first formatted example.
train_dpo[0]

In [None]:
from peft import LoraConfig, get_peft_model, TaskType

# Projection layers that receive LoRA adapters (attention and MLP projections).
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Low-rank adapter settings for causal language modelling; bias terms are left untouched.
lora_cfg = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=lora_target_modules,
)

# NOTE(review): `model` is rebound to the wrapped model, so re-running this cell
# passes an already-wrapped model to get_peft_model again.
model = get_peft_model(model, lora_cfg)
# Report how many parameters remain trainable after wrapping.
model.print_trainable_parameters()

In [None]:
# === Step 6: launch DPOTrainer through DPOConfig (CPU-compatible, TRL 0.23.0) ===
import trl
from trl import DPOTrainer, DPOConfig
from types import SimpleNamespace

print("TRL version:", trl.__version__)

# Maximum total sequence length; the prompt budget below is half of it.
MAX_LEN = 256

# 1) Build the DPOConfig: turn off bf16/fp16 and fill in the required fields.
dpo_args = DPOConfig(
    # ---- Basic training parameters ----
    output_dir="/content/dpo_lora_cpu_out",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    max_steps=150,
    learning_rate=5e-5,
    warmup_ratio=0.05,
    weight_decay=0.0,
    logging_steps=10,
    save_steps=75,
    save_total_limit=1,
    report_to=[],

    # ---- Mixed-precision / GPU settings that must be switched off on CPU ----
    bf16=False,
    fp16=False,
    bf16_full_eval=False,
    fp16_full_eval=False,

    # ---- Other stability settings (CPU-friendly) ----
    optim="adamw_torch",
    remove_unused_columns=False,
    dataloader_pin_memory=False,

    # ---- Lengths and padding value read by DPOTrainer ----
    max_length=MAX_LEN,
    max_prompt_length=MAX_LEN // 2,
    padding_value=int(tokenizer.pad_token_id),

    # ---- Field this TRL version reads (set explicitly to avoid an AttributeError) ----
    model_init_kwargs={},
)

# 2) Start DPOTrainer: the tokenizer is passed as processing_class (this version does not accept tokenizer=...).
trainer = DPOTrainer(
    model=model,
    ref_model=None,             # no explicit reference model; DPOTrainer derives the frozen reference itself (NOTE(review): with a PEFT model this is presumably done by disabling the adapters — confirm against the TRL docs)
    args=dpo_args,
    train_dataset=train_dpo,    # must contain prompt / chosen / rejected
    processing_class=tokenizer, # this version's interface takes the tokenizer via processing_class
)

trainer.train()

In [None]:
# Persist the trained model and the tokenizer into the same directory so they can be reloaded together.
# NOTE(review): `model` is PEFT-wrapped at this point, so this presumably writes only the LoRA adapter
# weights rather than the full base model — confirm before relying on the directory as a standalone model.
save_dir = "/content/dpo_lora_smolm2_cpu"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
print("Saved to:", save_dir)

In [None]:
import torch
# Switch the model to evaluation mode before generating (turns off training-only behaviour such as dropout).
model.eval()

def chat(prompt, max_new_tokens=64):
    """Generate a greedy answer to `prompt` with the fine-tuned model.

    Args:
        prompt: Raw user question; it is wrapped with format_prompt before generation.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The generated answer text with surrounding whitespace removed.
    """
    text = format_prompt(prompt)
    # Move the encoded inputs to wherever the model currently lives; the trainer may
    # have relocated the model, and mismatched devices make generate() fail.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,                 # greedy decoding; avoids NaN-probability issues with sampling
            temperature=None,                # sampling knobs are unset because sampling is disabled
            top_p=None,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # The returned sequence starts with the prompt tokens. Decoding only what follows
    # them is more reliable than splitting the decoded text on "### Response:", which
    # breaks when that marker appears in the prompt or in the generated text.
    completion_ids = out[0][prompt_len:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# Spot-check the tuned model on two of the training prompts, printing a divider between answers.
demo_questions = (
    "Explain cross-validation in one sentence.",
    "Give a short tip to avoid overfitting.",
)
for position, question in enumerate(demo_questions):
    if position > 0:
        print("----")
    print(chat(question))

In [None]:
# Bundle the saved model directory, the trainer output directory and the dataset folder into one zip archive.
!cd /content && zip -r dpo_lora_smolm2_cpu.zip dpo_lora_smolm2_cpu dpo_lora_cpu_out data
print("Zip ready at /content/dpo_lora_smolm2_cpu.zip")

In [None]:
# === Upload → Clean → Download for GitHub preview ===
from google.colab import files
import io, json, os


def _strip_widgets_and_outputs(nb):
    """Remove widget metadata and code-cell outputs from a parsed notebook, in place.

    Args:
        nb: The notebook as parsed from JSON; must be a JSON object (dict).

    Returns:
        The same dict, after cleaning.

    Raises:
        ValueError: If `nb` is not a JSON object and therefore cannot be a notebook.
    """
    if not isinstance(nb, dict):
        raise ValueError("Uploaded file is valid JSON but not a notebook (expected a JSON object)")

    # Notebook-level widget state.
    nb.setdefault("metadata", {}).pop("widgets", None)

    for cell in nb.get("cells", []):
        # Cell-level widget state.
        if isinstance(cell.get("metadata"), dict):
            cell["metadata"].pop("widgets", None)
        # Clearing outputs and execution counts makes the GitHub preview more reliable.
        if cell.get("cell_type") == "code":
            cell["outputs"] = []
            cell["execution_count"] = None
    return nb


# 1) Upload the notebook to be fixed (only the first selected file is processed).
uploaded = files.upload()
assert uploaded, "沒有選擇任何檔案"
src_name = next(iter(uploaded))
# "utf-8-sig" also accepts files that start with a byte-order mark, which a plain
# "utf-8" decode would leave in the text and json.loads would then reject.
raw = uploaded[src_name].decode("utf-8-sig")

# 2) Parse the JSON (raises if the file is not valid JSON).
nb = json.loads(raw)

# 3) Strip metadata.widgets at notebook and cell level, and clear outputs / execution counts.
nb = _strip_widgets_and_outputs(nb)

# 4) Save as *_clean.ipynb and trigger the download.
dst_name = os.path.splitext(src_name)[0] + "_clean.ipynb"
with open(dst_name, "w", encoding="utf-8") as f:
    json.dump(nb, f, ensure_ascii=False, indent=1)

print("✅ 產出：", dst_name)
files.download(dst_name)