In [None]:
# Install (or upgrade to) the pinned versions of transformers, datasets, peft and accelerate.
!pip install -U transformers==4.55.2 datasets==4.3.0 peft==0.11.1 accelerate==1.4.0

In [None]:
# Remove the RAPIDS packages (and bigframes) that pin the pyarrow version; fine if they are not installed
!pip uninstall -y cudf-cu12 pylibcudf-cu12 dask-cudf-cu12 cuml-cu12 cugraph-cu12 bigframes || true

# Also remove leftovers from earlier installs (optional)
!pip uninstall -y gcsfs fsspec pyarrow || true

# Upgrade pip
!pip install -U pip

In [None]:
# datasets 4.3.0 needs pyarrow>=21; 21.0.0 is stable and does not clash with RAPIDS
!pip install "pyarrow==21.0.0" "fsspec==2024.5.0" "gcsfs==2024.5.0"

# Minimal package set for LoRA on CPU
!pip install "transformers==4.55.2" "datasets==4.3.0" "peft==0.11.1" "accelerate==1.4.0"

In [None]:
import transformers
import datasets
import peft
import accelerate
import pyarrow
import pyarrow as pa

# Report the installed version of every pinned dependency so a mismatch
# with the install cells is visible before any training starts.
for label, module in (
    ("transformers:", transformers),
    ("datasets:", datasets),
    ("peft:", peft),
    ("accelerate:", accelerate),
    ("pyarrow:", pa),
):
    print(label, module.__version__)

In [None]:
# 1) 建立資料夾 + 寫入最小訓練集（你可自行替換為自己的資料）
import os, json
os.makedirs("/content/data", exist_ok=True)

samples = [
    {"instruction":"You are a helpful assistant. Answer briefly.",
     "input":"What is overfitting in machine learning?",
     "output":"Overfitting means the model memorizes training noise and fails on new data."},
    {"instruction":"You are a helpful assistant. Answer briefly.",
     "input":"Explain cross-validation in one sentence.",
     "output":"It splits data into folds to estimate generalization performance."},
    {"instruction":"You are a helpful assistant. Answer briefly.",
     "input":"Give a short tip to avoid overfitting.",
     "output":"Use more data, regularization, and early stopping."}
]
with open("/content/data/chat_train.jsonl","w") as f:
    for ex in samples:
        f.write(json.dumps(ex, ensure_ascii=False)+"\n")

# 2) 驗證檔案存在
import os
print("exists:", os.path.exists("/content/data/chat_train.jsonl"))
!head -n 2 /content/data/chat_train.jsonl

In [None]:
from datasets import load_dataset

# Read the JSONL file with the "json" loader and expose it as the "train" split.
ds = load_dataset(
    "json",
    data_files={"train": "/content/data/chat_train.jsonl"},
)
train_ds = ds["train"]

In [None]:
from datasets import load_dataset

# Re-read the raw JSONL so train_ds starts from the original columns every
# time this cell runs (the map further down in this cell overwrites train_ds).
ds = load_dataset(
    "json",
    data_files={"train": "/content/data/chat_train.jsonl"},
)
train_ds = ds["train"]

# Render one record as plain instruction-format text.
def to_text(ex):
    """Build the training prompt for a single example.

    Args:
        ex: Mapping with "instruction", "input" and "output" entries.

    Returns:
        A dict with one key, "text", holding the three sections under
        "### Instruction:", "### Input:" and "### Response:" headers.
    """
    prompt = f"### Instruction:\n{ex['instruction']}\n\n### Input:\n{ex['input']}\n\n### Response:\n{ex['output']}"
    return {"text": prompt}
# Replace the raw columns with the single rendered "text" column.
raw_columns = train_ds.column_names
train_ds = train_ds.map(to_text, remove_columns=raw_columns)

# Preview the first 200 characters of the first rendered example.
first_text = train_ds[0]["text"]
print(first_text[:200])

In [None]:
import torch, os
from transformers import AutoModelForCausalLM, AutoTokenizer

torch.set_num_threads(2)              # Raise to match your CPU cores: faster, but uses more resources
MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"

# trust_remote_code is deliberately not enabled: this checkpoint loads with the
# architecture classes built into transformers, so there is no reason to allow
# Python code from the model repository to be executed.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
if tokenizer.pad_token is None:
    # Padding needs a pad token; fall back to EOS when the tokenizer defines none.
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,        # FP32 is the most stable choice on CPU
    device_map={"": "cpu"},           # Place the whole model on the CPU explicitly
)
# Keep the model config in sync with the tokenizer's special-token ids.
model.config.pad_token_id = tokenizer.pad_token_id
model.config.eos_token_id = tokenizer.eos_token_id

In [None]:
import torch
from peft import LoraConfig, get_peft_model, TaskType

# The LoRA A matrices are randomly initialised when the adapters are injected.
# Seed torch first so a fresh "Restart & Run All" builds the same adapters.
torch.manual_seed(42)

lora_cfg = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=4,                 # Very small rank
    lora_alpha=16,
    lora_dropout=0.1,
    # Common attention/MLP projection names. Only modules whose names match get
    # an adapter; names that match nothing are ignored, but PEFT raises an error
    # if none of them match the base model.
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
    bias="none"
)

model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()  # Show the share of trainable parameters

In [None]:
MAX_LEN = 256  # Keep sequences short on CPU; it makes a large difference in speed

def tok_fn(batch):
    """Tokenize a batch of rendered prompts for causal-LM training.

    Sequences are truncated to MAX_LEN but intentionally NOT padded here and no
    "labels" column is produced: the DataCollatorForLanguageModeling(mlm=False)
    used for training pads each batch to its longest member and rebuilds labels
    from input_ids (masking pad positions). Padding every sample to MAX_LEN
    up front only adds masked positions the CPU still has to compute.

    Args:
        batch: Mapping with a "text" list of strings (batched `map` input).

    Returns:
        The tokenizer output ("input_ids", "attention_mask") as Python lists.
    """
    return tokenizer(
        batch["text"],
        truncation=True,
        max_length=MAX_LEN,
        return_tensors=None,
    )

# Replace the "text" column with the tokenized features.
tok_ds = train_ds.map(tok_fn, batched=True, remove_columns=train_ds.column_names)

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

args = TrainingArguments(
    output_dir="/content/lora_full_cpu_out",
    use_cpu=True,                      # Stay on CPU even if a GPU is visible, matching the CPU-loaded model
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,     # Accumulate up to 8 micro-batches per optimizer step to save memory
    learning_rate=5e-4,                # LoRA tolerates a somewhat larger learning rate
    weight_decay=0.0,
    max_steps=150,                     # Few steps (100-300 is fine); more steps train better but take longer
    warmup_ratio=0.03,
    logging_steps=10,
    save_steps=75,
    save_total_limit=1,
    report_to=[],                      # Disable external experiment loggers
)

# mlm=False selects the causal-LM objective: the collator pads each batch and
# derives labels from input_ids, ignoring pad positions in the loss.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tok_ds,
    data_collator=collator,
    processing_class=tokenizer,        # Replaces the deprecated `tokenizer=` argument
)

trainer.train()

In [None]:
save_dir = "/content/lora_smolm2_cpu"

# Write the LoRA adapter and the tokenizer files into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)
print("LoRA adapters saved to:", save_dir)

# (Optional) Merge the LoRA weights back into the base model and export
# safetensors that can be used for inference without PEFT.
# Merging is slower on CPU but works; without merging, load the adapter with PEFT instead.
# merged = model.merge_and_unload()          # merge_and_unload is a method on the PEFT model
# merged.save_pretrained("/content/smolm2_lora_merged", safe_serialization=True)
# tokenizer.save_pretrained("/content/smolm2_lora_merged")

In [None]:
import torch
model.eval()  # Inference mode: disables dropout, including the LoRA dropout

def chat(prompt, max_new_tokens=64, sample=False):
    """Answer `prompt` with the fine-tuned model using the training template.

    Args:
        prompt: User question placed in the "### Input:" section.
        max_new_tokens: Upper bound on the number of generated tokens.
        sample: If True, sample with temperature 0.7 / top-p 0.9; otherwise
            decode greedily.

    Returns:
        The generated answer as a stripped string.
    """
    text = (
        "### Instruction:\nYou are a helpful assistant. Answer briefly.\n\n"
        f"### Input:\n{prompt}\n\n### Response:\n"
    )
    # Put the inputs on whatever device the model currently lives on.
    device = next(model.parameters()).device
    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=sample,          # Keep sampling off on CPU first; output is more stable
            temperature=0.7 if sample else None,
            top_p=0.9 if sample else None,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens. Splitting the full text on
    # "### Response:" and taking the last piece returns the wrong segment
    # whenever the model goes on to emit further template blocks.
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
    # If the model starts a new turn, keep only the first answer.
    return answer.split("### Instruction:")[0].strip()

print(chat("Explain cross-validation in one sentence."))
print("----")
print(chat("Give a short tip to avoid overfitting."))