In [None]:
pip install transformers peft datasets accelerate bitsandbytes tensorboard


In [None]:
tensorboard --logdir output/lora_checkpoints/logs


In [None]:
# scripts/prepare_dataset.py

import json
import os
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer

# Settings for dataset preparation.
MODEL_NAME = "taide/Llama-3.1-TAIDE-LX-8B-Chat"  # checkpoint whose tokenizer is used
DATA_DIR = "data"  # directory containing scheduling_{train,dev,test}.jsonl
OUTPUT_DIR = "dataset_cache"  # destination of the tokenized DatasetDict
MAX_LENGTH = 2048  # every example is truncated/padded to this many tokens

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# tokenize() pads with padding="max_length", which needs a pad token. Fall back
# to EOS when the checkpoint does not define one, so preparation does not fail
# and uses the same fallback as the training script.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize(example):
    """Convert one raw record into fixed-length causal-LM training features.

    Args:
        example: Mapping with an "input" (task text) and an "output"
            (expected answer) entry.

    Returns:
        The tokenizer output for the formatted prompt, truncated/padded to
        MAX_LENGTH, with an added "labels" entry. Labels equal ``input_ids``
        at real token positions and -100 at padded positions.
    """
    prompt = example["input"]
    answer = example["output"]
    # Terminate the answer with EOS so the model can learn where a reply ends;
    # inference stops generation on eos_token_id.
    eos = tokenizer.eos_token or ""
    full_prompt = f"<|user|>\n{prompt}\n<|assistant|>\n{answer}{eos}"
    tokens = tokenizer(full_prompt, truncation=True, max_length=MAX_LENGTH, padding="max_length")
    # Padding must not contribute to the loss: -100 is the ignore index used by
    # the Hugging Face causal-LM loss. The attention mask (not the token id)
    # decides what is padding, so a real EOS is kept even when pad == EOS.
    tokens["labels"] = [
        token_id if is_real else -100
        for token_id, is_real in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

def _load_split(split):
    """Load data/scheduling_<split>.jsonl and return it as a single Dataset."""
    jsonl_path = os.path.join(DATA_DIR, f"scheduling_{split}.jsonl")
    loaded = load_dataset("json", data_files={split: jsonl_path})
    return loaded[split]


# Load each raw JSONL split separately and gather them into one DatasetDict.
dataset = DatasetDict({name: _load_split(name) for name in ["train", "dev", "test"]})

# Tokenize every record (dropping the raw text columns) and persist the result.
tokenized = dataset.map(tokenize, remove_columns=["input", "output"])
tokenized.save_to_disk(OUTPUT_DIR)

print(f"✅ 資料已儲存至 {OUTPUT_DIR}")


In [None]:
# scripts/train.py

import os
import torch
from datasets import load_from_disk
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
from transformers.integrations import TensorBoardCallback
from huggingface_hub import login

# === Log in to the Hugging Face Hub (only when a token is provided) ===
# HF_TOKEN is read from the environment; later steps reuse hf_token to decide
# whether checkpoints are pushed to the Hub.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    print("ℹ️ 未提供 Hugging Face Token，將不會使用 push_to_hub 功能")
else:
    print("🔐 Logging into Hugging Face Hub...")
    login(token=hf_token)

# === Model and path settings ===
model_name = "taide/Llama-3.1-TAIDE-LX-8B-Chat"  # base checkpoint to fine-tune
dataset_path = "dataset_cache"  # tokenized dataset directory read by load_from_disk
output_dir = "output/lora_checkpoints"  # checkpoints, logs and the final adapter

# === LoRA settings ===
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Adapters are attached to the q/k/v/o projection modules.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=8,  # adapter rank
    lora_alpha=16,  # LoRA scaling parameter
    lora_dropout=0.05,
    bias="none",  # bias terms are left untouched
)

# === Load tokenizer and model ===
# Local import: only this step needs the quantization config.
from transformers import BitsAndBytesConfig

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Only fall back to EOS when the checkpoint has no pad token of its own;
# overriding an existing pad token would change which ids the collator masks.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# prepare_model_for_kbit_training() is meant for k-bit quantized models. On a
# plain bf16 model it upcasts the weights to fp32, so load the base model in
# 4-bit (QLoRA-style) to make the call do what it is intended for.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Prepare the quantized base model for training, then attach the LoRA adapters
# described by lora_config.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

# === Load dataset ===
# Tokenized splits previously saved to dataset_path.
dataset = load_from_disk(dataset_path)

# mlm=False selects causal-LM collation instead of masked-LM.
data_collator = DataCollatorForLanguageModeling(
    mlm=False,
    tokenizer=tokenizer,
)

# === Training arguments ===
# The target Hub repo comes from the HUB_MODEL_ID environment variable
# ("account/repo-name"). Pushing is enabled only when both a token and a repo
# id are present, so setting HF_TOKEN just to download the base model no longer
# triggers uploads to a placeholder repository.
hub_model_id = os.environ.get("HUB_MODEL_ID")
push_to_hub = bool(hf_token and hub_model_id)
if hf_token and not hub_model_id:
    print("ℹ️ HUB_MODEL_ID is not set; checkpoints will not be pushed to the Hub")

training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,  # 2 x 8 = 16 sequences per optimizer step per device
    eval_strategy="steps",
    save_strategy="steps",
    logging_steps=50,
    save_steps=200,
    eval_steps=200,
    num_train_epochs=3,
    warmup_steps=100,
    learning_rate=2e-4,
    logging_dir=f"{output_dir}/logs",
    save_total_limit=2,  # keep only the two most recent checkpoints
    bf16=True,
    report_to="tensorboard",
    push_to_hub=push_to_hub,
    hub_model_id=hub_model_id,
    hub_strategy="every_save"
)

# === Training ===
# No explicit TensorBoardCallback here: training_args sets
# report_to="tensorboard", which already registers one. Passing a second
# instance would add a duplicate callback and log every metric twice.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["dev"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Save the trained (LoRA-wrapped) model to output_dir.
trainer.save_model(output_dir)
print(f"✅ 模型與 LoRA 參數已儲存至 {output_dir}")


In [None]:
# scripts/inference.py

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
from peft import PeftModel
import gradio as gr

BASE_MODEL = "taide/Llama-3.1-TAIDE-LX-8B-Chat"  # base checkpoint
LORA_PATH = "output/lora_checkpoints"  # directory holding the trained LoRA adapter

# Prefer the GPU when one is visible to torch.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the tokenizer and the base model, then attach the LoRA adapter.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, LORA_PATH)
model.eval()  # inference only

# Inference function
def generate_schedule(input_text):
    """Generate the model's answer for a scheduling task description.

    Args:
        input_text: Task description typed by the user. ``None`` or
            whitespace-only input is treated as empty.

    Returns:
        The generated text with the prompt and special tokens removed and
        surrounding whitespace stripped, or "" when the input is empty.
    """
    task = (input_text or "").strip()
    if not task:
        # Nothing to answer; skip the (expensive) generation call.
        return ""

    prompt = f"<|user|>\n{task}\n<|assistant|>\n"
    # The model is loaded with device_map="auto", so send the inputs to the
    # device the model actually lives on rather than a hand-picked one.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Pass an explicit pad id so generate() does not have to guess one.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            top_p=0.95,
            repetition_penalty=1.2,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    prompt_length = inputs["input_ids"].shape[-1]
    result = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return result.strip()

# Gradio UI: one multi-line text input, one text output, flagging disabled.
task_box = gr.Textbox(
    lines=10,
    placeholder="請輸入排班任務，例如：請安排一位擅長炸物且能上大夜班的員工...",
    label="排班任務",
)
suggestion_box = gr.Textbox(label="模型建議人員")

demo = gr.Interface(
    fn=generate_schedule,
    inputs=task_box,
    outputs=suggestion_box,
    title="🧠 LLaMA-3 排班任務助手 (LoRA Fine-Tuned)",
    allow_flagging="never",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
