# 生成yaml文件

In [None]:
%%writefile /content/drive/MyDrive/AIAA3102/Final_Project/Configs/training_args.yaml
# training_args.yaml
# 训练相关参数（适配 Colab T4 + QLoRA / LoRA）
seed: 42

# 模型与输出
model_name_or_path: "TinyLlama/TinyLlama_v1.1"  # 替换为你使用的 tinyllama repo
output_dir: "/content/drive/MyDrive/AIAA3102/Final_Project/Models/tinyllama_ai_finetuned"

# LoRA / PEFT 设置（若使用 LoRA）
lora:
  use_lora: true
  r: 8
  lora_alpha: 32
  lora_dropout: 0.1
  target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]   # 依据 tinyllama 实际模块名调整

# QLoRA（4-bit）设置（如使用 QLoRA，请设置 use_qlora: true）
qlora:
  use_qlora: true
  use_4bit: true
  bnb_4bit_quant_type: "nf4"        # nf4 推荐，亦可选 "fp4"
  bnb_4bit_compute_dtype: "bfloat16" # 若不支持可设为 "float16"
  bnb_4bit_use_double_quant: true

# 数据/训练超参
per_device_train_batch_size: 2
per_device_eval_batch_size: 4
gradient_accumulation_steps: 1
num_train_epochs: 2
max_steps: null            # 若指定 steps，则覆盖 num_train_epochs
save_steps: 200
save_total_limit: 3

# 学习率 / 优化
learning_rate: 5e-5
weight_decay: 0.0
adam_beta1: 0.9
adam_beta2: 0.95
adam_epsilon: 1e-8

# 日志与评估
logging_steps: 50
evaluation_strategy: "steps"   # "no" | "steps" | "epoch"
eval_steps: 200
load_best_model_at_end: true
metric_for_best_model: "refusal_rate"  # 可自定义 (accuracy / loss / refusal_rate)

# 设备与数值
fp16: true
gradient_checkpointing: true

# 其它
push_to_hub: false           # 若训练结束后自动 push，可设为 true
report_to: "none"           # "wandb" / "tensorboard" / "none"
overwrite_output_dir: true


Writing /content/drive/MyDrive/AIAA3102/Final_Project/Configs/training_args.yaml


In [None]:
%%writefile /content/drive/MyDrive/AIAA3102/Final_Project/Configs/model_config.yaml
# model_config.yaml
# 模型 / tokenizer 层级设置与输入格式（用于数据处理与推理）
model_name_or_path: "TinyLlama/TinyLlama_v1.1"
tokenizer_name_or_path: "TinyLlama/TinyLlama_v1.1"
pad_token: "</s>"
eos_token: "</s>"
bos_token: "<s>"
max_input_length: 512
max_generation_length: 128

# 训练输入格式（causal lm）
# prompt_format 用于把 prompt 与 reference 拼接成训练文本（causal LM）
# 使用占位符 {prompt} 和 {response}
prompt_format: |
  ### 问：
  {prompt}
  ### 答：
  {response}

# 对齐 labels 设置（若需要 special tokens）
label_smoothing: 0.0

# tinyllama 特定层名称（用于 LoRA target modules）
# 如果不确定，可在脚本中打印 model.named_modules() 确认
gpt_attn_module_names:
  - "q_proj"
  - "k_proj"
  - "v_proj"
  - "o_proj"


Overwriting /content/drive/MyDrive/AIAA3102/Final_Project/Configs/model_config.yaml


In [None]:
%%writefile /content/drive/MyDrive/AIAA3102/Final_Project/Configs/eval_config.yaml
# eval_config.yaml
# 评估/推理相关配置（生成策略、拒答判定、指标）
seed: 42

# 生成参数
generation:
  do_sample: false
  num_beams: 1
  max_new_tokens: 128
  temperature: 0.0
  top_k: 50
  top_p: 0.95
  repetition_penalty: 1.0
  eos_token_id: null   # 若需要，可从 tokenizer 获取并在脚本里填充

# 拒答判定（关键词匹配 + 概率阈值可合并使用）
refusal_detection:
  # 若生成文本包含任一关键词则判为拒答
  refusal_keywords:
    - "我不知道"
    - "抱歉，我无法"
    - "无法确定"
    - "I don't know"
    - "I am not sure"
  # 可选：基于生成概率/score 的阈值判定（需在脚本中实现）
  use_score_threshold: false
  score_threshold: 0.0

# 评估指标（会在 eval 脚本中计算）
metrics:
  - "refusal_rate"      # 对 unknown 测试集：模型拒答的比例
  - "unknown_false_positive_rate"  # 模型对 unknown 生成答案（误答率）
  - "known_exact_match" # 对 known 样本的 EM（或可替换为 rouge/f1）
  - "known_rouge_l"

# 评估数据路径（相对于项目根目录）
datasets:
  known_eval: "data/valid.jsonl"
  unknown_eval: "data/unknown_test.jsonl"

# 评价器（可选：使用 LLM-as-judge）
judge:
  use_llm_judge: false
  judge_model: "gpt-4o-mini"   # 若环境可调，用于主观质量评分（需要 API）
  judge_prompts_path: "configs/judge_prompts.yaml"


Writing /content/drive/MyDrive/AIAA3102/Final_Project/Configs/eval_config.yaml


# 生成py脚本文件

In [None]:
%%writefile /content/drive/MyDrive/AIAA3102/Final_Project/Scripts/preprocess.py

In [None]:
%%writefile /content/drive/MyDrive/AIAA3102/Final_Project/Scripts/train_base.py
#!/usr/bin/env python3
# scripts/train_base.py
"""
Train script for tinyllama / causal LM using LoRA or QLoRA (4-bit).
Reads configs from configs/*.yaml and trains a causal LM with PEFT.
Designed for Colab T4 (8GB) usage — conservative defaults included.

Usage:
    python scripts/train_base.py \
        --config_dir configs \
        --train_file data/train.jsonl \
        --valid_file data/valid.jsonl

"""

import argparse
import os
import logging
from pathlib import Path
import json
import math
import random
from typing import Dict, List

import torch
import yaml
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
    set_seed,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftConfig

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")


def load_yaml(path: Path) -> Dict:
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


def build_prompt(prompt: str, response: str, prompt_template: str) -> str:
    """Format prompt+response according to prompt_template from model_config.yaml"""
    return prompt_template.replace("{prompt}", prompt).replace("{response}", response)


def read_jsonl(path: Path) -> List[Dict]:
    objs = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            objs.append(json.loads(line))
    return objs


def make_dataset_from_jsonl(jsonl_path: Path, tokenizer, prompt_template: str, max_length: int):
    """Load JSONL where each item has 'prompt' and 'response', return HF Dataset tokenized."""
    raw = read_jsonl(jsonl_path)
    texts = []
    for item in raw:
        prompt = item.get("prompt", "").strip()
        response = item.get("response", "").strip()
        text = build_prompt(prompt, response, prompt_template)
        texts.append({"text": text})
    # Build a Dataset from list of dicts
    ds = Dataset.from_list(texts)

    def tokenize_fn(examples):
        out = tokenizer(
            examples["text"],
            truncation=True,
            max_length=max_length,
            padding="max_length",
        )
        # Labels: for causal LM we can set labels = input_ids (Trainer will shift internally)
        out["labels"] = out["input_ids"].copy()
        return out

    tokenized = ds.map(tokenize_fn, batched=True, remove_columns=["text"])
    return tokenized


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_dir", type=str, default="configs", help="Directory containing YAML configs")
    parser.add_argument("--train_file", type=str, default="data/train.jsonl")
    parser.add_argument("--valid_file", type=str, default="data/valid.jsonl")
    parser.add_argument("--overwrite_output_dir", action="store_true")
    parser.add_argument("--push_to_hub", action="store_true")
    args = parser.parse_args()

    cfg_dir = Path(args.config_dir)
    training_cfg = load_yaml(cfg_dir / "training_args.yaml")
    model_cfg = load_yaml(cfg_dir / "model_config.yaml")
    eval_cfg = load_yaml(cfg_dir / "eval_config.yaml")

    # Seed
    seed = training_cfg.get("seed", 42)
    set_seed(seed)
    random.seed(seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Running on device: {device}")

    model_name_or_path = training_cfg.get("model_name_or_path", model_cfg.get("model_name_or_path"))
    if model_name_or_path is None:
        raise ValueError("Model name/path not specified in configs")

    # Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_cfg.get("tokenizer_name_or_path", model_name_or_path))
    # Ensure pad token exists (some causal models don't have pad_token)
    if tokenizer.pad_token is None:
        logger.info("Tokenizer has no pad_token, setting pad_token = eos_token")
        tokenizer.pad_token = tokenizer.eos_token

    # Decide quantization / bitsandbytes config
    use_qlora = training_cfg.get("qlora", {}).get("use_qlora", False)
    use_4bit = training_cfg.get("qlora", {}).get("use_4bit", False) and use_qlora

    bnb_config = None
    if use_4bit:
        # Setup BitsAndBytes config for 4-bit loading (QLoRA)
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type=training_cfg["qlora"].get("bnb_4bit_quant_type", "nf4"),
            bnb_4bit_use_double_quant=training_cfg["qlora"].get("bnb_4bit_use_double_quant", True),
            bnb_4bit_compute_dtype=getattr(torch, training_cfg["qlora"].get("bnb_4bit_compute_dtype", "bfloat16")),
        )
        logger.info(f"Using 4-bit QLoRA bitsandbytes config: {bnb_config}")

    # Load model (with or without 4-bit)
    try:
        if bnb_config is not None:
            model = AutoModelForCausalLM.from_pretrained(
                model_name_or_path,
                quantization_config=bnb_config,
                device_map="auto",
                trust_remote_code=True,
            )
        else:
            model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto")
    except Exception as e:
        logger.warning("Failed to load with device_map=auto or quant config, trying CPU load as fallback. Error: %s", e)
        model = AutoModelForCausalLM.from_pretrained(model_name_or_path, low_cpu_mem_usage=True)
        model.to(device)

    # Prepare for k-bit training if using QLoRA
    if use_qlora and bnb_config is not None:
        logger.info("Preparing model for k-bit training (QLoRA flow).")
        model = prepare_model_for_kbit_training(model)

    # Build LoRA config if requested
    use_lora = training_cfg.get("lora", {}).get("use_lora", True)
    if use_lora:
        lora_cfg = training_cfg.get("lora", {})
        lora_config = LoraConfig(
            r=lora_cfg.get("r", 8),
            lora_alpha=lora_cfg.get("lora_alpha", 32),
            target_modules=lora_cfg.get("target_modules", None),
            lora_dropout=lora_cfg.get("lora_dropout", 0.1),
            bias="none",
            task_type="CAUSAL_LM",
        )
        model = get_peft_model(model, lora_config)
        logger.info("LoRA adapter attached to the model.")

    # Prepare datasets
    max_input_length = model_cfg.get("max_input_length", 512)
    train_ds = make_dataset_from_jsonl(Path(args.train_file), tokenizer, model_cfg.get("prompt_format", "{prompt}{response}"), max_input_length)
    valid_ds = make_dataset_from_jsonl(Path(args.valid_file), tokenizer, model_cfg.get("prompt_format", "{prompt}{response}"), max_input_length)

    # Data collator
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False, pad_to_multiple_of=8)

    # TrainingArguments
    output_dir = training_cfg.get("output_dir", "models/finetuned_model")
    # Map fields from training_cfg into TrainingArguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=training_cfg.get("per_device_train_batch_size", 2),
        per_device_eval_batch_size=training_cfg.get("per_device_eval_batch_size", 4),
        gradient_accumulation_steps=training_cfg.get("gradient_accumulation_steps", 1),
        num_train_epochs=training_cfg.get("num_train_epochs", 2),
        max_steps=training_cfg.get("max_steps", None),
        learning_rate=training_cfg.get("learning_rate", 5e-5),
        weight_decay=training_cfg.get("weight_decay", 0.0),
        logging_steps=training_cfg.get("logging_steps", 50),
        evaluation_strategy=training_cfg.get("evaluation_strategy", "steps"),
        eval_steps=training_cfg.get("eval_steps", 200),
        save_steps=training_cfg.get("save_steps", 200),
        save_total_limit=training_cfg.get("save_total_limit", 3),
        fp16=training_cfg.get("fp16", True),
        gradient_checkpointing=training_cfg.get("gradient_checkpointing", True),
        load_best_model_at_end=training_cfg.get("load_best_model_at_end", True),
        metric_for_best_model=training_cfg.get("metric_for_best_model", "loss"),
        push_to_hub=training_cfg.get("push_to_hub", False) or args.push_to_hub,
        report_to=training_cfg.get("report_to", "none"),
        remove_unused_columns=False,
        overwrite_output_dir=args.overwrite_output_dir or training_cfg.get("overwrite_output_dir", False),
    )

    # Create Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=valid_ds,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    # Train
    logger.info("***** Running training *****")
    logger.info("  Num train examples = %d", len(train_ds))
    logger.info("  Num valid examples = %d", len(valid_ds))
    logger.info("  Output dir = %s", output_dir)

    trainer.train()
    logger.info("Training completed. Saving model...")

    # Save peft adapters & tokenizer properly
    # If using PEFT, save_pretrained will save adapter weights
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    logger.info("Saved model and tokenizer to %s", output_dir)

    # Optionally push to hub
    if training_args.push_to_hub:
        try:
            logger.info("Pushing model to the Hub...")
            trainer.push_to_hub()
            logger.info("Pushed to Hub.")
        except Exception as e:
            logger.warning("Failed to push to hub: %s", e)


if __name__ == "__main__":
    main()


Writing /content/drive/MyDrive/AIAA3102/Final_Project/Scripts/train_base.py


In [None]:
%%writefile /content/drive/MyDrive/AIAA3102/Final_Project/Scripts/train_sparse.py

In [None]:
%%writefile /content/drive/MyDrive/AIAA3102/Final_Project/Scripts/eval_refusal.py

In [None]:
%%writefile /content/drive/MyDrive/AIAA3102/Final_Project/Scripts/upload_to_hub.py

# 数据集预处理


In [1]:
# == Colab cell: 下载并准备 HuggingFaceH4/CodeAlpaca_20K 数据集为 train/valid/unknown_test ==
# 运行前请确保 !pip install datasets 已执行
from datasets import load_dataset
import random, json, re, os
from pathlib import Path

random.seed(42)

# 配置
# dataset_name = "HuggingFaceH4/CodeAlpaca_20k"  # Coding Dataset
dataset_name = "Amod/mental_health_counseling_conversations" # Counseling Dataset

# prompt_format: 你之前 configs/model_config.yaml 使用的格式示范
prompt_template = "### {Prompt}\n### \n{Response}"

# 载入（仅示例：取全部或切片以节省时间）
print("Loading dataset...")
ds = load_dataset(dataset_name, split="train")  # 整个 20k
print("Total examples:", len(ds))

# 将 dataset 转换为 prompt/response
examples = []
for ex in ds:
    instr = ex.get("prompt","") or ex.get("Context","") or ""
    inp = ex.get("input","") or ""
    out = ex.get("completion","") or ex.get("Response","") or ""

    # 合并 instruction + input 为 prompt（若 input 为空就无所谓）
    if inp and str(inp).strip():
        prompt = f"{instr}\n{inp}"
    else:
        prompt = instr

    # 清理空白
    prompt = prompt.strip()
    response = out.strip()
    if not prompt or not response:
        continue
    examples.append({"prompt": prompt, "response": response})

print("Formatted examples:", len(examples))

# 推荐 split: train ~ 18k, valid 1k, unknown_test 1k (总量 20k)
N = len(examples)
train_n = min(1500, N - 2000)
valid_n = 1000
unknown_n = 1000

random.shuffle(examples)
train_examples = examples[:train_n]
valid_examples = examples[train_n:train_n+valid_n]
pool_for_unknown = examples[train_n+valid_n:train_n+valid_n+unknown_n*3]  # pool to perturb from

# 简单实体扰动器（把标识符 / 库名替换为伪造名字）
def perturb_code_text(text, n_perturb=1):
    """
    简单策略：找出可能的标识符 / 函数名（\b[A-Za-z_][A-Za-z0-9_]*\b）,
    随机替换 n_perturb 个为 FakeLibX / fake_funcY 等，保持格式和长度合理。
    """
    tokens = list(set(re.findall(r"\b[A-Za-z_][A-Za-z0-9_]*\b", text)))
    # 过滤掉很常见的英语单词（简单黑名单）
    blacklist = {"def","return","for","in","if","else","while","import","from","as","class","True","False","None","int","str","float","len","print"}
    candidates = [t for t in tokens if t not in blacklist and not t.isdigit() and len(t) > 1]
    if not candidates:
        return text  # 无标识符可扰动
    perturbed = text
    to_replace = random.sample(candidates, min(n_perturb, len(candidates)))
    for i,orig in enumerate(to_replace):
        fake = f"FakeLib{random.randint(100,999)}" if orig[0].isupper() else f"fake_fn_{random.randint(1000,9999)}"
        # 使用 word boundary 替换
        perturbed = re.sub(rf"\b{re.escape(orig)}\b", fake, perturbed)
    return perturbed

# 生成 unknown_test：对 pool 中样本做扰动，保持 prompt/response 结构，response 置空或保留（这里置空以表示未知）
unknown_examples = []
for i,ex in enumerate(pool_for_unknown[:unknown_n]):
    p = ex["prompt"]
    r = ex["response"]
    # 扰动 prompt（使其出现伪实体/伪库）
    p_pert = perturb_code_text(p, n_perturb=2)
    # 选两种 unknown 策略中的一种：把 response 设为空（模型应拒答），或保留原 response 但实体已扰动（更难）
    if random.random() < 0.5:
        unknown_examples.append({"prompt": p_pert, "response": ""})
    else:
        # 将 response 同步扰动（模拟：用户问伪库的问题，原回答可能不适用）
        r_pert = perturb_code_text(r, n_perturb=1)
        unknown_examples.append({"prompt": p_pert, "response": ""})  # 判定为 unknown，response 留空仍可
print("Unknown examples prepared:", len(unknown_examples))

# 写 jsonl 文件
def write_jsonl(lst, path):
    with open(path, "w", encoding="utf-8") as f:
        for item in lst:
            f.write(json.dumps(item, ensure_ascii=False) + "\n")
    print("Wrote", len(lst), "to", path)

write_jsonl(train_examples, "/content/drive/MyDrive/AIAA3102/Final_Project/Data/con_train.jsonl")
write_jsonl(valid_examples, "/content/drive/MyDrive/AIAA3102/Final_Project/Data/con_valid.jsonl")
write_jsonl(unknown_examples, "/content/drive/MyDrive/AIAA3102/Final_Project/Data/con_unknown_test.jsonl")

print("Done. Files at:", out_dir.resolve())


Loading dataset...


README.md: 0.00B [00:00, ?B/s]

combined_dataset.json: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/3512 [00:00<?, ? examples/s]

Total examples: 3512
Formatted examples: 3508
Unknown examples prepared: 1000
Wrote 1500 to /content/drive/MyDrive/AIAA3102/Final_Project/Data/con_train.jsonl
Wrote 1000 to /content/drive/MyDrive/AIAA3102/Final_Project/Data/con_valid.jsonl
Wrote 1000 to /content/drive/MyDrive/AIAA3102/Final_Project/Data/con_unknown_test.jsonl


NameError: name 'out_dir' is not defined