In [1]:
from pathlib import Path
import os, json, torch
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig, TrainingArguments,
    DataCollatorForLanguageModeling, Trainer
)
from peft import get_peft_model, LoraConfig

# ---------- Paths ----------
DATA_PATH  = Path(r"C:\Users\yifan\yifanbot\data\tone.jsonl")
OUTPUT_DIR = Path("./lora-Qwen2.5-7B")      # where to save adapter

# ---------- Base model ----------
BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-7B-Instruct")

assert DATA_PATH.exists(), f"Dataset not found: {DATA_PATH}"

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# 4-bit config (only used if CUDA is available)
bnb_config = None
if device == "cuda":
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16
    )


Device: cuda


In [2]:
def load_jsonl(path: Path):
    rows = []
    with path.open("r", encoding="utf-8") as f:
        for ln, line in enumerate(f, 1):
            s = line.strip()
            if not s:
                continue
            try:
                rows.append(json.loads(s))
            except Exception as e:
                raise ValueError(f"Bad JSON at line {ln}: {e}")
    return rows

raw_rows = load_jsonl(DATA_PATH)
print("Loaded rows:", len(raw_rows))
assert all(("instruction" in r and "output" in r and "input" in r) for r in raw_rows), "tone.jsonl needs instruction/input/output"

# Minimal, model-agnostic prompt template
def format_example(r):
    instr = (r.get("instruction") or "").strip()
    inp   = (r.get("input") or "").strip()
    out   = (r.get("output") or "").strip()
    sys = "You are Mimic‑Me Bot. Write in the user's tone: concise, direct, slightly informal, pragmatic. Prefer bullets; no fluff. Use 'I' for the user's experience."
    if inp:
        prompt = (
            f"[SYSTEM] {sys}\n"
            f"[USER] {instr}\n"
            f"[USER-CONTEXT] {inp}\n"
            "[ASSISTANT] "
        )
    else:
        prompt = (
            f"[SYSTEM] {sys}\n"
            f"[USER] {instr}\n"
            "[ASSISTANT] "
        )
    return {"text": prompt + out}

train_rows = [format_example(r) for r in raw_rows]
ds = Dataset.from_list(train_rows)
print(ds)


Loaded rows: 1
Dataset({
    features: ['text'],
    num_rows: 1
})


In [3]:
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None,
    quantization_config=bnb_config,        # None on CPU
)
model.config.use_cache = False            # needed with gradient checkpointing


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
# Target modules known to work well for Qwen/Mistral
target_modules = ["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=target_modules
)

model = get_peft_model(model, lora_config)

def tok_fn(ex):
    return tokenizer(
        ex["text"],
        truncation=True,
        max_length=1024,
        padding=False,
    )

tok_ds = ds.map(tok_fn, batched=True, remove_columns=ds.column_names)
print(tok_ds)


Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 1
})


In [9]:
training_args = TrainingArguments(
    output_dir=str(OUTPUT_DIR),
    num_train_epochs=2,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,   # effective batch size = 16
    learning_rate=2e-4,
    logging_steps=10,
    save_strategy="steps",
    save_steps=200,
    save_total_limit=3,
    bf16=(device=="cuda"),
    fp16=False,
    gradient_checkpointing=True,
    optim=("paged_adamw_8bit" if device=="cuda" else "adamw_torch"),
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    max_grad_norm=1.0,
    report_to="none",
)
# also add after wrapping with get_peft_model:
model.enable_input_require_grads()
model.gradient_checkpointing_enable()


collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tok_ds,
    data_collator=collator,
)

trainer.train()


Step,Training Loss


TrainOutput(global_step=2, training_loss=4.644827842712402, metrics={'train_runtime': 2.7204, 'train_samples_per_second': 0.735, 'train_steps_per_second': 0.735, 'total_flos': 6911881611264.0, 'train_loss': 4.644827842712402, 'epoch': 2.0})

In [10]:
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
trainer.model.save_pretrained(str(OUTPUT_DIR))
tokenizer.save_pretrained(str(OUTPUT_DIR))
print("Saved LoRA adapter to:", OUTPUT_DIR.resolve())


Saved LoRA adapter to: C:\Users\yifan\yifanbot\notebooks\lora-Qwen2.5-7B
