# Qwen2.5-1.5B-Instruct LoRA SFT per-persona

This notebook uses TRL's `SFTTrainer` to train one LoRA adapter per persona dataset. The datasets are generated by `bench/ft_dataset.py`.

- Base model: `Qwen/Qwen2.5-1.5B-Instruct`
- Library: `trl` (SFT), `peft` (LoRA), `transformers`, `datasets`
- Input: one JSONL per persona with `{"messages": [...], "meta": {...}}`

- Output: one adapter directory per persona with weights, tokenizer, and config



In [1]:
# Install deps (skip if pre-installed)
%pip -q install transformers==4.44.2 peft==0.13.2 trl==0.9.6 datasets==2.20.0 accelerate==1.0.1 bitsandbytes==0.42.0 datasets==2.20.0 tensorboardX




[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List

import json
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer, SFTConfig
from peft import LoraConfig, get_peft_model
import torch

# Run configuration. Each value can be overridden with an environment variable
# of the same name. The DATA_DIR / OUTPUT_ROOT fallbacks are user-specific
# absolute paths, so set those variables when running on another machine.
BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
DATA_DIR = os.environ.get("DATA_DIR", "/Users/apanale/tests/alignment/results/ft")
OUTPUT_ROOT = os.environ.get("OUTPUT_ROOT", "/Users/apanale/tests/alignment/outputs/qwen_lora")

# Create the output root up front; exist_ok makes re-running this cell harmless.
os.makedirs(OUTPUT_ROOT, exist_ok=True)
print("Base:", BASE_MODEL)
print("Data dir:", DATA_DIR)
print("Output:", OUTPUT_ROOT)

# Detect the available accelerator once; later cells read these globals.
is_cuda = torch.cuda.is_available()
is_mps = torch.backends.mps.is_available()
# Use device_map only on CUDA; on MPS/CPU move the model manually
device_map = "auto" if is_cuda else None
# Pick the weight dtype: bf16 (or fp16 when bf16 is unsupported) on CUDA,
# float32 otherwise. The MPS and CPU branches currently select the same dtype.
if is_cuda:
    dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
elif is_mps:
    dtype = torch.float32
else:
    dtype = torch.float32

# Relax MPS memory watermark to reduce OOM stops (optional; can be risky)
# Only applied when the variable is not already set, so a user-provided value wins.
# NOTE(review): "0.0" presumably disables the allocator's upper limit — confirm
# against the PyTorch MPS environment-variable documentation.
if is_mps and "PYTORCH_MPS_HIGH_WATERMARK_RATIO" not in os.environ:
    os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
    print("Set PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 for MPS")



  from .autonotebook import tqdm as notebook_tqdm


Base: Qwen/Qwen2.5-1.5B-Instruct
Data dir: /Users/apanale/tests/alignment/results/ft
Output: /Users/apanale/tests/alignment/outputs/qwen_lora
Set PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 for MPS


In [3]:
def load_jsonl_chat(path: str) -> List[Dict]:
    """Read a JSON Lines file into a list of decoded objects.

    Args:
        path: Location of a UTF-8 encoded file with one JSON document per line.

    Returns:
        The decoded documents in file order. Lines that are empty or contain
        only whitespace are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as handle:
        # Iterating the handle streams the file line by line; blank lines are
        # filtered out before decoding.
        return [json.loads(raw_line) for raw_line in handle if raw_line.strip()]

# Convert messages -> single string using chat template
# Qwen tokenizer supports apply_chat_template

def build_sft_dataset(tokenizer: AutoTokenizer, rows: List[Dict]) -> Dataset:
    """Render chat conversations to plain text and wrap them in a `Dataset`.

    Args:
        tokenizer: Tokenizer whose `apply_chat_template` renders a message list
            into a single training string.
        rows: Decoded JSONL records; each is expected to carry a "messages"
            list of dicts with "role" and "content" keys.

    Returns:
        A `Dataset` with a single "text" column, one entry per row that had a
        non-empty "messages" list. Rows without messages are skipped.

    Notes:
        If the chat template cannot be applied to a conversation, the text
        falls back to a plain "role: content" join. That text does not follow
        the model's chat format, so every fallback is reported instead of
        being silently mixed into the training data.
    """
    texts: List[str] = []
    fallback_count = 0
    for r in rows:
        messages = r.get("messages") or []
        if not messages:
            continue
        try:
            text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
        except Exception as exc:
            # Best-effort fallback is kept, but surface the first failure so a
            # broken template or malformed record does not go unnoticed.
            if fallback_count == 0:
                print(f"Warning: apply_chat_template failed ({exc!r}); using naive 'role: content' join")
            fallback_count += 1
            text = "\n".join([f"{m.get('role')}: {m.get('content')}" for m in messages])
        texts.append(text)
    if fallback_count:
        print(f"Warning: {fallback_count} of {len(texts)} conversations were rendered without the chat template")
    return Dataset.from_dict({"text": texts})



In [4]:
# Load the tokenizer and base model once; both are notebook-level globals that
# the training function reads. `dtype` and `device_map` come from the
# configuration cell (device_map is None when CUDA is unavailable).
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    device_map=device_map,
)

# Move to MPS or CPU explicitly if not CUDA
def _move_model_for_non_cuda(m):
    """Place the model on MPS or CPU when no `device_map` was used.

    Reads the notebook-level `device_map` and `is_mps` globals.

    Args:
        m: The loaded model.

    Returns:
        `m` unchanged when `device_map` is set; otherwise the result of
        `m.to("mps")` if MPS is available, else `m.to("cpu")`.
    """
    if device_map is not None:
        # A device_map was used at load time, so no manual move is performed.
        return m
    target_device = "mps" if is_mps else "cpu"
    return m.to(target_device)

# Rebind the global `model` to the (possibly moved) instance.
model = _move_model_for_non_cuda(model)

# Make sure padding is defined
# If the tokenizer has no pad token, reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Loaded model and tokenizer")



Loaded model and tokenizer


In [5]:
# Find persona datasets (all .jsonl files in DATA_DIR)
# The list is sorted so the training order is deterministic across runs.
from glob import glob
jsonl_files = sorted(glob(str(Path(DATA_DIR) / "*.jsonl")))
print("Found", len(jsonl_files), "datasets")
# Display only the first few paths as the cell output.
jsonl_files[:5]


Found 16 datasets


['/Users/apanale/tests/alignment/results/ft/coach_empatico.jsonl',
 '/Users/apanale/tests/alignment/results/ft/conciso_pragmatico.jsonl',
 '/Users/apanale/tests/alignment/results/ft/critico_amable.jsonl',
 '/Users/apanale/tests/alignment/results/ft/curador_de_recursos.jsonl',
 '/Users/apanale/tests/alignment/results/ft/developer_mode_dan.jsonl']

In [6]:
def train_one(
    jsonl_path: str,
    output_root: str,
    max_steps: int = 1000,
    micro_batch_size: int = 2,
    gradient_accumulation_steps: int = 8,
    lr: float = 2e-4,
):
    """Train and save one LoRA adapter for a single persona dataset.

    Note: a later cell in this notebook defines `train_one` again with more
    conservative settings; once that cell runs it replaces this definition.

    Relies on the notebook-level `model`, `tokenizer` and `dtype` globals and
    on `load_jsonl_chat` / `build_sft_dataset`.

    Args:
        jsonl_path: Path to the persona JSONL file; its stem names the run.
        output_root: Directory under which `<stem>/` is created.
        max_steps: Total optimizer steps passed to `SFTConfig`.
        micro_batch_size: Per-device train batch size.
        gradient_accumulation_steps: Micro-batches accumulated per step.
        lr: Peak learning rate.

    Returns:
        The run directory as a string. It contains `adapter/`, `tokenizer/`
        and a `run.json` manifest (plus whatever the trainer writes).

    Raises:
        ValueError: If the dataset yields no usable conversations.
    """
    name = Path(jsonl_path).stem
    out_dir = Path(output_root) / name
    out_dir.mkdir(parents=True, exist_ok=True)

    # Load
    rows = load_jsonl_chat(jsonl_path)
    ds = build_sft_dataset(tokenizer, rows)
    print(name, "samples:", len(ds))
    if len(ds) == 0:
        # Fail early with a clear message, before the shared model is modified.
        raise ValueError(f"No usable conversations found in {jsonl_path}")

    # LoRA config
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    )

    # get_peft_model wraps the shared notebook-level `model`. The LoRA layers
    # are removed again in the `finally` block so the next persona (or a
    # re-run after an interrupt) starts from the plain base model.
    peft_model = get_peft_model(model, lora_config)
    try:
        sft_config = SFTConfig(
            output_dir=str(out_dir),
            max_steps=max_steps,
            per_device_train_batch_size=micro_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            learning_rate=lr,
            logging_steps=10,
            save_steps=100,
            save_total_limit=5,
            lr_scheduler_type="cosine",
            warmup_ratio=0.03,
            # Mixed-precision flags follow the dtype the base model was loaded in.
            bf16=(dtype == torch.bfloat16),
            fp16=(dtype == torch.float16),
            report_to=["tensorboard"],
            packing=True,
            dataset_text_field="text",
            max_seq_length=2048,
        )

        trainer = SFTTrainer(
            model=peft_model,
            tokenizer=tokenizer,
            train_dataset=ds,
            args=sft_config,
        )

        trainer.train()
        # Save adapter
        trainer.model.save_pretrained(str(out_dir / "adapter"))
        tokenizer.save_pretrained(str(out_dir / "tokenizer"))
        # Persist a small run manifest
        with open(out_dir / "run.json", "w", encoding="utf-8") as f:
            json.dump({
                "dataset": jsonl_path,
                "time": datetime.utcnow().isoformat() + "Z",
                "max_steps": max_steps,
                "micro_batch_size": micro_batch_size,
                "gradient_accumulation_steps": gradient_accumulation_steps,
                "lr": lr,
            }, f, ensure_ascii=False, indent=2)
    finally:
        # Strip the injected LoRA modules (without merging) from the shared model.
        peft_model.unload()

    return str(out_dir)



In [7]:
# Memory-safe training overrides for MPS/CPU
# This redefines train_one with conservative settings to avoid MPS OOMs

def train_one(
    jsonl_path: str,
    output_root: str,
    max_steps: int = 300,
    micro_batch_size: int = 1,
    gradient_accumulation_steps: int = 16,
    lr: float = 1e-4,
    max_seq_len: int = 512,
):
    """Train and save one LoRA adapter for a persona, with memory-saving settings.

    Uses a smaller LoRA rank, attention-only target modules, gradient
    checkpointing, and full-precision training (bf16 and fp16 both disabled).
    Relies on the notebook-level `model`, `tokenizer` and `is_mps` globals and
    on `load_jsonl_chat` / `build_sft_dataset`.

    Args:
        jsonl_path: Path to the persona JSONL file; its stem names the run.
        output_root: Directory under which `<stem>/` is created.
        max_steps: Total optimizer steps passed to `SFTConfig`.
        micro_batch_size: Per-device train batch size.
        gradient_accumulation_steps: Micro-batches accumulated per step.
        lr: Peak learning rate.
        max_seq_len: Maximum packed sequence length in tokens.

    Returns:
        The run directory as a string. It contains `adapter/`, `tokenizer/`
        and a `run.json` manifest (plus whatever the trainer writes).

    Raises:
        ValueError: If the dataset yields no usable conversations.
    """
    name = Path(jsonl_path).stem
    out_dir = Path(output_root) / name
    out_dir.mkdir(parents=True, exist_ok=True)

    # Free MPS cache between runs
    if 'is_mps' in globals() and is_mps:
        try:
            torch.mps.empty_cache()
        except Exception:
            # Cache cleanup is purely best-effort; training can proceed without it.
            pass

    rows = load_jsonl_chat(jsonl_path)
    ds = build_sft_dataset(tokenizer, rows)
    print(name, "samples:", len(ds))
    if len(ds) == 0:
        # Fail early with a clear message, before the shared model is modified.
        raise ValueError(f"No usable conversations found in {jsonl_path}")

    # LoRA config (smaller ranks to save memory)
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    )

    # Enable gradient checkpointing and disable cache to reduce memory
    try:
        model.config.use_cache = False
        model.gradient_checkpointing_enable()
    except Exception as exc:
        # Still best-effort, but report it: without checkpointing the run may
        # need noticeably more memory than this function is tuned for.
        print(f"Warning: could not enable gradient checkpointing: {exc!r}")

    # get_peft_model wraps the shared notebook-level `model`. The LoRA layers
    # are removed again in the `finally` block so the next persona (or a
    # re-run after an interrupt) starts from the plain base model.
    peft_model = get_peft_model(model, lora_config)
    try:
        sft_config = SFTConfig(
            output_dir=str(out_dir),
            max_steps=max_steps,
            per_device_train_batch_size=micro_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            learning_rate=lr,
            logging_steps=10,
            save_steps=100,
            save_total_limit=3,
            lr_scheduler_type="cosine",
            warmup_ratio=0.03,
            bf16=False,
            fp16=False,
            report_to=["tensorboard"],
            packing=True,
            dataset_text_field="text",
            max_seq_length=max_seq_len,
            gradient_checkpointing=True,
            gradient_checkpointing_kwargs={"use_reentrant": False},
        )

        trainer = SFTTrainer(
            model=peft_model,
            tokenizer=tokenizer,
            train_dataset=ds,
            args=sft_config,
        )

        trainer.train()
        trainer.model.save_pretrained(str(out_dir / "adapter"))
        tokenizer.save_pretrained(str(out_dir / "tokenizer"))
        # Persist the hyperparameters used for this run next to the adapter.
        with open(out_dir / "run.json", "w", encoding="utf-8") as f:
            json.dump({
                "dataset": jsonl_path,
                "time": datetime.utcnow().isoformat() + "Z",
                "max_steps": max_steps,
                "micro_batch_size": micro_batch_size,
                "gradient_accumulation_steps": gradient_accumulation_steps,
                "lr": lr,
                "max_seq_len": max_seq_len,
            }, f, ensure_ascii=False, indent=2)
    finally:
        # Strip the injected LoRA modules (without merging) from the shared model.
        peft_model.unload()

    return str(out_dir)



In [8]:
# Train all persona adapters
# `outputs` collects the run directory returned for each dataset; if the loop is
# interrupted it holds only the runs that completed.
outputs = []
for jf in jsonl_files:
    print("=== Training", jf)
    # The keyword arguments here override the defaults of whichever
    # `train_one` definition is currently active in the kernel.
    out = train_one(jf, OUTPUT_ROOT, max_steps=400, micro_batch_size=2, gradient_accumulation_steps=8, lr=2e-4)
    outputs.append(out)

# Display the collected run directories as the cell output.
outputs


=== Training /Users/apanale/tests/alignment/results/ft/coach_empatico.jsonl
coach_empatico samples: 803
'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "
Generating train split: 212 examples [00:00, 3187.48 examples/s]
max_steps is given, it will override any value given in num_train_epochs
  2%|▎         | 10/400 [06:06<3:53:02, 35.85s/it]

{'loss': 2.0928, 'grad_norm': 0.4766330122947693, 'learning_rate': 0.0001666666666666667, 'epoch': 0.75}


  5%|▌         | 20/400 [12:01<3:45:21, 35.58s/it]

{'loss': 1.8431, 'grad_norm': 0.4818873405456543, 'learning_rate': 0.00019979028262377118, 'epoch': 1.51}


  8%|▊         | 30/400 [17:54<3:36:40, 35.14s/it]

{'loss': 1.6355, 'grad_norm': 0.37391456961631775, 'learning_rate': 0.00019893981312363562, 'epoch': 2.26}


 10%|▉         | 39/400 [23:00<3:22:32, 33.66s/it]

KeyboardInterrupt: 

In [None]:
# Save a summary manifest
# Records the base model, a UTC timestamp, and the run directories collected in
# `outputs` by the training cell; written to summary.json under OUTPUT_ROOT.
manifest = {
    "base_model": BASE_MODEL,
    "time": datetime.utcnow().isoformat() + "Z",
    "outputs": outputs,
}
with open(Path(OUTPUT_ROOT) / "summary.json", "w", encoding="utf-8") as f:
    json.dump(manifest, f, ensure_ascii=False, indent=2)
# Display the manifest as the cell output.
manifest
