# Qwen2.5-1.5B-Instruct LoRA SFT per-persona

This notebook trains one LoRA adapter per persona dataset generated by `bench/ft_dataset.py` using TRL's SFTTrainer.

- Base model: `Qwen/Qwen2.5-1.5B-Instruct`
- Library: `trl` (SFT), `peft` (LoRA), `transformers`, `datasets`
- Input: one JSONL per persona with `{"messages": [...], "meta": {...}}`

- Output: one directory per persona under `OUTPUT_ROOT`, containing `adapter/` (LoRA weights and config), `tokenizer/`, and a `run.json` manifest of the training settings



In [1]:
# Install deps (skip if pre-installed)
%pip -q install transformers==4.44.2 peft==0.13.2 trl==0.9.6 datasets==2.20.0 accelerate==1.0.1 bitsandbytes==0.42.0 datasets==2.20.0 tensorboardX



Note: you may need to restart the kernel to use updated packages.


ERROR: Could not install packages due to an OSError: [Errno 2] No such file or directory: 'C:\\Users\\agusm\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python312\\site-packages\\transformers\\models\\deprecated\\trajectory_transformer\\convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py'


[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: C:\Users\agusm\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [4]:
pip install datasets transformers trl peft bitsandbytes

Defaulting to user installation because normal site-packages is not writeable
Collecting transformers
  Downloading transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting trl
  Downloading trl-0.24.0-py3-none-any.whl.metadata (11 kB)
Collecting peft
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting accelerate>=1.4.0 (from trl)
  Downloading accelerate-1.11.0-py3-none-any.whl.metadata (19 kB)
Downloading transformers-4.57.1-py3-none-any.whl (12.0 MB)
   ---------------------------------------- 0.0/12.0 MB ? eta -:--:--
   -------------------- ------------------- 6.0/12.0 MB 33.5 MB/s eta 0:00:01
   ---------------------------------------  11.8/12.0 MB 29.6 MB/s eta 0:00:01
   ---------------------------------------  11.8/12.0 MB 29.6 MB/s eta 0:00:01
   ---------------------------------------- 12.0/12.0 MB 14.4 MB/s eta 0:00:


[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: C:\Users\agusm\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [3]:
import inspect
import json
import os
import warnings
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List

import torch
from datasets import Dataset
from peft import LoraConfig, get_peft_model
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer, SFTConfig

# Run configuration: each value can be overridden through an environment variable of the same name
BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
DATA_DIR = os.environ.get("DATA_DIR", r"C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft")
OUTPUT_ROOT = os.environ.get("OUTPUT_ROOT", r"C:\Users\agusm\Documents\tesis_ahorasi\misalignment\output\qwen_lora")

# Make sure the adapter output root exists before any training starts
os.makedirs(OUTPUT_ROOT, exist_ok=True)
print("Base:", BASE_MODEL)
print("Data dir:", DATA_DIR)
print("Output:", OUTPUT_ROOT)

# Detect the available accelerator
is_cuda = torch.cuda.is_available()
is_mps = torch.backends.mps.is_available()

# device_map is only used on CUDA; on MPS/CPU the model is moved manually later
device_map = "auto" if is_cuda else None

# Half precision only on CUDA (bf16 when supported, fp16 otherwise); MPS and CPU stay in float32
if is_cuda and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
elif is_cuda:
    dtype = torch.float16
else:
    dtype = torch.float32

# Relax MPS memory watermark to reduce OOM stops (optional; can be risky).
# A value already present in the environment is left untouched.
if is_mps and os.environ.get("PYTORCH_MPS_HIGH_WATERMARK_RATIO") is None:
    os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
    print("Set PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 for MPS")



Base: Qwen/Qwen2.5-1.5B-Instruct
Data dir: C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft
Output: C:\Users\agusm\Documents\tesis_ahorasi\misalignment\output\qwen_lora


In [33]:
def load_jsonl_chat(path: str) -> List[Dict]:
    """Read a JSONL file and return one parsed object per non-blank line.

    Args:
        path: Location of the UTF-8 encoded JSONL file.

    Returns:
        The decoded JSON values, in file order. Lines that are empty or
        contain only whitespace are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as handle:
        return [json.loads(raw_line) for raw_line in handle if raw_line.strip()]

# Convert messages -> single string using chat template
# Qwen tokenizer supports apply_chat_template

def build_sft_dataset(tokenizer: AutoTokenizer, rows: List[Dict]) -> Dataset:
    """Render each chat sample to a single training string and wrap them in a Dataset.

    Args:
        tokenizer: Tokenizer whose `apply_chat_template` is used to render the messages.
        rows: Parsed JSONL records; each is expected to carry a "messages" list.
            Records with a missing or empty "messages" value are skipped.

    Returns:
        A `Dataset` with a single "text" column, one entry per rendered sample.

    Notes:
        If the chat template fails for a sample, the sample is still kept but is
        formatted with a plain "role: content" join. That text does not follow the
        model's chat format, so a summary warning is emitted instead of hiding it.
    """
    texts: List[str] = []
    fallback_count = 0
    first_error = None
    for r in rows:
        messages = r.get("messages") or []
        if not messages:
            continue
        try:
            text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
        except Exception as exc:
            # Fallback: naive join (kept as best effort, but counted and reported below)
            fallback_count += 1
            if first_error is None:
                first_error = exc
            text = "\n".join([f"{m.get('role')}: {m.get('content')}" for m in messages])
        texts.append(text)

    if fallback_count:
        warnings.warn(
            f"{fallback_count} of {len(texts)} samples could not be rendered with the chat template "
            f"(first error: {first_error!r}); they were formatted with a plain 'role: content' join.",
            stacklevel=2,
        )
    return Dataset.from_dict({"text": texts})



In [34]:
# TRL SFTTrainer compatibility wrapper to handle API changes across versions

def build_sft_trainer(model, ds, tokenizer, sft_config, *, max_seq_length=None, packing=None, dataset_text_field=None):
    """
    Create SFTTrainer with args placed according to TRL version.

    - Older TRL: accepts tokenizer= and packing/dataset_text_field/max_seq_length in SFTTrainer
    - Newer TRL: expects processing_class= and these fields in SFTConfig

    The trainer signature is inspected to pick the tokenizer keyword, so a TypeError
    raised inside trainer construction is not mistaken for an API mismatch.

    Args:
        model: Model (or PEFT-wrapped model) to train.
        ds: Training dataset.
        tokenizer: Tokenizer, passed as `processing_class` or `tokenizer`.
        sft_config: Training configuration. NOTE: it is updated in place when one of
            the optional overrides below maps onto one of its attributes.
        max_seq_length: Optional sequence-length limit. Written to the config's
            `max_length` (or `max_seq_length`) attribute when present, otherwise
            passed to the trainer as `max_seq_length`.
        packing: Optional override, written to the config or passed to the trainer.
        dataset_text_field: Optional override, written to the config or passed to the trainer.

    Returns:
        The constructed SFTTrainer.

    Raises:
        TypeError: If the trainer cannot be constructed with the detected keyword(s).
    """
    init_params = inspect.signature(SFTTrainer.__init__).parameters
    takes_var_kwargs = any(p.kind is inspect.Parameter.VAR_KEYWORD for p in init_params.values())

    trainer_kwargs = {"model": model, "train_dataset": ds, "args": sft_config}

    # Each override: candidate config attribute names (newest first); the last name is
    # also the legacy SFTTrainer keyword.
    overrides = (
        (("max_length", "max_seq_length"), max_seq_length),
        (("packing",), packing),
        (("dataset_text_field",), dataset_text_field),
    )
    for config_names, value in overrides:
        if value is None:
            continue
        config_attr = next((n for n in config_names if hasattr(sft_config, n)), None)
        if config_attr is not None:
            # Previously these values were silently dropped on the config-based API.
            setattr(sft_config, config_attr, value)
            continue
        legacy_kwarg = config_names[-1]
        if legacy_kwarg in init_params or takes_var_kwargs:
            trainer_kwargs[legacy_kwarg] = value
        else:
            warnings.warn(
                f"Installed TRL accepts '{legacy_kwarg}' neither in the config nor in SFTTrainer; ignoring it.",
                stacklevel=2,
            )

    candidates = [kw for kw in ("processing_class", "tokenizer") if kw in init_params]
    if not candidates:
        # Opaque signature (e.g. wrapped as *args/**kwargs): probe the newest keyword first.
        candidates = ["processing_class", "tokenizer"]

    first_error = None
    for tokenizer_kwarg in candidates:
        try:
            return SFTTrainer(**trainer_kwargs, **{tokenizer_kwarg: tokenizer})
        except TypeError as err:
            if first_error is None:
                first_error = err
    # Surface the first failure for visibility when every attempted style fails
    raise first_error



In [35]:
# Load the tokenizer and the base model with the dtype/device_map chosen in the config cell
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    device_map=device_map,
)


def _move_model_for_non_cuda(m):
    """Return `m` placed on MPS or CPU when no device_map is in use; otherwise return it unchanged."""
    if device_map is not None:
        return m
    target_device = "mps" if is_mps else "cpu"
    return m.to(target_device)


model = _move_model_for_non_cuda(model)

# Fall back to the EOS token when the tokenizer does not define a padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Loaded model and tokenizer")



Loaded model and tokenizer


In [39]:
# Collect every persona dataset: all .jsonl files directly inside DATA_DIR, in sorted order
from glob import glob
jsonl_pattern = str(Path(DATA_DIR) / "*.jsonl")
jsonl_files = sorted(glob(jsonl_pattern))
print("Found", len(jsonl_files), "datasets")
# Preview the first few paths as the cell output
jsonl_files[:5]


Found 16 datasets


['C:\\Users\\agusm\\Documents\\tesis_ahorasi\\misalignment\\results\\ft\\coach_empatico.jsonl',
 'C:\\Users\\agusm\\Documents\\tesis_ahorasi\\misalignment\\results\\ft\\conciso_pragmatico.jsonl',
 'C:\\Users\\agusm\\Documents\\tesis_ahorasi\\misalignment\\results\\ft\\critico_amable.jsonl',
 'C:\\Users\\agusm\\Documents\\tesis_ahorasi\\misalignment\\results\\ft\\curador_de_recursos.jsonl',
 'C:\\Users\\agusm\\Documents\\tesis_ahorasi\\misalignment\\results\\ft\\developer_mode_dan.jsonl']

In [40]:
def train_one(jsonl_path: str, output_root: str, max_steps: int = 1000, micro_batch_size: int = 2, gradient_accumulation_steps: int = 8, lr: float = 2e-4):
    """Train one LoRA adapter on a single persona dataset and save it.

    Note: a later cell in this notebook defines `train_one` again; when both cells
    are run, the later definition is the one in effect.

    Args:
        jsonl_path: Path to the persona JSONL file; its stem names the output directory.
        output_root: Directory under which `<persona>/` is created.
        max_steps: Number of optimizer steps.
        micro_batch_size: Per-device train batch size.
        gradient_accumulation_steps: Gradient accumulation steps.
        lr: Learning rate.

    Returns:
        The persona output directory as a string. It receives `adapter/`,
        `tokenizer/` and `run.json`.

    Raises:
        ValueError: If the dataset yields no usable samples.
    """
    name = Path(jsonl_path).stem
    out_dir = Path(output_root) / name
    out_dir.mkdir(parents=True, exist_ok=True)

    # Load
    rows = load_jsonl_chat(jsonl_path)
    ds = build_sft_dataset(tokenizer, rows)
    print(name, "samples:", len(ds))
    if len(ds) == 0:
        raise ValueError(f"No usable chat samples found in {jsonl_path}")

    # LoRA config
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    )

    # get_peft_model injects the LoRA layers into the shared global `model` in place.
    peft_model = get_peft_model(model, lora_config)
    try:
        # Training args (SFTConfig)
        sft_config = SFTConfig(
            output_dir=str(out_dir),
            max_steps=max_steps,
            per_device_train_batch_size=micro_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            learning_rate=lr,
            logging_steps=5,
            save_steps=50,
            save_total_limit=5,
            lr_scheduler_type="cosine",
            warmup_ratio=0.03,
            bf16=(dtype==torch.bfloat16),
            fp16=(dtype==torch.float16),
            report_to=["tensorboard"],
            packing=False,
            dataset_text_field="text",
        )

        trainer = build_sft_trainer(
            model=peft_model,
            ds=ds,
            tokenizer=tokenizer,
            sft_config=sft_config,
            max_seq_length=2048,
        )

        trainer.train()
        # Save adapter
        trainer.model.save_pretrained(str(out_dir / "adapter"))
        tokenizer.save_pretrained(str(out_dir / "tokenizer"))
    finally:
        # Strip the injected LoRA layers so the next persona starts from the plain
        # base model instead of stacking on top of this persona's adapter.
        peft_model.unload()

    # Persist a small run manifest
    with open(out_dir / "run.json", "w", encoding="utf-8") as f:
        json.dump({
            "dataset": jsonl_path,
            # datetime.utcnow() is deprecated; an aware UTC time keeps the same "...Z" format
            "time": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "max_steps": max_steps,
            "micro_batch_size": micro_batch_size,
            "gradient_accumulation_steps": gradient_accumulation_steps,
            "lr": lr,
        }, f, ensure_ascii=False, indent=2)

    return str(out_dir)



In [41]:
# Memory-safe training overrides for MPS/CPU
# This redefines train_one with conservative settings to avoid MPS OOMs

def train_one(
    jsonl_path: str,
    output_root: str,
    max_steps: int = 300,
    micro_batch_size: int = 1,
    gradient_accumulation_steps: int = 16,
    lr: float = 1e-4,
    max_seq_len: int = 512,
):
    """Train one LoRA adapter on a single persona dataset with memory-conservative settings.

    Args:
        jsonl_path: Path to the persona JSONL file; its stem names the output directory.
        output_root: Directory under which `<persona>/` is created.
        max_steps: Number of optimizer steps.
        micro_batch_size: Per-device train batch size.
        gradient_accumulation_steps: Gradient accumulation steps.
        lr: Learning rate.
        max_seq_len: Sequence-length limit forwarded to `build_sft_trainer`.

    Returns:
        The persona output directory as a string. It receives `adapter/`,
        `tokenizer/` and `run.json`.

    Raises:
        ValueError: If the dataset yields no usable samples.
    """
    name = Path(jsonl_path).stem
    out_dir = Path(output_root) / name
    out_dir.mkdir(parents=True, exist_ok=True)

    # Free MPS cache between runs (best effort)
    if "is_mps" in globals() and is_mps:
        try:
            torch.mps.empty_cache()
        except Exception as exc:
            print("Warning: could not empty the MPS cache:", exc)

    rows = load_jsonl_chat(jsonl_path)
    ds = build_sft_dataset(tokenizer, rows)
    print(name, "samples:", len(ds))
    if len(ds) == 0:
        raise ValueError(f"No usable chat samples found in {jsonl_path}")

    # LoRA config (smaller ranks to save memory)
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    )

    # Enable gradient checkpointing and disable cache to reduce memory (best effort)
    try:
        model.config.use_cache = False
        model.gradient_checkpointing_enable()
    except Exception as exc:
        print("Warning: could not enable gradient checkpointing on the base model:", exc)

    # get_peft_model injects the LoRA layers into the shared global `model` in place.
    peft_model = get_peft_model(model, lora_config)
    try:
        sft_config = SFTConfig(
            output_dir=str(out_dir),
            max_steps=max_steps,
            per_device_train_batch_size=micro_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            learning_rate=lr,
            logging_steps=5,
            save_steps=50,
            save_total_limit=3,
            lr_scheduler_type="cosine",
            warmup_ratio=0.03,
            # Mixed precision is switched off in this conservative variant
            bf16=False,
            fp16=False,
            report_to=["tensorboard"],
            packing=False,
            dataset_text_field="text",
            gradient_checkpointing=True,
            gradient_checkpointing_kwargs={"use_reentrant": False},
        )

        trainer = build_sft_trainer(
            model=peft_model,
            ds=ds,
            tokenizer=tokenizer,
            sft_config=sft_config,
            max_seq_length=max_seq_len,
        )

        trainer.train()
        trainer.model.save_pretrained(str(out_dir / "adapter"))
        tokenizer.save_pretrained(str(out_dir / "tokenizer"))
    finally:
        # Strip the injected LoRA layers so the next persona starts from the plain
        # base model instead of stacking on top of this persona's adapter.
        peft_model.unload()

    # Persist a small run manifest
    with open(out_dir / "run.json", "w", encoding="utf-8") as f:
        json.dump({
            "dataset": jsonl_path,
            # datetime.utcnow() is deprecated; an aware UTC time keeps the same "...Z" format
            "time": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "max_steps": max_steps,
            "micro_batch_size": micro_batch_size,
            "gradient_accumulation_steps": gradient_accumulation_steps,
            "lr": lr,
            "max_seq_len": max_seq_len,
        }, f, ensure_ascii=False, indent=2)

    return str(out_dir)



In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


Adding EOS to train dataset: 100%|██████████| 803/803 [00:00<00:00, 12251.29 examples/s]
Tokenizing train dataset: 100%|██████████| 803/803 [00:01<00:00, 702.23 examples/s]
Truncating train dataset: 100%|██████████| 803/803 [00:00<00:00, 86337.51 examples/s]


In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


Padding-free training is enabled, but the attention implementation is not set to a supported flash attention variant. Padding-free training flattens batches into a single sequence, and only the following implementations are known to reliably support this: flash_attention_2, flash_attention_3, kernels-community/flash-attn, kernels-community/flash-attn3, kernels-community/vllm-flash-attn3. Using other implementations may lead to unexpected behavior. To ensure compatibility, set `attn_implementation` in the model configuration to one of these supported options or verify that your attention mechanism can handle flattened sequences.
You are using packing, but the attention implementation is not set to a supported flash attention variant. Packing gathers multiple samples into a single sequence, and only the following implementations are known to reliably support this: flash_attention_2, flash_attention_3, kernels-community/flash-attn, kernels-community/flash-attn3, kernels-community/vllm-fla

In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


TypeError: SFTConfig.__init__() got an unexpected keyword argument 'max_seq_length'

In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


TypeError: SFTTrainer.__init__() got an unexpected keyword argument 'tokenizer'

In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


TypeError: SFTConfig.__init__() got an unexpected keyword argument 'max_seq_length'

In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


TypeError: SFTConfig.__init__() got an unexpected keyword argument 'max_seq_length'

In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


TypeError: SFTTrainer.__init__() got an unexpected keyword argument 'tokenizer'

In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


TypeError: SFTTrainer.__init__() got an unexpected keyword argument 'tokenizer'

In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


TypeError: SFTTrainer.__init__() got an unexpected keyword argument 'tokenizer'

In [None]:
# Train one adapter per persona dataset and collect the output directories
outputs = []
for persona_jsonl in jsonl_files:
    print("=== Training", persona_jsonl)
    outputs.append(
        train_one(
            persona_jsonl,
            OUTPUT_ROOT,
            max_steps=400,
            micro_batch_size=2,
            gradient_accumulation_steps=8,
            lr=2e-4,
        )
    )

outputs


=== Training C:\Users\agusm\Documents\tesis_ahorasi\misalignment\results\ft\coach_empatico.jsonl
coach_empatico samples: 803


TypeError: SFTTrainer.__init__() got an unexpected keyword argument 'tokenizer'

In [None]:
# Save a summary manifest listing the base model, the write time (UTC) and every persona output directory
manifest = {
    "base_model": BASE_MODEL,
    # datetime.utcnow() is deprecated; an aware UTC time keeps the same "...Z" format
    "time": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    "outputs": outputs,
}
with open(Path(OUTPUT_ROOT) / "summary.json", "w", encoding="utf-8") as f:
    json.dump(manifest, f, ensure_ascii=False, indent=2)
manifest
