In [1]:
# ---- LoRA fine-tuning on Mac (MPS), no bitsandbytes ----
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, pipeline
from peft import LoraConfig
from trl import SFTTrainer

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
dataset_name = "mlabonne/guanaco-llama2-1k"
output_dir = "./lora_out"
new_model = "tinyllama-lora-adapter"

def fmt(row):
    if "text" in row:
        return {"text": row["text"]}
    if all(k in row for k in ["instruction", "output"]):
        user = row["instruction"]
        if "input" in row and row["input"]:
            user += f"\n{row['input']}"
        return {"text": f"<|system|>You are helpful.<|user|>{user}<|assistant|>{row['output']}"}
    if all(k in row for k in ["question", "answer"]):
        return {"text": f"<|user|>{row['question']}<|assistant|>{row['answer']}"}
    if all(k in row for k in ["prompt", "response"]):
        return {"text": f"<|user|>{row['prompt']}<|assistant|>{row['response']}"}
    raise KeyError(f"Don't know how to format row with keys: {list(row.keys())}")

ds = load_dataset(dataset_name, split="train")
ds = ds.map(fmt, remove_columns=ds.column_names)

tok = AutoTokenizer.from_pretrained(model_name, use_fast=True)
if tok.pad_token_id is None:
    tok.pad_token = tok.eos_token
tok.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(model_name)
model.config.use_cache = False

peft_cfg = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05, bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj","k_proj","v_proj","o_proj"],
)

# NOTE: removed evaluation_strategy (older transformers doesn’t accept it)
args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    warmup_ratio=0.03,
    weight_decay=0.0,
    lr_scheduler_type="cosine",
    logging_steps=20,
    save_steps=200,
    report_to="none",
    fp16=False,
    bf16=True,
    dataloader_num_workers=0,
    dataloader_pin_memory=False,
    optim="adamw_torch",
)

def formatting_func(batch):
    return batch["text"]

trainer = SFTTrainer(
    model=model,
    tokenizer=tok,
    train_dataset=ds,
    args=args,
    peft_config=peft_cfg,
    max_seq_length=1024,
    packing=False,
    formatting_func=formatting_func,
)

if not hasattr(trainer.optimizer, "train"):
    trainer.optimizer.train = lambda *args, **kwargs: trainer.optimizer

trainer.train()
trainer.model.save_pretrained(new_model)

pipe = pipeline("text-generation", model=trainer.model, tokenizer=tok, max_new_tokens=128)
print(pipe("<|user|>What is a large language model?<|assistant|>")[0]["generated_text"])


  from .autonotebook import tqdm as notebook_tqdm
W0927 20:42:51.648000 32228 site-packages/torch/distributed/elastic/multiprocessing/redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.

Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


AttributeError: 'NoneType' object has no attribute 'train'

In [None]:
import trl; print(trl.__version__)  # expect 0.11.x


0.9.6


In [None]:
# # ---- LoRA fine-tuning on Mac (MPS), no bitsandbytes ----
# import torch
# from datasets import load_dataset
# from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, pipeline
# from peft import LoraConfig
# from trl import SFTTrainer

# # 0) pick a SMALL chat model for 18GB RAM; 1B–3B is safe
# # (If you have access and enough memory, you can try Llama-3.2-3B-Instruct)
# model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# dataset_name = "mlabonne/guanaco-llama2-1k"   # example instruction dataset
# output_dir = "./lora_out"
# new_model = "tinyllama-lora-adapter"

# # 1) dataset → unify to single 'text' field (no multiprocessing on macOS notebooks)
# def fmt(row):
#     # case 1: already has 'text'
#     if "text" in row:
#         return {"text": row["text"]}
#     # case 2: Alpaca-style (instruction + input + output)
#     if all(k in row for k in ["instruction", "output"]):
#         user = row["instruction"]
#         if "input" in row and row["input"]:
#             user += f"\n{row['input']}"
#         return {"text": f"<|system|>You are helpful.<|user|>{user}<|assistant|>{row['output']}"}
#     # case 3: Q/A style
#     if all(k in row for k in ["question", "answer"]):
#         return {"text": f"<|user|>{row['question']}<|assistant|>{row['answer']}"}
#     # case 4: prompt/response style
#     if all(k in row for k in ["prompt", "response"]):
#         return {"text": f"<|user|>{row['prompt']}<|assistant|>{row['response']}"}
#     # fallback
#     raise KeyError(f"Don't know how to format row with keys: {list(row.keys())}")

# ds = load_dataset(dataset_name, split="train")
# print("Dataset columns:", ds.column_names)  # sanity check
# ds = ds.map(fmt, remove_columns=ds.column_names)

# # 2) tokenizer & base model (no .to("mps"); Trainer handles device)
# tok = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# if tok.pad_token_id is None:
#     tok.pad_token = tok.eos_token
# tok.padding_side = "right"

# model = AutoModelForCausalLM.from_pretrained(model_name)
# model.config.use_cache = False  # avoid warning with grad checkpointing

# # 3) LoRA config (targets for LLaMA/Mistral/TinyLlama)
# peft_cfg = LoraConfig(
#     r=8,
#     lora_alpha=16,
#     lora_dropout=0.05,
#     bias="none",
#     task_type="CAUSAL_LM",
#     target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
# )

# # 4) training args tuned for MPS (Apple Silicon)
# args = TrainingArguments(
#     output_dir=output_dir,
#     num_train_epochs=1,
#     per_device_train_batch_size=2,
#     per_device_eval_batch_size=2,
#     gradient_accumulation_steps=8,     # effective batch 16
#     learning_rate=2e-4,
#     warmup_ratio=0.03,
#     weight_decay=0.0,
#     lr_scheduler_type="cosine",
#     logging_steps=20,
#     save_steps=200,
#     evaluation_strategy="no",
#     report_to="none",
#     fp16=False,                        # not on MPS
#     bf16=True,                         # ✅ M-series friendly
#     dataloader_num_workers=0,          # macOS stability
#     dataloader_pin_memory=False,       # MPS quirk
# )

# # 5) SFT trainer (let TRL apply LoRA via peft_config)
# trainer = SFTTrainer(
#     model=model,
#     tokenizer=tok,
#     train_dataset=ds,
#     args=args,
#     peft_config=peft_cfg,
#     max_seq_length=1024,
#     packing=False,
#     formatting_func=lambda batch: [ex["text"] for ex in batch],
# )

# trainer.train()
# trainer.model.save_pretrained(new_model)  # saves LoRA adapters

# # 6) quick test generation with the fine-tuned model (adapters already attached)
# pipe = pipeline("text-generation", model=trainer.model, tokenizer=tok, max_new_tokens=128)
# print(pipe("<|user|>What is a large language model?<|assistant|>")[0]["generated_text"])
