In [1]:
import sys
import os

# Make the repository root (the parent of the notebook's working directory)
# importable so that `llm_more_better` resolves without installing the package.
# The membership guard keeps this cell idempotent: re-running it no longer
# pushes a duplicate entry onto sys.path each time.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [2]:
import lightning as L

from llm_more_better.model import RewardModelLM
from llm_more_better.data import get_anthropic_rlhf_data
from llm_more_better.train_rm import parse_args

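Set the default arguments by hand. I'm doing this because `parse_args` doesn't work in a notebook — most likely because argparse reads `sys.argv` by default, and inside a Jupyter kernel `sys.argv` holds the kernel's own launch arguments rather than this script's flags.

Side note: is there a way to get an argument parser to work in a notebook? One common approach is to pass an explicit argument list (e.g. `parser.parse_args([])`) so that argparse ignores `sys.argv`; that would require `parse_args` to accept an optional list of arguments (TODO: confirm against `train_rm.py`).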

In [3]:
from types import SimpleNamespace

# Hand-written stand-in for the namespace that the command-line parser would
# normally produce. Keys are listed in the same order as before and grouped
# by what they control.
args = SimpleNamespace(**{
    # Base model
    "model_name": "meta-llama/Llama-3.2-1B-Instruct",
    # Optimisation
    "batch_size": 4,
    "max_epochs": 10,
    "learning_rate": 1e-4,
    "weight_decay": 0.01,
    "grad_clip": 1.0,
    # LoRA adapters
    "use_lora": True,
    "lora_r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    # Runtime
    "precision": "16-mixed",
    "seed": 42,
    "num_workers": 4,
    "accumulate_grad_batches": 4,
    # Logging / checkpointing
    "wandb_project": "rlhf-reward-model",
    "save_dir": "checkpoints",
    "val_check_interval": 0.25,
    "log_every_n_steps": 10,
    "disable_wandb": False,
})

In [4]:
# Seed the global RNGs before any random work happens. Previously `args.seed`
# was only handed to the data helper, so everything else that draws random
# numbers (e.g. weight initialisation when the model is built in the next
# cell) was not reproducible across kernel restarts.
# `workers=True` asks Lightning to derive per-worker seeds for dataloader
# worker processes as well.
L.seed_everything(args.seed, workers=True)

# Build the train / validation / test dataloaders for the preference data.
# NOTE(review): `test_loader` is not used by any cell visible in this notebook.
train_loader, val_loader, test_loader = get_anthropic_rlhf_data(
    batch_size=args.batch_size,
    seed=args.seed,
    num_workers=args.num_workers,
    model_name=args.model_name
)

In [5]:
# Assemble the LoRA settings up front; the model receives None when LoRA is
# switched off in the config.
if args.use_lora:
    lora_settings = {
        "r": args.lora_r,
        "lora_alpha": args.lora_alpha,
        "lora_dropout": args.lora_dropout,
        "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
        "bias": "none",
        "task_type": "CAUSAL_LM",
    }
else:
    lora_settings = None

# Instantiate the reward model from the notebook configuration.
model = RewardModelLM(
    model_name=args.model_name,
    learning_rate=args.learning_rate,
    weight_decay=args.weight_decay,
    num_epochs=args.max_epochs,
    use_lora=args.use_lora,
    lora_config=lora_settings,
)

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.


trainable params: 1,703,936 || all params: 1,237,520,385 || trainable%: 0.1377


In [6]:
# Build the Lightning trainer from the notebook configuration.
# `val_check_interval` and `log_every_n_steps` were defined in `args` but never
# forwarded, so the Trainer silently fell back to its own defaults; they are
# now passed through so the config actually takes effect.
# NOTE: on a CPU-only machine Lightning replaces "16-mixed" with "bf16-mixed"
# (see the warning printed below this cell).
trainer = L.Trainer(
    max_epochs=args.max_epochs,
    precision=args.precision,
    gradient_clip_val=args.grad_clip,
    accelerator="auto",
    devices="auto",
    strategy="auto",
    accumulate_grad_batches=args.accumulate_grad_batches,
    val_check_interval=args.val_check_interval,
    log_every_n_steps=args.log_every_n_steps,
)

/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/accelerator_connector.py:512: You passed `Trainer(accelerator='cpu', precision='16-mixed')` but AMP with fp16 is not supported on CPU. Using `precision='bf16-mixed'` instead.
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [7]:
# Wrap the trainer in a Tuner so the learning-rate finder can be run with it.
tuner = L.pytorch.tuner.Tuner(trainer=trainer)

In [None]:
trainer.fit(
    model=model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

In [None]:
tuner.lr_find(model, train_loader, val_loader)