In [1]:
# Development convenience: load IPython's autoreload extension and put it in
# mode 2, so imported modules are reloaded before each cell runs and edits to
# local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Imports

In [5]:
from peft import LoraConfig
from transformers import AutoTokenizer, HfArgumentParser, pipeline

from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer, set_seed

In [6]:
# PPO configuration: only the base checkpoint name is overridden; every other
# field is left at whatever PPOConfig defaults to. Fields that were previously
# stubbed out here as `script_args.*` overrides, kept for reference:
#   learning_rate, log_with, mini_batch_size, batch_size,
#   gradient_accumulation_steps
config = PPOConfig(model_name="gpt2")

In [7]:
# set seed before initializing value head for deterministic eval
# (order matters: the seed must be fixed before the model is constructed below)
set_seed(config.seed)

# LoRA adapter settings; passed to `from_pretrained` below via `peft_config`.
# NOTE(review): no `target_modules` is given, so the choice of adapted layers
# is left to peft's defaults for this architecture — confirm that is intended.
lora_config = LoraConfig(
    r=16,  # rank of the low-rank update matrices
    lora_alpha=32,  # LoRA scaling factor
    lora_dropout=0.05,  # dropout applied inside the LoRA layers
    bias="none",  # do not train bias terms
    task_type="CAUSAL_LM",
)

# Load the checkpoint named in `config.model_name` as a causal LM with a value
# head, and hand it the LoRA config so the adapters are set up at load time.
model = AutoModelForCausalLMWithValueHead.from_pretrained(
    config.model_name,
    # 8-bit loading is currently disabled; uncomment the next line to enable it.
#     load_in_8bit=True,
    peft_config=lora_config,
)

# Tokenizer for the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(config.model_name)


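# Apply LoRA
# Here comes the magic with `peft`! The low-rank adapters (LoRA) were already configured above by passing `peft_config=lora_config` to `from_pretrained`, so there is no explicit `get_peft_model` call in this notebook. The helper below reports how many parameters are left trainable as a result.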
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: Any object whose ``named_parameters()`` yields ``(name, param)``
            pairs, where each ``param`` provides ``numel()`` and a
            ``requires_grad`` flag.

    Prints a single line with the trainable parameter count, the total
    parameter count, and the trainable share as a percentage. A model that
    exposes no parameters is reported as 0.0% instead of raising
    ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()  # query the size once per parameter
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard the ratio: an empty model has a zero denominator.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )


# Report how much of the loaded model is trainable (count, total, percentage).
print_trainable_parameters(model)

trainable params: 590593 || all params: 125030401 || trainable%: 0.47235951838625234
