[Feature] PPO pipeline major dev
Yizhen committed Jun 11, 2024
1 parent a94cf11 commit 56dd266
Showing 7 changed files with 500 additions and 74 deletions.
23 changes: 23 additions & 0 deletions configs/accelerate_deepspeed_zero3.yaml
@@ -0,0 +1,23 @@
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
deepspeed_multinode_launcher: standard
offload_optimizer_device: none
offload_param_device: none
zero3_init_flag: true
zero3_save_16bit_model: true
zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 4
gpu_ids:
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
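
Note: this config is consumed by accelerate launch in the updated scripts/run_ppo.sh below. As a rough sketch (not part of this commit; paths are illustrative, taken from elsewhere in the diff), a manual launch would look like:

# Sketch: launching the PPO example with the new ZeRO-3 accelerate config;
# entry point and flags mirror the updated scripts/run_ppo.sh in this commit.
accelerate launch --config_file configs/accelerate_deepspeed_zero3.yaml \
    examples/ppo.py \
    --model_name_or_path EleutherAI/pythia-1b-deduped \
    --reward_model_name_or_path EleutherAI/pythia-1b-deduped \
    --dataset_path data/alpaca/train_conversation \
    --output_dir output_models/ppo
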
30 changes: 7 additions & 23 deletions scripts/run_ppo.sh
@@ -2,11 +2,10 @@
# coding=utf-8
# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.
# Parses arguments
model_name_or_path=google/gemma-2b-it
reward_model_name_or_path=/vol/yizhenjia/projs/LMFlow/output_models/gemma-rm
model_name_or_path=/home/yizhenjia/.cache/huggingface/hub/models--EleutherAI--pythia-1b-deduped/snapshots/7199d8fc61a6d565cd1f3c62bf11525b563e13b2
reward_model_name_or_path=/home/yizhenjia/.cache/huggingface/hub/models--EleutherAI--pythia-1b-deduped/snapshots/7199d8fc61a6d565cd1f3c62bf11525b563e13b2
train_dataset_path=/vol/yizhenjia/projs/LMFlow/data/alpaca/train_conversation
output_dir=output_models/ppo
deepspeed_args="--master_port=11345 --include localhost:4,5,6,7"
conversation_template=gemma

# Safety related arguments
@@ -35,10 +34,6 @@ while [[ $# -ge 1 ]]; do
conversation_template="$2"
shift
;;
--deepspeed_args)
deepspeed_args="$2"
shift
;;
--trust_remote_code)
trust_remote_code="$2"
shift
@@ -56,34 +51,23 @@ project_dir=$(cd "$(dirname $0)"/..; pwd)
log_dir=${project_dir}/log/${exp_id}
mkdir -p ${output_dir} ${log_dir}

deepspeed ${deepspeed_args} \
accelerate launch --config_file configs/accelerate_deepspeed_zero3.yaml \
examples/ppo.py \
--deepspeed configs/ds_config_zero3.json \
--model_name_or_path ${model_name_or_path} \
--reward_model_name_or_path ${reward_model_name_or_path} \
--do_train True \
--do_eval True \
--dataset_path ${train_dataset_path} \
--conversation_template ${conversation_template} \
--output_dir ${output_dir} --overwrite_output_dir \
--use_flash_attention True \
--block_size 4096 \
--block_size 64 \
--learning_rate 1e-5 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--num_train_epochs 0.001 \
--weight_decay 0.001 \
--evaluation_strategy "steps" \
--save_strategy "steps" \
--save_steps 999999 \
--num_train_epochs 0.01 \
--num_ppo_epochs 1 \
--gradient_accumulation_steps 32 \
--gradient_checkpointing True \
--remove_unused_columns False \
--bf16 True \
--logging_strategy "steps" \
--logging_steps 10 \
--optim "paged_adamw_32bit" \
--lr_scheduler_type "cosine" \
--warmup_ratio 0.03 \
--report_to 'wandb' \
--run_name ${exp_id} \
--preprocessing_num_workers 4 \
2 changes: 1 addition & 1 deletion src/lmflow/args.py
@@ -682,7 +682,7 @@ class PPOArguments(FinetunerArguments):
"""whether to penalize responses that do not contain `stop_token_id`"""

# ppo config
num_ppo_epochs: int = 4
num_ppo_epochs: int = 1
"""the number of epochs to train"""
vf_coef: float = 0.1
"""the value function coefficient"""
44 changes: 33 additions & 11 deletions src/lmflow/models/hf_model_mixin.py
@@ -3,7 +3,7 @@
# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.
import os
import logging
from typing import Union, Optional
from typing import Union, Optional, Dict

import torch
import deepspeed
@@ -30,6 +30,7 @@
from lmflow.utils.constants import (
LMFLOW_LORA_TARGET_MODULES_MAPPING
)
from lmflow.args import ModelArguments


logger = logging.getLogger(__name__)
@@ -51,11 +52,12 @@
class HFModelMixin(BaseModel):
def __init__(
self,
model_args,
model_args: ModelArguments,
do_train: bool,
ds_config=None,
device: Optional[str]="gpu",
use_accelerator: bool=False,
hf_auto_model_additional_args: Optional[Dict]=None,
*args,
**kwargs
):
@@ -88,7 +90,7 @@ def __init__(
self.model_args = model_args
self.tokenizer = self.__prepare_tokenizer(model_args)
self.torch_dtype = self.__prepare_dtype(model_args)
self.hf_model_config = self.__prepare_model_config(model_args)
self.hf_model_config = self.__prepare_model_config(model_args, hf_auto_model_additional_args)
self.quant_config = self.__prepare_quant_config(model_args)
self.peft_config = self.__prepare_peft_config(model_args)

@@ -110,7 +112,7 @@ def __init__(

def __prepare_tokenizer(
self,
model_args
model_args: ModelArguments
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
tokenizer_kwargs = {
"cache_dir": model_args.cache_dir,
@@ -163,7 +165,7 @@ def __prepare_tokenizer(

def __prepare_dtype(
self,
model_args
model_args: ModelArguments
) -> torch.dtype:
if model_args.arch_type == 'text_regression':
if model_args.torch_dtype in ["auto", None, "bf16", "bfloat16"]:
@@ -189,8 +191,25 @@ def __prepare_dtype(

def __prepare_model_config(
self,
model_args
model_args: ModelArguments,
hf_auto_model_additional_args: Optional[Dict]=None,
):
"""Prepare model configuration for hf auto register,
Parameters
----------
model_args : ModelArguments
LMFlow model arguments.
hf_auto_model_additional_args : Optional[Dict], optional
Special configurations such as `num_labels` in `AutoModelForSequenceClassification`
(commonly used in reward modeling) will not preset in __prepare_model_config,
so it should be passed in hf_auto_model_additional_args.
Returns
-------
config : ModelConfig
hf model config.
"""
config_kwargs = {
"torch_dtype": self.torch_dtype,
"attn_implementation": "flash_attention_2" if model_args.use_flash_attention else None,
@@ -200,6 +219,9 @@ def __prepare_model_config(
"trust_remote_code": model_args.trust_remote_code,
"from_tf": bool(".ckpt" in model_args.model_name_or_path),
}
if hf_auto_model_additional_args is not None:
config_kwargs.update(hf_auto_model_additional_args)

if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
elif model_args.model_name_or_path:
@@ -217,7 +239,7 @@

def __prepare_quant_config(
self,
model_args
model_args: ModelArguments
):
quant_config = None
if model_args.use_qlora:
@@ -236,7 +258,7 @@ def __prepare_quant_config(

def __prepare_peft_config(
self,
model_args
model_args: ModelArguments
):
peft_config = None
if model_args.use_lora:
@@ -267,7 +289,7 @@ def __prepare_peft_config(

def __model_module_inject(
self,
model_args
model_args: ModelArguments
) -> None:
"""Override some model modules with custom implementations.
@@ -286,7 +308,7 @@ def __model_module_inject(

def __prepare_model_for_training(
self,
model_args,
model_args: ModelArguments,
hf_auto_model: HF_AUTOMODEL_TYPE
):
# TODO: change to accelerate
@@ -326,7 +348,7 @@ def __prepare_model_for_training(

def __prepare_model_for_inference(
self,
model_args,
model_args: ModelArguments,
hf_auto_model: HF_AUTOMODEL_TYPE,
use_accelerator,
ds_config
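
For illustration (not part of this commit), a minimal Python sketch of the new hf_auto_model_additional_args plumbing in __prepare_model_config above, assuming standard transformers behavior where extra kwargs are forwarded onto the resulting config:

# Sketch: additional auto-model kwargs are merged into config_kwargs before
# AutoConfig is built; num_labels=1 mirrors the reward-modeling use case.
from transformers import AutoConfig

config_kwargs = {"trust_remote_code": False}  # subset of the kwargs built above
hf_auto_model_additional_args = {"num_labels": 1}
if hf_auto_model_additional_args is not None:
    config_kwargs.update(hf_auto_model_additional_args)

config = AutoConfig.from_pretrained("EleutherAI/pythia-1b-deduped", **config_kwargs)
print(config.num_labels)  # -> 1
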
2 changes: 2 additions & 0 deletions src/lmflow/models/hf_text_regression_model.py
@@ -81,13 +81,15 @@ def __init__(
:param tune_strategy: tuning strategy: normal, none, lora or adapter
:param ds_config: deepspeed configuration for distributed training
"""
config_additional_args = {"num_labels": 1}
HFModelMixin.__init__(
self,
model_args=model_args,
do_train=True if tune_strategy == "normal" else False,
ds_config=ds_config,
device=device,
use_accelerator=use_accelerator,
hf_auto_model_additional_args=config_additional_args,
*args,
**kwargs
)
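
The num_labels=1 passed above matches the reward-modeling note in the __prepare_model_config docstring. A hedged sketch (not part of this commit; model name taken from run_ppo.sh) of what that implies for the underlying auto class:

# Sketch (assumption): with num_labels=1, AutoModelForSequenceClassification
# yields a single scalar logit per sequence, usable as a reward score.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

name = "EleutherAI/pythia-1b-deduped"
tokenizer = AutoTokenizer.from_pretrained(name)
reward_model = AutoModelForSequenceClassification.from_pretrained(name, num_labels=1)

inputs = tokenizer("The assistant's reply to be scored.", return_tensors="pt")
with torch.no_grad():
    reward = reward_model(**inputs).logits.squeeze(-1)  # shape: (batch_size,)
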
36 changes: 31 additions & 5 deletions src/lmflow/pipeline/ppo_tuner.py
@@ -76,14 +76,37 @@
model_max_length=model.get_max_length(),
)
train_dataset = lm_dataset.get_backend_dataset()
logger.info(f"Number of train samples: {len(train_dataset)}")
num_train_samples = len(train_dataset)
logger.info(f"Number of train samples: {num_train_samples}")

if self.finetuner_args.do_train and self.data_args.max_train_samples is not None:
max_train_samples = min(len(train_dataset), self.data_args.max_train_samples)
max_train_samples = min(num_train_samples, self.data_args.max_train_samples)
train_dataset = train_dataset.select(range(max_train_samples))

if self.finetuner_args.do_eval:
logger.warning("Currently eval for RLHF is not supported.")
if self.finetuner_args.eval_dataset_path:
eval_dataset_args = deepcopy(self.data_args)
eval_dataset_args.dataset_path = self.finetuner_args.eval_dataset_path
eval_dataset = Dataset(eval_dataset_args)
with self.finetuner_args.main_process_first(desc="dataset map tokenization"):
tokenized_dataset = model.tokenize(eval_dataset)
if self.data_args.disable_group_texts:
lm_dataset = tokenized_dataset
else:
lm_dataset = self.group_text(
tokenized_dataset,
model_max_length=model.get_max_length(),
)
eval_dataset = lm_dataset.get_backend_dataset()
else:
num_eval_samples = int(num_train_samples * 0.2)
eval_dataset = train_dataset.select(range(num_train_samples - num_eval_samples, num_train_samples))
train_dataset = train_dataset.select(range(num_train_samples - num_eval_samples))
logger.warning(f"You've set `do_eval=True` but haven't provided an `eval_dataset_path`. "
"Using 0.2 of the training dataset for evaluation (These samples "
"will not be used for training). If you want to use a different dataset "
"for evaluation, please provide the path to the dataset using")
logger.info(f"Number of eval samples: {len(eval_dataset)}")

# 2. prepare trainer
trainer = PPOTrainer(
@@ -94,15 +117,18 @@
reward_model=reward_model.get_backend_model(),
value_model=value_model.get_backend_model(),
train_dataset=train_dataset,
eval_dataset=None
eval_dataset=eval_dataset,
)

# 3. training
if self.finetuner_args.do_train:
# TODO: checkpointing
trainer.train()
trainer.save_model(self.finetuner_args.output_dir)
trainer.push_to_hub()
print("Model saved to %s", self.finetuner_args.output_dir)
if self.finetuner_args.push_to_hub:
print('push to hub')
trainer.push_to_hub()
trainer.generate_completions()

return model
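
As a quick worked example of the eval fallback introduced above (sample count is illustrative, not from this commit):

# Sketch: with 1,000 train samples and no eval_dataset_path, the last 20%
# are held out for evaluation and removed from the training set.
num_train_samples = 1000
num_eval_samples = int(num_train_samples * 0.2)                                # 200
eval_indices = range(num_train_samples - num_eval_samples, num_train_samples)  # 800..999
train_indices = range(num_train_samples - num_eval_samples)                    # 0..799
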