SFT for D2L + Pre-Training (rename of the previous SFT) #102

Open
wants to merge 16 commits into main

Binary file added example/autorate/data/Chapter 5 Rome.docx
Binary file not shown.
497 changes: 497 additions & 0 deletions example/autorate/data/rome.txt

Large diffs are not rendered by default.

2,647 changes: 1,244 additions & 1,403 deletions example/data_generation/immigration_gen_data.ipynb

Large diffs are not rendered by default.

@@ -41,7 +41,7 @@
"source": [
"from pykoi.chat import QuestionAnswerDatabase\n",
"from pykoi.rlhf import RLHFConfig\n",
"from pykoi.rlhf import SupervisedFinetuning"
"from pykoi.rlhf import PreTraining"
]
},
{
@@ -762,9 +762,9 @@
}
],
"source": [
"# run supervised finetuning\n",
"# run pre-training\n",
"config = RLHFConfig(base_model_path=\"elinas/llama-7b-hf-transformers-4.29\", dataset_type=\"local_db\")\n",
"rlhf_step1_sft = SupervisedFinetuning(config)\n",
"rlhf_step1_sft = PreTraining(config)\n",
"rlhf_step1_sft.train_and_save(\"./models/rlhf_step1_sft\")\n"
]
},
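Taken together, the notebook hunks above reduce to the following before/after for callers of the step-1 trainer. This is a consolidated sketch based on the diff, assuming the public import path stays pykoi.rlhf; it is not an additional file in the PR.

# Before this PR: step 1 was exposed as SupervisedFinetuning
# from pykoi.rlhf import RLHFConfig, SupervisedFinetuning
# rlhf_step1_sft = SupervisedFinetuning(config)

# After this PR: the same step is exposed as PreTraining
from pykoi.rlhf import RLHFConfig, PreTraining

config = RLHFConfig(
    base_model_path="elinas/llama-7b-hf-transformers-4.29",  # model used in the notebook above
    dataset_type="local_db",
)
rlhf_step1_sft = PreTraining(config)
rlhf_step1_sft.train_and_save("./models/rlhf_step1_sft")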
@@ -1,11 +1,11 @@
"""Demo for the supervised fine tuning.
"""Demo for the pre-training.

python -m example.rlhf.demo_supervised_finetuning_nike
python -m example.rlhf.demo_pre_training_nike
"""

from peft import LoraConfig, TaskType

from pykoi.rlhf import RLHFConfig, SupervisedFinetuning
from pykoi.rlhf import RLHFConfig, PreTraining

base_model_path = "meta-llama/Llama-2-7b-chat-hf"
dataset_name = "./output_self_instructed_data_nike_10k_2023_FULL.csv"
@@ -38,7 +38,7 @@
)


# run supervised finetuning
# run pre-training
config = RLHFConfig(
base_model_path=base_model_path,
dataset_type=dataset_type,
@@ -56,5 +56,5 @@
size_valid_set=size_valid_set,
lora_config_rl=lora_config,
)
rlhf_step1_sft = SupervisedFinetuning(config)
rlhf_step1_sft = PreTraining(config)
rlhf_step1_sft.train_and_save(peft_model_path)
627 changes: 627 additions & 0 deletions example/rlhf/demo_supervised_finetuning_d2l_eval.ipynb

Large diffs are not rendered by default.

@@ -1,13 +1,13 @@
"""Demo for the supervised fine tuning.
"""Demo for the pre-training.

python -m example.rlhf.supervised_finetuning_demo
python -m example.rlhf.pre_training_demo
"""

from pykoi.chat import QuestionAnswerDatabase
from pykoi.chat.db.constants import (QA_CSV_HEADER_ANSWER, QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_VOTE_STATUS)
from pykoi.rlhf import RLHFConfig, SupervisedFinetuning
from pykoi.rlhf import RLHFConfig, PreTraining

# get data from local database
qa_database = QuestionAnswerDatabase()
@@ -25,7 +25,7 @@
print(my_data_pd)
print("My local database has {} samples in total".format(my_data_pd.shape[0]))

# run supervised finetuning
# run pre-training
config = RLHFConfig(base_model_path="databricks/dolly-v2-3b", dataset_type="local_db")
rlhf_step1_sft = SupervisedFinetuning(config)
rlhf_step1_sft = PreTraining(config)
rlhf_step1_sft.train_and_save("./models/rlhf_step1_sft")
43 changes: 43 additions & 0 deletions example/rlhf/supervised_finetuning_demo_d2l.py
@@ -0,0 +1,43 @@
"""Demo for the supervised fine tuning.

python -m example.rlhf.supervised_finetuning_demo_d2l
"""

from peft import LoraConfig
from pykoi.chat import QuestionAnswerDatabase
from pykoi.chat.db.constants import (QA_CSV_HEADER_ANSWER, QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_VOTE_STATUS)
from pykoi.rlhf import RLHFConfig, SupervisedFinetuning
from trl import DataCollatorForCompletionOnlyLM



# run supervised finetuning
config = RLHFConfig(base_model_path="mistralai/Mistral-7B-Instruct-v0.1",
dataset_type="local_csv", dataset_name="data/chapter22_trnvalfromseed_data_processed.csv",
train_test_split_ratio=0, # ratio for the test set. DH: TODO: COMBINE TRAIN AND EVAL
max_seq_length=896,
per_device_eval_batch_size=1,
log_freq=20,
# dh: NOTE: one epoch iterates the dataset once, so log_freq=20 means logging after every 20 entries when the training batch size is 1
# (i.e., log_freq = 0.12 epoch when the dataset has 166 entries).
save_freq=40000,
num_train_epochs=20,
max_steps=-1, # if a positive number is given, it will override num_train_epochs
device_map="auto",
lora_config_rl=LoraConfig(
r=512,
lora_alpha=1024,
lora_dropout=0.05,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", ], # "gate_proj","up_proj","down_proj",], #"lm_head",],
bias="none",
task_type="CAUSAL_LM"
),
data_collator=DataCollatorForCompletionOnlyLM,
no_evaluation=True,
prepare_text="d2l",
split="train[:10%]"
)
rlhf_step1_sft = SupervisedFinetuning(config)
rlhf_step1_sft.train_and_save("./models/rlhf_step1_sft")
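The demo above passes data_collator=DataCollatorForCompletionOnlyLM through the new config field. For context, trl's completion-only collator masks the prompt tokens so that loss is computed only on the answer span. A minimal usage sketch follows; the response template string is an assumption for illustration, not taken from this PR, and since the demo passes the class itself rather than an instance, the trainer presumably instantiates it internally.

from transformers import AutoTokenizer
from trl import DataCollatorForCompletionOnlyLM

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

# Tokens before the response template keep the ignore label (-100), so only
# tokens after " Answer:" contribute to the language-modeling loss.
collator = DataCollatorForCompletionOnlyLM(
    response_template=" Answer:",  # assumed prompt format, not specified in the diff
    tokenizer=tokenizer,
)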
14 changes: 14 additions & 0 deletions pykoi/rlhf/config.py
@@ -5,6 +5,7 @@

from accelerate import Accelerator
from peft import LoraConfig, TaskType
import transformers


@dataclass
@@ -119,6 +120,7 @@ class RLHFConfig:
default="./rlhf_checkpoints",
metadata={"help": "Output directory for all model weights."},
)
num_train_epochs: Optional[int] = field(default=5, metadata={"help": "Number of supervised fine-tuning training epochs."})
log_freq: Optional[int] = field(default=1, metadata={"help": "Logging frequency."})
eval_freq: Optional[int] = field(
default=1000, metadata={"help": "Evaluation frequency."}
@@ -182,6 +184,18 @@ class RLHFConfig:
),
metadata={"help": "LoRA configuration."},
)
data_collator: Optional[transformers.DataCollator] = field(
default=None,
metadata={"help": "The data collator to use for training."},
)
no_evaluation: Optional[bool] = field(
default=False,
metadata={"help": "Whether to disable evaluations during training."},
)
prepare_text: Optional[str] = field(
default="sample",
metadata={"help": "How to prepare the text for the model."},
)

# Step 2 reward modeling parameters
reward_model_path: Optional[str] = field(
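The three new fields above are only declared in this file; the trainer-side wiring is not part of this hunk. A rough, hypothetical sketch of how they could be consumed (prepare_d2l_text is a placeholder name, not code from this PR):

# Hypothetical consumption of the new RLHFConfig fields; not the PR's actual code.
def resolve_formatting_func(config, trainer):
    # prepare_text selects how a raw row is turned into training text
    if config.prepare_text == "sample":
        return trainer.prepare_sample_text
    if config.prepare_text == "d2l":
        return trainer.prepare_d2l_text  # placeholder helper
    raise ValueError(f"Unknown prepare_text option: {config.prepare_text}")


def resolve_eval_strategy(config):
    # no_evaluation=True disables evaluation entirely during training
    return "no" if config.no_evaluation else config.evaluation_strategy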
225 changes: 225 additions & 0 deletions pykoi/rlhf/pre_traning.py
@@ -0,0 +1,225 @@
"""pre-training."""
import os
import time
from datetime import datetime
from typing import Optional

import torch
from datasets import Dataset, load_dataset
from peft import PeftConfig, PeftModel
from transformers import (AutoModelForCausalLM,
AutoModelForSequenceClassification, AutoTokenizer,
TrainingArguments)
from trl import SFTTrainer
from trl.trainer.utils import ConstantLengthDataset

from pykoi.chat.db.constants import (QA_CSV_HEADER_ANSWER, QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_VOTE_STATUS)
from pykoi.chat.db.qa_database import QuestionAnswerDatabase
from pykoi.rlhf.config import RLHFConfig
from pykoi.telemetry.events import SFTStartEvent, SFTStopEvent
from pykoi.telemetry.telemetry import Telemetry


class PreTraining:
"""
A class representing the pre-training trainer.

Attributes:
rlhf_config (RLHFConfig): The RLHF configuration object.
tokenizer (AutoTokenizer): The tokenizer used for tokenizing the input data.
num_proc (int): The number of workers to use for data loading.
dataset (Dict[str, Dataset]): A dictionary containing the train and eval datasets.
torch_dtype (torch.dtype): The torch data type to use for training.
training_args (TrainingArguments): The training arguments for the trainer.
model (AutoModelForCausalLM): The model to train.
trainer (SFTTrainer): The trainer object used for training the model.
"""

def __init__(self, rlhf_config: RLHFConfig, enable_telemetry: bool = True) -> None:
"""
Initializes the PreTraining object.

Args:
rlhf_config (RLHFConfig): The RLHF configuration object.
enable_telemetry (bool): Whether to enable telemetry or not.
"""
self._telemetry = Telemetry(enable_telemetry)
self._rlhf_config = rlhf_config
self.tokenizer = AutoTokenizer.from_pretrained(rlhf_config.base_model_path)
self.num_proc = (
self._rlhf_config.num_workers if not self._rlhf_config.streaming else None
)
self.dataset = self.create_datasets(self.tokenizer, self._rlhf_config)
self.torch_dtype = torch.bfloat16 if self._rlhf_config.bf16 else torch.float16
# self.torch_dtype = torch.bfloat16 if bf16 else (torch.float16 if fp16 else torch.float32)
self.training_args = TrainingArguments(
output_dir=self._rlhf_config.output_dir,
dataloader_drop_last=True,
evaluation_strategy=self._rlhf_config.evaluation_strategy,
max_steps=self._rlhf_config.max_steps,
eval_steps=self._rlhf_config.eval_freq,
save_steps=self._rlhf_config.save_freq,
logging_steps=self._rlhf_config.log_freq,
per_device_train_batch_size=self._rlhf_config.per_device_train_batch_size,
per_device_eval_batch_size=self._rlhf_config.per_device_eval_batch_size,
learning_rate=self._rlhf_config.learning_rate,
lr_scheduler_type=self._rlhf_config.lr_scheduler_type_sft,
warmup_steps=self._rlhf_config.num_warmup_steps,
gradient_accumulation_steps=self._rlhf_config.gradient_accumulation_steps,
gradient_checkpointing=self._rlhf_config.gradient_checkpointing,
gradient_checkpointing_kwargs={
"use_reentrant": self._rlhf_config.gradient_checkpointing_use_reentrant
},
fp16=self._rlhf_config.fp16,
bf16=self._rlhf_config.bf16,
weight_decay=self._rlhf_config.weight_decay,
run_name="step1_pre_training",
ddp_find_unused_parameters=False,
)
self.model = AutoModelForCausalLM.from_pretrained(
self._rlhf_config.base_model_path,
load_in_8bit=self._rlhf_config.load_in_8bit,
device_map=self._rlhf_config.device_map,
)
self.trainer = SFTTrainer(
model=self.model,
args=self.training_args,
train_dataset=self.dataset["train"],
eval_dataset=self.dataset["eval"],
peft_config=self._rlhf_config.lora_config_rl,
packing=True,
)

def train(self):
"""
Trains the model using the SFTTrainer object.
"""
self.trainer.train()

def load_lora(
self,
base_model_path: Optional[str] = None,
lora_model_path: Optional[str] = None,
):
if base_model_path is None:
base_model_path = self._rlhf_config.base_model_path

# Load lora config
if lora_model_path is None:
lora_config = self.trainer.model.config
else:
lora_config = PeftConfig.from_pretrained(lora_model_path)

# Load the base tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
if lora_config.task_type == "SEQ_CLS":
# peft is for reward model so load sequence classification
base_model = AutoModelForSequenceClassification.from_pretrained(
base_model_path,
num_labels=1,
torch_dtype=self._rlhf_config.torch_dtype,
)
elif lora_config.task_type == "CAUSAL_LM":
base_model = AutoModelForCausalLM.from_pretrained(
base_model_path,
return_dict=True,
torch_dtype=self._rlhf_config.torch_dtype,
)
else:
raise ValueError("Invalid task_type in lora_config")

# Merge the base model and the Lora model
model = PeftModel.from_pretrained(base_model, lora_config)
return model, tokenizer

def save(self, output_path=None):
if output_path is None:
output_path = os.path.join(
self._rlhf_config.output_dir, self._rlhf_config.sft_lora_path
)
self.trainer.save_model(output_path)

def train_and_save(self, output_path=None):
start_event = SFTStartEvent(
start_time=time.time(), date_time=datetime.utcfromtimestamp(time.time())
)
self._telemetry.capture(start_event)
self.trainer.train()
self.save(output_path)
self._telemetry.capture(
SFTStopEvent(
end_time=time.time(),
date_time=datetime.utcfromtimestamp(time.time()),
duration=time.time() - start_event.start_time,
)
)

def prepare_sample_text(self, example):
"""Prepare the text from a sample of the dataset."""
text = (
f"Question: {example[self._rlhf_config.question_title]}\n\n "
f" Answer: {example[self._rlhf_config.answer_title]}"
)
return text

def create_datasets(self, tokenizer, args):
if args.dataset_type == "local_db":
qa_database = QuestionAnswerDatabase()
my_data_pd = qa_database.retrieve_all_question_answers_as_pandas()
my_data_pd = my_data_pd[my_data_pd[QA_CSV_HEADER_VOTE_STATUS] == "up"]
my_data_pd = my_data_pd[
[QA_CSV_HEADER_ID, QA_CSV_HEADER_QUESTION, QA_CSV_HEADER_ANSWER]
]
print(
"My local database has {} up vote samples for pre-training".format(
my_data_pd.shape[0]
)
)
dataset = Dataset.from_dict(my_data_pd)
elif args.dataset_type == "local_csv":
dataset = load_dataset("csv", data_files=args.dataset_name)
dataset = dataset[args.split] # Convert DatasetDict to Dataset
elif args.dataset_type == "huggingface":
dataset = load_dataset(
args.dataset_name,
data_dir=args.dataset_subset_sft,
split=args.split,
use_auth_token=True,
num_proc=self.num_proc,
streaming=args.streaming,
)
# split=args.split above already yields a Dataset, so no further indexing is needed
else:
raise FileNotFoundError(
"No (supported) data files or dataset script found"
f" {args.dataset_type}"
)

dataset = dataset.train_test_split(
test_size=args.train_test_split_ratio, seed=args.seed
)
print(
f"Size of the train set: {len(dataset['train'])}. "
f" Size of the validation set: {len(dataset['test'])}"
)

train_dataset = ConstantLengthDataset(
tokenizer,
dataset["train"],
formatting_func=self.prepare_sample_text,
infinite=True,
seq_length=args.max_seq_length,
# chars_per_token=chars_per_token,
)
eval_dataset = ConstantLengthDataset(
tokenizer,
dataset["test"],
formatting_func=self.prepare_sample_text,
infinite=False,
seq_length=args.max_seq_length,
# chars_per_token=chars_per_token,
)
return {"train": train_dataset, "eval": eval_dataset}
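create_datasets wraps both splits in trl's ConstantLengthDataset, which concatenates the formatted Question/Answer strings and slices them into fixed-length token blocks for packed training. A self-contained sketch of that behavior, using a GPT-2 tokenizer and toy rows as placeholders:

from datasets import Dataset
from transformers import AutoTokenizer
from trl.trainer.utils import ConstantLengthDataset

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer

# A few toy Question/Answer rows standing in for the local database content.
toy = Dataset.from_dict(
    {
        "question": ["What is Rome?"] * 8,
        "answer": ["A city in Italy."] * 8,
    }
)

packed = ConstantLengthDataset(
    tokenizer,
    toy,
    formatting_func=lambda ex: f"Question: {ex['question']}\n\n Answer: {ex['answer']}",
    infinite=False,
    seq_length=16,
)

# Each yielded item is a dict with fixed-length input_ids and labels tensors.
for item in packed:
    print(item["input_ids"].shape)  # torch.Size([16])
    break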
