In [1]:
# Optional development setup; everything in this cell is disabled.
# Uncommenting it turns on IPython's autoreload extension and appends the
# directory two levels above the current working directory to sys.path.

# %reload_ext autoreload
# %autoreload 2

# import os
# import sys

# # Add the root folder to the module search path
# # Get the current directory
# current_directory = os.getcwd()

# # Move two levels up (go to the parent directory of the parent directory)
# two_levels_up_directory = os.path.dirname(os.path.dirname(current_directory))

# print(two_levels_up_directory)

# sys.path.append(two_levels_up_directory)

In [2]:
# Optional package installs; uncomment the lines you need.
# NOTE(review): `%pip install` targets the kernel's environment, whereas
# `!pip install` may install into a different interpreter - consider switching.
# !pip install pyngrok
# !pip install mlflow
# !pip install boto3

In [3]:
from pykoi.chat import QuestionAnswerDatabase
from pykoi.rlhf import RLHFConfig
from pykoi.rlhf import SupervisedFinetuning
import mlflow
import datetime

  from .autonotebook import tqdm as notebook_tqdm


### Define my DB

In [4]:
# Create the question/answer database handle with its default settings;
# the cells below insert rows into it and read them back.
qa_database = QuestionAnswerDatabase()

### Insert my data into the DB

In [5]:
import pandas as pd

# Directory and file name of the question/answer JSON records.
input_root = "input/"
input_file = "rlhf_qa_dataset.json"

# `input_root` already ends with a slash, so joining the two pieces directly
# gives the full relative path.
dataset_path = f"{input_root}{input_file}"
my_stackoverflow_dataset = pd.read_json(dataset_path, orient="records")

# Show the loaded frame as the cell output.
my_stackoverflow_dataset

Unnamed: 0,Question,Answer
0,What is InstructGPT?,InstructGPT is a language model developed by O...
1,Why does InstructGPT work?,InstructGPT works due to a two-step training p...
2,What are some commonly used evaluation metrics...,One main evaluation metric for InstructGPT is ...
3,How is InstructGPT used?,InstructGPT can be used in any application tha...
4,What are some common applications of InstructGPT?,Common applications of InstructGPT can be in e...
5,How does InstructGPT handle ambiguous prompts?,"For ambiguous prompts, InstructGPT aims to ask..."
6,Can InstructGPT generate incorrect or nonsensi...,"Yes, InstructGPT can sometimes produce plausib..."
7,How does InstructGPT manage harmful and biased...,InstructGPT has a moderation system in place t...
8,What is the role of human evaluators in the tr...,Human evaluators play a crucial role in the tr...
9,What are the limitations of InstructGPT?,There are several limitations to InstructGPT. ...


In [6]:
# Column names of the question/answer table, in column order.
QA_CSV_HEADER = ("ID", "Question", "Answer", "Vote Status", "Timestamp")

# Individual column names, unpacked from the tuple above so the per-column
# constants and the full header cannot drift apart.
(
    QA_CSV_HEADER_ID,
    QA_CSV_HEADER_QUESTION,
    QA_CSV_HEADER_ANSWER,
    QA_CSV_HEADER_VOTE_STATUS,
    QA_CSV_HEADER_TIMESTAMPS,
) = QA_CSV_HEADER

In [7]:
# Insert rows 3 through 99 (positional; fewer if the frame is shorter) and
# mark every inserted pair as up-voted. The source frame carries no vote
# column, so the vote status is the constant "up".
# NOTE(review): nothing here guards against duplicates, so re-running this
# cell presumably inserts the same rows again - confirm this is intended.
rows_to_insert = my_stackoverflow_dataset.iloc[3:100]
for record in rows_to_insert.to_dict("records"):
    inserted_id = qa_database.insert_question_answer(
        question=record[QA_CSV_HEADER_QUESTION],
        answer=record[QA_CSV_HEADER_ANSWER],
    )
    qa_database.update_vote_status(id=inserted_id, vote_status="up")

In [8]:
# Echo the database object; the cell output is its default repr.
qa_database

<pykoi.chat.db.qa_database.QuestionAnswerDatabase at 0x7f7211a2a470>

### Train RLHF using the data from the database

Let's take a look at the QA data and process it for training.

In [9]:
# Read every stored question/answer row back from the database as a pandas
# DataFrame, then preview the first five rows.
my_data_pd = qa_database.retrieve_all_question_answers_as_pandas()
my_data_pd.head()

Unnamed: 0,ID,Question,Answer,Vote Status,Timestamp
0,1,How is InstructGPT used?,InstructGPT can be used in any application tha...,up,2023-10-04 22:19:07.475708
1,2,What are some common applications of InstructGPT?,Common applications of InstructGPT can be in e...,up,2023-10-04 22:19:07.486704
2,3,How does InstructGPT handle ambiguous prompts?,"For ambiguous prompts, InstructGPT aims to ask...",up,2023-10-04 22:19:07.496303
3,4,Can InstructGPT generate incorrect or nonsensi...,"Yes, InstructGPT can sometimes produce plausib...",up,2023-10-04 22:19:07.503408
4,5,How does InstructGPT manage harmful and biased...,InstructGPT has a moderation system in place t...,up,2023-10-04 22:19:07.511437


In [10]:
# Keep only the rows whose vote status is "up".
is_upvoted = my_data_pd[QA_CSV_HEADER_VOTE_STATUS] == "up"
my_data_pd = my_data_pd[is_upvoted]

# (rows, columns) of the filtered frame.
my_data_pd.shape

(126, 5)

In [11]:
from datasets import Dataset

# Keep only the columns needed for training: row id, question and answer.
my_data_pd = my_data_pd[[QA_CSV_HEADER_ID,
                        QA_CSV_HEADER_QUESTION,
                        QA_CSV_HEADER_ANSWER]]
print("My local database has {} samples".format(len(my_data_pd)))

# Build the Hugging Face dataset through the DataFrame entry point rather than
# handing a DataFrame to `from_dict`, which is documented for plain mappings of
# column name -> list/array. `preserve_index=False` keeps the (filtered,
# non-contiguous) pandas index from being added as an extra column.
dataset = Dataset.from_pandas(my_data_pd, preserve_index=False)
dataset

My local database has 126 samples


Dataset({
    features: ['ID', 'Question', 'Answer'],
    num_rows: 126
})

### Train with RLHF

Set up the mlflow experiment name.

In [12]:
# Uncomment to log to a remote tracking server instead of the local ./mlruns.
# mlflow.set_tracking_uri("http://x.x.x.x:5000")

# One experiment per notebook run: "<step name>/<current timestamp>".
experiment = "rlhf_step1_sft"
current_time = str(datetime.datetime.now())
mlflow_experiment_name = "/".join([experiment, current_time])

# Close any run left open by a previous execution of this notebook.
# `mlflow.end_run()` does nothing when no run is active, so reaching the
# handler means a real failure; report it instead of hiding it, and do not
# catch KeyboardInterrupt/SystemExit as the former bare `except:` did.
try:
    mlflow.end_run()
except Exception as exc:
    print(f"Could not end the previous mlflow run: {exc!r}")

# Creates the experiment if it does not exist and makes it the active one;
# the returned Experiment object is shown as the cell output.
mlflow.set_experiment(mlflow_experiment_name)

2023/10/04 22:22:12 INFO mlflow.tracking.fluent: Experiment with name 'rlhf_step1_sft/2023-10-04 22:22:12.103672' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///home/ubuntu/pykoi/example/rlhf/mlflow/mlruns/960009078910101657', creation_time=1696458132114, experiment_id='960009078910101657', last_update_time=1696458132114, lifecycle_stage='active', name='rlhf_step1_sft/2023-10-04 22:22:12.103672', tags={}>

Set pykoi parameters.

In [13]:
# Model and data source.
base_model_path = "elinas/llama-7b-hf-transformers-4.29"
dataset_type = "local_db"

# Training schedule: log every step, stop after five steps.
max_steps = 5
log_freq = 1

# Directory where the fine-tuned PEFT model is stored.
peft_model_path = "./models/rlhf_step1_sft"

Manually log the pykoi parameters into mlflow. Torch-level parameters are logged automatically.

In [14]:
# Record the pykoi-level settings on the active mlflow run in a single batch
# call. Every key carries a "pykoi_" prefix. `log_params` returns nothing, so
# the cell no longer echoes the last logged value as stray output.
mlflow.log_params(
    {
        "pykoi_base_model_path": base_model_path,
        "pykoi_dataset_type": dataset_type,
        "pykoi_log_freq": log_freq,
        "pykoi_max_steps": max_steps,
        "pykoi_peft_model_path": peft_model_path,
    }
)

'./models/rlhf_step1_sft'

Training metrics are automatically logged into mlflow.

In [15]:
# Supervised fine-tuning: build the config from the parameters chosen above,
# create the trainer from it, then train and save under `peft_model_path`.
config = RLHFConfig(
    base_model_path=base_model_path,
    dataset_type=dataset_type,
    log_freq=log_freq,
    max_steps=max_steps,
)
rlhf_step1_sft = SupervisedFinetuning(config)
rlhf_step1_sft.train_and_save(peft_model_path)


My local database has 126 up vote samples for SFT
Size of the train set: 113.               Size of the validation set: 13


Loading checkpoint shards: 100%|██████████| 2/2 [01:49<00:00, 54.76s/it]
Using pad_token, but it is not set yet.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss


Log the trained PEFT model and the input data as mlflow artifacts.

In [16]:
# Attach the saved model directory and the input data directory to the
# active mlflow run, in that order.
for artifact_dir in (peft_model_path, input_root):
    mlflow.log_artifacts(artifact_dir)

In the terminal, run
```
mlflow ui
```
and go to http://127.0.0.1:5000 in the browser to view the experiment in the UI.