In [1]:
# %reload_ext autoreload
# %autoreload 2

# import os
# import sys

# # Add the root folder to the module search path
# # Get the current directory
# current_directory = os.getcwd()

# # Move two levels up (go to the parent directory of the parent directory)
# two_levels_up_directory = os.path.dirname(os.path.dirname(current_directory))

# print(two_levels_up_directory)

# sys.path.append(two_levels_up_directory)

In [2]:
from pykoi.chat import QuestionAnswerDatabase
from pykoi.rlhf import RLHFConfig
from pykoi.rlhf import PreTraining

  from .autonotebook import tqdm as notebook_tqdm


### Define my DB

In [3]:
# Create the question/answer store that the rest of this notebook reads from and writes to.
# NOTE(review): built with no arguments, so the storage location comes from pykoi's
# defaults — confirm where the data is persisted before re-running the insert cell.
qa_database = QuestionAnswerDatabase()

### Insert my data into the DB

In [4]:
import pandas as pd
# Load the question/answer pairs from a JSON file resolved relative to the current
# working directory; orient="records" expects a list of {column: value} objects.
# NOTE(review): the variable is named "stackoverflow" but the rows displayed below are
# InstructGPT questions — confirm the intended data source / consider renaming.
my_stackoverflow_dataset = pd.read_json("rlhf_qa_dataset.json", orient="records")
my_stackoverflow_dataset

Unnamed: 0,Question,Answer
0,What is InstructGPT?,InstructGPT is a language model developed by O...
1,Why does InstructGPT work?,InstructGPT works due to a two-step training p...
2,What are some commonly used evaluation metrics...,One main evaluation metric for InstructGPT is ...
3,How is InstructGPT used?,InstructGPT can be used in any application tha...
4,What are some common applications of InstructGPT?,Common applications of InstructGPT can be in e...
5,How does InstructGPT handle ambiguous prompts?,"For ambiguous prompts, InstructGPT aims to ask..."
6,Can InstructGPT generate incorrect or nonsensi...,"Yes, InstructGPT can sometimes produce plausib..."
7,How does InstructGPT manage harmful and biased...,InstructGPT has a moderation system in place t...
8,What is the role of human evaluators in the tr...,Human evaluators play a crucial role in the tr...
9,What are the limitations of InstructGPT?,There are several limitations to InstructGPT. ...


In [4]:
# Column names of the question/answer table, listed in column order.
QA_CSV_HEADER = ("ID", "Question", "Answer", "Vote Status", "Timestamp")

# Individual column names, unpacked from QA_CSV_HEADER so the per-column
# constants and the full header tuple cannot drift apart.
(
    QA_CSV_HEADER_ID,
    QA_CSV_HEADER_QUESTION,
    QA_CSV_HEADER_ANSWER,
    QA_CSV_HEADER_VOTE_STATUS,
    QA_CSV_HEADER_TIMESTAMPS,
) = QA_CSV_HEADER

In [None]:
# Copy question/answer pairs from the loaded dataset into the database.
# The loaded frame only has Question and Answer columns (no vote column), so every
# inserted row is marked as up-voted.
# NOTE(review): iloc[3:100] skips the first three rows — confirm this is intentional.
# NOTE(review): re-running this cell inserts the same rows again.
for record in my_stackoverflow_dataset.iloc[3:100].to_dict("records"):
    question_text = record[QA_CSV_HEADER_QUESTION]
    answer_text = record[QA_CSV_HEADER_ANSWER]
    inserted_id = qa_database.insert_question_answer(
        question=question_text,
        answer=answer_text,
    )
    qa_database.update_vote_status(id=inserted_id, vote_status="up")

In [5]:
# Echo the database object as a quick existence check (only the default repr is shown).
qa_database

<pykoi.db.qa_database.QuestionAnswerDatabase at 0x7f1762f6d450>

### Train RLHF using the data from the database

Let's take a look at the QA data and process it for training.

In [6]:
# Pull every stored question/answer row out of the database as a DataFrame
# and preview the first few rows.
my_data_pd = qa_database.retrieve_all_question_answers_as_pandas()
my_data_pd.head()

Unnamed: 0,ID,Question,Answer,Vote Status,Timestamp
0,1,who founded YC and what does YC do,Y Combinator is a startup accelerator that pro...,down,2023-07-24 08:28:29.461045
1,2,what are the top YC companies,I am looking for a list of the top YC companie...,down,2023-07-24 08:29:17.521467
2,3,How much does top 10 YC companies worth,"According to the latest Crunchbase data, the 1...",down,2023-07-24 08:29:58.248998
3,4,What's the meaning of life,"The meaning of life is to be happy, to be usef...",down,2023-07-24 19:26:04.713297
4,5,What is the meaning of life,The meaning of life is to find your gift. The ...,up,2023-07-24 19:26:28.676427


In [7]:
# Keep only the rows whose vote status is "up"; the shape shows how many remain.
is_upvoted = my_data_pd[QA_CSV_HEADER_VOTE_STATUS] == "up"
my_data_pd = my_data_pd.loc[is_upvoted]
my_data_pd.shape

(118, 5)

In [8]:
from datasets import Dataset

# Keep only the ID, question and answer columns; vote status and timestamp are dropped.
my_data_pd = my_data_pd[[QA_CSV_HEADER_ID,
                        QA_CSV_HEADER_QUESTION,
                        QA_CSV_HEADER_ANSWER]]
print(f"My local database has {my_data_pd.shape[0]} samples")

# Build the Hugging Face dataset with from_pandas, the constructor documented for
# DataFrames (from_dict is documented for plain dicts of columns).
# preserve_index=False keeps the DataFrame index — which is no longer contiguous once
# rows have been filtered out — from being added as an extra "__index_level_0__" column.
dataset = Dataset.from_pandas(my_data_pd, preserve_index=False)
dataset

My local database has 118 samples


Dataset({
    features: ['ID', 'Question', 'Answer'],
    num_rows: 118
})

### Train with RLHF

In [10]:
# Run the first RLHF stage (pre-training) with the local database as the data source,
# then save the result under ./models/rlhf_step1_sft.
# NOTE(review): this downloads the base model weights on first run (see the progress
# output below), so expect it to be slow and to need substantial disk space.
config = RLHFConfig(
    dataset_type="local_db",
    base_model_path="elinas/llama-7b-hf-transformers-4.29",
)
rlhf_step1_sft = PreTraining(config)
rlhf_step1_sft.train_and_save("./models/rlhf_step1_sft")


Downloading (…)okenizer_config.json: 100%|██████████| 749/749 [00:00<00:00, 5.20MB/s]
Downloading tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 14.1MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 1.84M/1.84M [00:00<00:00, 54.1MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 414/414 [00:00<00:00, 3.12MB/s]


My local database has 118 samples
Size of the train set: 106.               Size of the validation set: 12


Downloading (…)lve/main/config.json: 100%|██████████| 630/630 [00:00<00:00, 4.69MB/s]
Downloading (…)fetensors.index.json: 100%|██████████| 26.8k/26.8k [00:00<00:00, 73.3MB/s]
Downloading (…)of-00002.safetensors: 100%|██████████| 9.98G/9.98G [00:47<00:00, 210MB/s]
Downloading (…)of-00002.safetensors: 100%|██████████| 3.50G/3.50G [00:26<00:00, 130MB/s]
Downloading shards: 100%|██████████| 2/2 [01:14<00:00, 37.34s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.71s/it]
Downloading (…)neration_config.json: 100%|██████████| 167/167 [00:00<00:00, 1.25MB/s]
Using pad_token, but it is not set yet.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss


: 