In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys

# Add the root folder (the parent of the current working directory) to the
# module search path so the `plotano` package imported below can be found.
root_folder = os.path.dirname(os.getcwd())
# Only append when missing: re-running this cell would otherwise add the same
# entry to sys.path again on every execution.
if root_folder not in sys.path:
    sys.path.append(root_folder)
print(os.getcwd())

/home/ubuntu/plotano/plotano


In [5]:
from plotano.db.ranking_database import (
    RankingDatabase, 
    RANKING_CSV_HEADER, 
    RANKING_CSV_HEADER_UP_RANKING_ANSWER,
    RANKING_CSV_HEADER_LOW_RANKING_ANSWER
)

# Show the header tuple; the DataFrame columns are renamed to these names
# further down and the same tuple is written as the CSV header row.
print(RANKING_CSV_HEADER)

('ID', 'Question', 'Up Ranking Answer', 'Low Ranking Answer')


### Load a dataset

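Download the sample data with `wget` from https://huggingface.co/datasets/lvwerra/stack-exchange-paired/tree/main/data/reward and save it where the cell below expects it (`/home/ubuntu/datasets/reward-train-00000-of-00020.parquet`).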

In [3]:
import pandas as pd

# Location of the downloaded parquet shard.
reward_parquet_path = '/home/ubuntu/datasets/reward-train-00000-of-00020.parquet'

# NOTE(review): `index=False` is not a pandas argument of its own; it is
# presumably forwarded to the fastparquet engine — confirm.
stackoverflow_dataset = pd.read_parquet(
    reward_parquet_path,
    engine='fastparquet',
    index=False,
)
stackoverflow_dataset.head()

Unnamed: 0,qid,question,date,metadata,response_j,response_k
0,538868,I have a question to those who are really prof...,2020/03/27,[https://physics.stackexchange.com/questions/5...,In the absence of a magnetic field the velocit...,"Can MHD work on ""cold plasma""? Is there any ef..."
1,3665283,"When looking at dI/dt we have a term gamma\*I,...",2020/05/08,[https://math.stackexchange.com/questions/3665...,If we have $I$ infected people and the recover...,Imagine a situation where each infected indivi...
2,3665283,"When looking at dI/dt we have a term gamma\*I,...",2020/05/08,[https://math.stackexchange.com/questions/3665...,If we have $I$ infected people and the recover...,"The others have addressed the question, I just..."
3,3665283,"When looking at dI/dt we have a term gamma\*I,...",2020/05/08,[https://math.stackexchange.com/questions/3665...,Imagine a situation where each infected indivi...,"The others have addressed the question, I just..."
4,32965609,I have a setup where I use a service to log a ...,2015/10/06,"[https://Stackoverflow.com/questions/32965609,...","In your code, if there is no user logged in (s...",My current work-around is creating an other Sa...


In [4]:
# Keep only the question id, the question text and the two paired responses.
ranking_source_columns = ['qid', 'question', 'response_j', 'response_k']
my_stackoverflow_dataset = stackoverflow_dataset.loc[:, ranking_source_columns]
my_stackoverflow_dataset.head()

Unnamed: 0,qid,question,response_j,response_k
0,538868,I have a question to those who are really prof...,In the absence of a magnetic field the velocit...,"Can MHD work on ""cold plasma""? Is there any ef..."
1,3665283,"When looking at dI/dt we have a term gamma\*I,...",If we have $I$ infected people and the recover...,Imagine a situation where each infected indivi...
2,3665283,"When looking at dI/dt we have a term gamma\*I,...",If we have $I$ infected people and the recover...,"The others have addressed the question, I just..."
3,3665283,"When looking at dI/dt we have a term gamma\*I,...",Imagine a situation where each infected indivi...,"The others have addressed the question, I just..."
4,32965609,I have a setup where I use a service to log a ...,"In your code, if there is no user logged in (s...",My current work-around is creating an other Sa...


In [5]:
# Relabel the four columns with the ranking header names. The assignment is
# positional, so it relies on the column order chosen when the frame was
# selected (qid, question, response_j, response_k).
ranking_column_labels = list(RANKING_CSV_HEADER)
my_stackoverflow_dataset.columns = ranking_column_labels
my_stackoverflow_dataset.head()

Unnamed: 0,ID,Question,Up Ranking Answer,Low Ranking Answer
0,538868,I have a question to those who are really prof...,In the absence of a magnetic field the velocit...,"Can MHD work on ""cold plasma""? Is there any ef..."
1,3665283,"When looking at dI/dt we have a term gamma\*I,...",If we have $I$ infected people and the recover...,Imagine a situation where each infected indivi...
2,3665283,"When looking at dI/dt we have a term gamma\*I,...",If we have $I$ infected people and the recover...,"The others have addressed the question, I just..."
3,3665283,"When looking at dI/dt we have a term gamma\*I,...",Imagine a situation where each infected indivi...,"The others have addressed the question, I just..."
4,32965609,I have a setup where I use a service to log a ...,"In your code, if there is no user logged in (s...",My current work-around is creating an other Sa...


In [6]:
# Preview the first three rows as plain dicts. Take head(3) *before* the
# conversion so only three rows are turned into dicts instead of the whole
# frame; the displayed result is the same.
my_stackoverflow_dataset.head(3).to_dict('records')

[{'ID': 538868,
  'Question': "I have a question to those who are really proficient in MHD generation. There exist some claims that degree of interaction between flowing molecules (gases) and ions on one hand and flowing molecules (gases) and electrons on other hand is vastly different. Therefore movement of ions in a gas flow will occur much faster than movement of free electrons. If this is correct then why we need to use a strong magnets to separate ions and electrons in MHD generator? If speed of the ions and electrons movement in the same gas flow is vastly different then doesn't charge separation suppose to occur by itself just due to a gas flow? Shouldn't majority of electrons concentrate at the beginning of the duct while many more ions at the end? Then only thing we need to generate current is to put an electrode at the beginning of the duct and another one at the end and let electrons flow from the inside of the duct through the external load to the end of the duct and recomb

### Load the dataset into the Ranking DB and export it to CSV

In [10]:

# Instantiate the ranking database wrapper with its debug flag disabled.
db = RankingDatabase(debug=False)

In [11]:
# NOTE(review): presumably creates the ranking table if it does not exist yet — confirm in RankingDatabase.
db.create_table()

In [12]:
# Show the header names of the two answer columns, one per line.
print(
    RANKING_CSV_HEADER_UP_RANKING_ANSWER,
    RANKING_CSV_HEADER_LOW_RANKING_ANSWER,
    sep="\n",
)

Up Ranking Answer
Low Ranking Answer


In [14]:
# Insert the first 100 question / answer pairs into the ranking DB.
ranking_sample = my_stackoverflow_dataset.head(100)
ranking_triples = zip(
    ranking_sample['Question'],
    ranking_sample[RANKING_CSV_HEADER_UP_RANKING_ANSWER],
    ranking_sample[RANKING_CSV_HEADER_LOW_RANKING_ANSWER],
)
for question_text, up_answer, low_answer in ranking_triples:
    db.insert_ranking(question=question_text,
                      up_ranking_answer=up_answer,
                      low_ranking_answer=low_answer)

In [15]:
# Read everything back from the ranking DB and preview the first three entries.
# NOTE(review): the preview suggests each entry is a tuple that starts with an
# integer id followed by the question text — confirm the full layout in
# RankingDatabase.
my_ranking_data = db.retrieve_all_question_answers()
my_ranking_data[:3]

[(1,
  "I have a question to those who are really proficient in MHD generation. There exist some claims that degree of interaction between flowing molecules (gases) and ions on one hand and flowing molecules (gases) and electrons on other hand is vastly different. Therefore movement of ions in a gas flow will occur much faster than movement of free electrons. If this is correct then why we need to use a strong magnets to separate ions and electrons in MHD generator? If speed of the ions and electrons movement in the same gas flow is vastly different then doesn't charge separation suppose to occur by itself just due to a gas flow? Shouldn't majority of electrons concentrate at the beginning of the duct while many more ions at the end? Then only thing we need to generate current is to put an electrode at the beginning of the duct and another one at the end and let electrons flow from the inside of the duct through the external load to the end of the duct and recombine with ions there? An

In [2]:
# Relative path of the CSV file the ranking data is written to and later
# loaded from.
my_ranking_sql_data_path = 'my_ranking_data.csv'


In [16]:
import csv


# Export the rows read back from the ranking DB to CSV, header row first.
# The encoding is pinned to UTF-8 so non-ASCII characters in the question and
# answer text are written the same way on every platform instead of depending
# on the locale's default encoding. newline='' leaves line-ending handling to
# the csv module.
with open(my_ranking_sql_data_path, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(RANKING_CSV_HEADER)
    writer.writerows(my_ranking_data)

In [17]:
# Done with the ranking DB for this section: close its connection.
db.close_connection()

### Load the CSV with Hugging Face `datasets`

In [3]:
from datasets import load_dataset
# Load the exported CSV with the `datasets` CSV loader.
ds = load_dataset('csv', data_files=my_ranking_sql_data_path)

  from .autonotebook import tqdm as notebook_tqdm
Found cached dataset csv (/home/ubuntu/.cache/huggingface/datasets/csv/default-331cd4c59f87e105/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)
100%|██████████| 1/1 [00:00<00:00, 758.74it/s]


### Test RLHF code with DB

In [4]:
from plotano.rlhf.rlhf import RLHFConfig


# Configuration for the RLHF run.
# `dataset_name` is derived from `my_ranking_sql_data_path` so the trainer reads
# the CSV this notebook exported, rather than a machine-specific absolute path
# that goes stale when the notebook is run from another checkout or user
# account. In the recorded working directory it resolves to the same file as
# before.
config = RLHFConfig(
    base_model_path="meta-llama/Llama-2-7b-hf", ## "elinas/llama-7b-hf-transformers-4.29",
    reward_model_path="databricks/dolly-v2-3b",
    dataset_type="csv", ## "huggingface", ##
    dataset_name=os.path.abspath(my_ranking_sql_data_path),
    reward_epochs=3,
    output_dir="../rlhf_tests/",
    reward_num_of_data=1000,
)


[2023-07-20 23:08:03,796] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)

Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/envs/trl/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /opt/conda/envs/trl/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)


In [5]:
from plotano.rlhf.rlhf import  RewardTrainer

# Build the reward trainer from the config.
# NOTE(review): judging by the logged output, construction already loads the
# reward model checkpoint and prepares the train/validation split — confirm in
# RewardTrainer.
rlhf_step2_reward_trainer = RewardTrainer(config)

Some weights of the model checkpoint at databricks/dolly-v2-3b were not used when initializing GPTNeoXForSequenceClassification: ['embed_out.weight']
- This IS expected if you are initializing GPTNeoXForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPTNeoXForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of GPTNeoXForSequenceClassification were not initialized from the model checkpoint at databricks/dolly-v2-3b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 5,248,000 || all params: 2,651,617,280 || trainable%: 0.19791694825582068


Found cached dataset csv (/home/ubuntu/.cache/huggingface/datasets/csv/default-331cd4c59f87e105/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)
100%|██████████| 1/1 [00:00<00:00, 894.12it/s]
Map:   0%|          | 0/200 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
                                                                  

Size of the train set: 113.               Size of the validation set: 13




In [6]:
# Run training and save the result under the given output path (as the method
# name suggests — confirm in RewardTrainer).
# NOTE(review): the logged run reports training loss 0.0 and accuracy 1.0 from
# step 20 onwards, with no validation loss — this looks degenerate; verify the
# data and labels before trusting the saved model.
# NOTE(review): output_path points at ../tests/ while the config's output_dir
# is ../rlhf_tests/ — confirm this is intended.
rlhf_step2_reward_trainer.train_and_save(output_path="../tests/step2_07201608")

You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  attn_scores = torch.where(causal_mask, attn_scores, mask_value)
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Accuracy
10,131.9256,,1.0
20,0.0,,1.0
30,0.0,,1.0
40,0.0,,1.0
50,0.0,,1.0
60,0.0,,1.0
70,0.0,,1.0


