## Data Transformation and Finetuning with OpenPipe

In [1]:
import json
import random

from argon2.low_level import error_to_str
from openpipe import OpenAI
from prompts.prompts import qp_system_prompt, cp_system_prompt, qp_system_prompt_sudoku, cp_system_prompt_sudoku
from utils.save_and_load_json import load_all_json_files, save_jsonl, save_json
from utils.transfrom_data_for_finetuing import split_dataset, format_qp_for_openpipe_finetuning, format_cp_for_openpipe_finetuning

# load in datasets
logic701 = load_all_json_files(folder_path='./data/train/raw')
gpt_one_call = load_all_json_files(folder_path='./data/synthetic/processed/gpt_one_call')
gpt_two_calls = load_all_json_files(folder_path='./data/synthetic/processed/gpt_two_calls')
sudoku = load_all_json_files(folder_path='./data/synthetic/processed/sudoku_4x4')

Loaded 1 JSON files from ./data/train/raw
Loaded 1 JSON files from ./data/synthetic/processed/gpt_one_call
Loaded 1 JSON files from ./data/synthetic/processed/gpt_two_calls
Loaded 1 JSON files from ./data/synthetic/processed/sudoku_4x4


In [2]:
# Set seed for reproducibility
random.seed(42)

# Get 250 random samples
logic701_samples = random.sample(logic701[0], 250)
sudoku_samples = random.sample(sudoku[0], 250)
gpt_one_call_samples = random.sample(gpt_one_call[0], 250)
gpt_two_call_samples = random.sample(gpt_two_calls[0], 250)

print('verifying dataset len samples:')
print(len(logic701_samples))
print(len(sudoku_samples))
print(len(gpt_one_call_samples))
print(len(gpt_two_call_samples))

# create qp and cp splits
logic701_samples_qp, logic701_samples_cp = split_dataset(logic701_samples)
sudoku_samples_qp, sudoku_samples_cp = split_dataset(sudoku_samples)
gpt_one_call_samples_qp, gpt_one_call_samples_cp = split_dataset(gpt_one_call_samples)
gpt_two_call_samples_qp, gpt_two_call_samples_cp = split_dataset(gpt_two_call_samples)

print('verifying dataset len for qp and cp:')
for dataset in [logic701_samples_qp, logic701_samples_cp, sudoku_samples_qp, sudoku_samples_cp, gpt_one_call_samples_qp, gpt_two_call_samples_qp, gpt_two_call_samples_cp]: print(len(dataset))

verifying dataset len samples:
250
250
250
250
verifying dataset len for qp and cp:
250
250
250
250
250
250
250


In [3]:
# format data into OpenAI finetuning format for OpenPipe
logic701_formatted_qp = format_qp_for_openpipe_finetuning(logic701_samples_qp, system_prompt=qp_system_prompt)
logic701_formatted_cp = format_cp_for_openpipe_finetuning(logic701_samples_cp, system_prompt=cp_system_prompt)

sudoku_formatted_qp = format_qp_for_openpipe_finetuning(sudoku_samples_qp, system_prompt=qp_system_prompt)
sudoku_formatted_cp = format_cp_for_openpipe_finetuning(sudoku_samples_cp, system_prompt=cp_system_prompt)

gpt_one_call_formatted_qp = format_qp_for_openpipe_finetuning(gpt_one_call_samples_qp, system_prompt=qp_system_prompt)
gpt_one_call_formatted_cp = format_cp_for_openpipe_finetuning(gpt_one_call_samples_cp, system_prompt=cp_system_prompt)

gpt_two_call_formatted_qp = format_qp_for_openpipe_finetuning(gpt_two_call_samples_qp, system_prompt=qp_system_prompt)
gpt_two_call_formatted_cp = format_cp_for_openpipe_finetuning(gpt_two_call_samples_cp, system_prompt=cp_system_prompt)

# save data as jsonl
save_jsonl(logic701_formatted_qp, "./data/synthetic/finetuning/logic701_finetuning_qp.jsonl")
save_jsonl(logic701_formatted_cp, "./data/synthetic/finetuning/logic701_finetuning_cp.jsonl")

save_jsonl(sudoku_formatted_qp, "./data/synthetic/finetuning/sudoku_finetuning_qp.jsonl")
save_jsonl(sudoku_formatted_cp, "./data/synthetic/finetuning/sudoku_finetuning_cp.jsonl")

save_jsonl(gpt_one_call_formatted_qp, "./data/synthetic/finetuning/gpt_one_call_finetuning_qp.jsonl")
save_jsonl(gpt_one_call_formatted_cp, "./data/synthetic/finetuning/gpt_one_call_finetuning_cp.jsonl")

save_jsonl(gpt_two_call_formatted_qp, "./data/synthetic/finetuning/gpt_two_call_finetuning_qp.jsonl")
save_jsonl(gpt_two_call_formatted_cp, "./data/synthetic/finetuning/gpt_two_call_finetuning_cp.jsonl")

In [4]:
import os
from openpipe import OpenAI
from dotenv import load_dotenv

# Load api key
load_dotenv()
open_pipe = os.getenv("structural-reasoning")

client = OpenAI(openpipe={"api_key": f"{open_pipe}"})

In [5]:
# load test data
with open('./data/test/test-reference.json', 'r') as f:
    test_data = json.load(f)

test_qp, test_cp = split_dataset(test_data)

In [6]:
test_cp[0]

{'answer': 'b',
 'id': 162,
 'cot': "Since G goes to the United States, we need to analyze the conditions that follow. Condition (1) is not applicable since G is going to the US. Condition (2) is also not applicable since L's destination is not specified. Condition (3) does not provide any information about H, M, U, or W. Condition (4) states that U's destination is different from G's, which is the US, so U must go to the UK. Condition (5) is not applicable since Z's destination is not specified.",
 'cot_parsing': [{'statement': 'Condition (1) is not applicable',
   'evidence': 'Condition (1): If G goes to the UK, then H To the United States. | G is going to the US',
   'Verification': 'false'},
  {'statement': 'Condition (2) is also not applicable',
   'evidence': "Condition (2): If L goes to the UK, both M and U go to the US. | L's destination is not specified",
   'Verification': 'false'},
  {'statement': 'Condition (3) does not provide any information about H, M, U, or W',
   'evid

In [7]:
# Run inference pipeline
# Format question data
content_formated_question = format_qp_for_openpipe_finetuning(test_qp, system_prompt=qp_system_prompt)
content_questions = [item["messages"][1]["content"] for item in content_formated_question]

# Format CoT data
content_formated_cot = format_cp_for_openpipe_finetuning(test_cp, system_prompt=cp_system_prompt)
content_cots = [item["messages"][1]["content"] for item in content_formated_cot]

In [8]:
def run_pipeline(test_qp, test_cp, content_question,  content_cot, model_qp, model_cp):
    puzzle = {
            "question": test_qp['question'],
            "question_parsing": None,
            "answer": test_cp['answer'],
            "id": test_cp['id'],
            "sel_idx": test_cp['sel_idx'],
            "cot": test_cp['cot'],
            "cot_parsing": None
                }

    try:
        completion_qp = client.chat.completions.create(
            model=model_qp,
            messages=[
                {
                    "role": "system",
                    "content": qp_system_prompt
                },
                {
                    "role": "user",
                    "content": content_question
                }
            ],
            temperature=0
        )

        string_data = completion_qp.choices[0].message.content.replace('\'', '"')
        print(string_data)
        puzzle["question_parsing"] = json.loads(string_data)

        completion_cp = client.chat.completions.create(
            model=model_cp,
            messages=[
                {
                    "role": "system",
                    "content": cp_system_prompt
                },
                {
                    "role": "user",
                    "content": content_cot
                }
            ],
            temperature=0
        )

        print(completion_cp.choices[0].message.content)
        puzzle["cot_parsing"] = json.loads(completion_cp.choices[0].message.content)
        return puzzle
    except Exception as e:
        print(f"Error occurred: {e}")
        return None, None

In [11]:
name = 'gpt-two-call'  # 'logic701', gpt_one_call, gpt_two_calls, sudoku
results = []
error_n = 0
errors = []
for idx in range(len(test_data)):
    try:
        result = run_pipeline(test_qp[idx],
                              test_cp[idx],
                              content_question=content_questions[idx],
                              content_cot=content_cots[idx],
                              model_qp='openpipe:gpt-two-call-finetuning-cp',
                              model_cp='openpipe:gpt-two-call-finetuning-cp')
        results.append(result)
        # break
        # print(result)
        save_json(result, "./data/comparison/gpt-two-call/result-{}-{}.json".format(idx, name))
    except Exception as e:
        print(f"Error occurred: {e} at index {idx}")
        error_n += 1
        errors.append(idx)

[{"Brief description of objects and setup": "7 students G, H, L, M, U, W, Z; each goes to either UK or US"}, {"Rule 1 in simplified form": "If G goes to UK, H goes to US"}, {"Rule 2 in simplified form": "If L goes to UK, M and U go to US"}, {"Rule 3 in simplified form": "W and Z go to different countries"}, {"Rule 4 in simplified form": "U goes to different country than G"}, {"Rule 5 in simplified form": "If Z goes to UK, H goes to UK"}, {"Condition to evaluate": "G goes to US; must be true"}], {"[\"If G goes to US, then by rule 4, U must go to UK. Therefore, option D W goes to America must be true.\"]": "true"}
Error occurred: Extra data: line 1 column 499 (char 498)
[{"The six clear flower porcelains are S, Y, M, Q, K, X.", "Each piece has a different production time.", "Exhibitions are sorted from earliest to latest."], ["M is older than X.", "If Y is earlier than M, then Q is earlier than K and X.", "If M is older than Y, then K is older than Q and X.", "S"s age is either earlier t

In [12]:
print(error_n)

0


In [67]:
data_combined = load_all_json_files(folder_path='./data/comparison/gpt_one_call')
save_json(data_combined, "./data/comparison/sudoku-combined.json")

Loaded 21 JSON files from ./data/comparison/gpt_one_call


True

In [8]:
lines = string_data.split('\n')
string_data = completion.choices[0].message.content.replace('\'', '"')

json.loads(string_data)