In [1]:
import os

import glob
import pandas as pd
import random
import copy
from llmchem.utils import  make_project_dirs
from llmchem.eval import eval_model

# Dataset settings
n_test = 50  # number of test records; the test split is the last n_test rows of the shuffled dataset
# Number of training records that are re-evaluated as a check. Evaluating the
# whole training split takes too long, so only this many records are checked.
n_train_check = 50

# Passed to eval_model as n_prompt_examples; also used as the minimum size of
# the training split (max(n_train, n_prompt_examples)).
# Presumably the number of few-shot examples placed in each prompt - TODO confirm.
n_prompt_examples = 5

# Path of the CSV file that is loaded into the DataFrame `df` with pd.read_csv
dataset_path = "dataset/231225AutoReasoning/240117best_reason_record_11k.csv"


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Evaluation only: no model is trained in this cell.

# %%
# Nickname -> model settings. "name" is the model id handed to eval_model;
# entries with an empty "name" are skipped by the loop below.
model_dict = {
    "GPT-3.5-turbo": {"name": "gpt-3.5-turbo-1106", "modules": []},
    "GPT-4-turbo": {"name": "gpt-4-1106-preview", "modules": []},
    "GPT-3.5-turbo-FT10": {"name": "ft:gpt-3.5-turbo-1106:personal::8kicrQm5", "modules": []},
    "GPT-3.5-turbo-FT20": {"name": "ft:gpt-3.5-turbo-1106:personal::8kiw9gBe", "modules": []},
    "GPT-3.5-turbo-FT50": {"name": "ft:gpt-3.5-turbo-1106:personal::8kiwu9s0", "modules": []},
    "GPT-3.5-turbo-FT100": {"name": "ft:gpt-3.5-turbo-1106:personal::8kj2LEBW", "modules": []},
    "GPT-3.5-turbo-FT1000": {"name": "", "modules": []},
    "GPT-3.5-turbo-FT2000": {"name": "", "modules": []},
}

df = pd.read_csv(dataset_path)

# Dummy values: epochs and r only appear in the project directory name.
epochs = 3
r = 32
n_train = 0

for model_nickname, model_spec in model_dict.items():
    model_name = model_spec["name"]
    if not model_name:
        # No model id recorded for this nickname.
        continue

    # A nickname such as "GPT-3.5-turbo-FT100" carries the training-set size
    # after "FT"; its results are filed under the base model's nickname.
    if "FT" not in model_nickname:
        model_nickname_ = model_nickname
        n_train = 0
    else:
        n_train = int(model_nickname.split("FT")[-1])
        model_nickname_ = "GPT-3.5-turbo"

    for with_reason in (False, True):
        print("trial----")
        print(f"model {model_name} reason: {with_reason}")

        # Results with and without reasons go to separate project trees.
        project_dir = (
            f"results/projects/240118comparisons/{model_nickname_}_{epochs}_{r}_{n_train}"
            if with_reason
            else f"results/projects/240118comparisons_wo_reason/{model_nickname_}_{epochs}_{r}_{n_train}"
        )
        print("Task :", project_dir)

        make_project_dirs(project_dir)

        # Fresh records plus a fixed seed: every trial sees the same shuffle,
        # and the "Reason" overwrite below never leaks into the next trial.
        dataset = df.to_dict(orient="records")
        random.seed(0)
        random.shuffle(dataset)

        # Without reasons, every record's reason becomes the placeholder "-".
        if not with_reason:
            for data in dataset:
                data["Reason"] = "-"

        # Training split from the head, test split from the tail.
        train_dataset = dataset[:max(n_train, n_prompt_examples)]
        test_dataset = dataset[-n_test:]
        random.shuffle(train_dataset)

        # Only a subset of the training split is re-evaluated.
        train_check_dataset = copy.deepcopy(train_dataset[:n_train_check])
        random.shuffle(train_check_dataset)

        model = model_name
        tokenizer = None
        eval_dir = f"{project_dir}/eval"

        # Skip an evaluation when its output files are already on disk.
        if glob.glob(f"{eval_dir}/test*"):
            print(f"test exists: {project_dir}")
        else:
            test_eval_result = eval_model(
                model,
                tokenizer,
                test_dataset,
                eval_dir,
                n_prompt_examples=n_prompt_examples,
                prefix="test",
                gpt_mode=True,
                n_max_trials=3,
            )

        if glob.glob(f"{eval_dir}/train*"):
            print(f"train exists: {project_dir}")
        elif n_train > 0:
            # Models that were not fine-tuned (n_train == 0) get no train check.
            train_eval_result = eval_model(
                model,
                tokenizer,
                train_check_dataset,
                eval_dir,
                n_prompt_examples=n_prompt_examples,
                prefix="train",
                gpt_mode=True,
                n_max_trials=3,
            )

trial----
model gpt-3.5-turbo-1106 reason: False
Task : results/projects/240118comparisons_wo_reason/GPT-3.5-turbo_3_32_0
test exists: results/projects/240118comparisons_wo_reason/GPT-3.5-turbo_3_32_0


  0%|          | 0/5 [00:00<?, ?it/s]

promlem 1 / 5
----


##Reason: The molecule contains functional groups that can participate in various chemical reactions, leading to potential biological activity.


In [None]:
from llmchem.dataset import generate_question_prompt,gen_train_text


In [None]:
import json

# Write the fine-tuning examples to disk in JSONL format (one chat example
# per line), for each training-set size and with/without reasons.
ft_text_base_dir = "results/projects/240125gpt3_train_text"
# open(..., "w") does not create missing parent directories, so make sure the
# output directory exists before the first file is written.
os.makedirs(ft_text_base_dir, exist_ok=True)

for n_train in [5, 10, 20, 50, 100, 1000, 2000, 5000, 10000]:
    for with_reason in [False, True]:
        # Fresh records plus a fixed seed: every pass sees the same shuffle,
        # and the "Reason" overwrite below never leaks into the next pass.
        dataset = df.to_dict(orient="records")
        random.seed(0)
        random.shuffle(dataset)

        # Without reasons, every record's reason becomes the placeholder "-".
        if not with_reason:
            for data in dataset:
                data["Reason"] = "-"

        train_dataset = dataset[:max(n_train, n_prompt_examples)]
        # Not written to the JSONL files; kept so the split variables match
        # the evaluation cell.
        test_dataset = dataset[-n_test:]

        random.shuffle(train_dataset)
        train_json_list = []
        for train_id, record in enumerate(train_dataset):
            # n_prompt_examples=0 is passed so the question is built for this
            # record alone.
            question = generate_question_prompt(
                train_dataset, train_id, n_prompt_examples=0).strip()

            answer = f" {record['Reason']}\n##Prediction: {record['Prediction(integer)']}"
            json_data = {"messages": [
                {"role": "system", "content": "You are a professional chemist, who only output ##Reason and ##Prediction. Complete the text. Never output anything else."},
                {"role": "user", "content": question},
                {"role": "assistant", "content": answer},
            ]}
            train_json_list.append(json_data)

        # Both variants used to be written to ft{n_train}.jsonl, so the
        # no-reason file was silently overwritten by the with-reason pass.
        # The with-reason variant keeps the original file name; the no-reason
        # variant gets its own "_wo_reason" file.
        suffix = "" if with_reason else "_wo_reason"
        with open(f"{ft_text_base_dir}/ft{n_train}{suffix}.jsonl", "w", encoding="utf-8") as file:
            for json_data in train_json_list:
                file.write(json.dumps(json_data) + "\n")