In [5]:
import pandas as pd
import time
import openai
import os
from dotenv import load_dotenv, find_dotenv
from tqdm import tqdm



In [6]:
# Load the variables stored in ~/.gpt_config.env into the process environment.
_config_path = os.path.join(os.path.expanduser('~'), '.gpt_config.env')
load_dotenv(_config_path)

# Each value is None when the corresponding variable is not set.
API_KEY = os.getenv('API_KEY')
API_VERSION = os.getenv('API_VERSION')
RESOURCE_ENDPOINT = os.getenv('RESOURCE_ENDPOINT')

# Point the openai client at the Azure endpoint using the values read above.
openai.api_type = "azure"
openai.api_base = RESOURCE_ENDPOINT
openai.api_version = API_VERSION
openai.api_key = API_KEY



In [7]:
def get_GPT_response(instruction, system_prompt, chat_model_id, chat_deployment_id, temperature=0):
    """Send one system + user message pair to the chat model and return its reply.

    Args:
        instruction: Text sent as the "user" message.
        system_prompt: Text sent as the "system" message.
        chat_model_id: Value passed as ``model`` to ``openai.ChatCompletion.create``.
        chat_deployment_id: Value passed as ``deployment_id`` to the same call.
        temperature: Sampling temperature forwarded to the API (default 0).

    Returns:
        The ``content`` of the first choice's message, or the literal string
        ``'Unexpected response'`` when the response has no usable first choice
        (missing/empty ``choices``, or no ``message``/``content`` in it).

    Note:
        Exceptions raised by ``openai.ChatCompletion.create`` itself are not
        caught here and propagate to the caller.
    """
    response = openai.ChatCompletion.create(
        temperature=temperature,
        deployment_id=chat_deployment_id,
        model=chat_model_id,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": instruction}
        ]
    )

    choices = response['choices'] if 'choices' in response else None
    # Check the length of the choices list itself: an empty list must fall
    # through to the fallback instead of raising IndexError on choices[0].
    if isinstance(choices, list) and len(choices) > 0:
        first_choice = choices[0]
        if 'message' in first_choice and 'content' in first_choice['message']:
            return first_choice['message']['content']
    return 'Unexpected response'


In [15]:
# Model name passed to get_GPT_response; also used to build the output file name.
CHAT_MODEL_ID = "gpt-35-turbo"
# Deployment name; when left falsy, a later cell substitutes CHAT_MODEL_ID for it.
CHAT_DEPLOYMENT_ID = None
# CSV of benchmark questions; the run reads its "text" and "label" columns.
QUESTION_PATH = "../../../data/benchmark_datasets/test_questions_one_hop_true_false_v2.csv"
# Directory the results CSV is written into.
SAVE_PATH = "../../../data/analysis_results"


In [16]:
# Output file name: the model id with dashes turned into underscores, plus a fixed suffix.
save_name = CHAT_MODEL_ID.replace("-", "_") + "_prompt_based_true_false_binary_response.csv"


In [17]:
# GPT config params
temperature = 0

# Fall back to the model id whenever no deployment id has been configured.
CHAT_DEPLOYMENT_ID = CHAT_DEPLOYMENT_ID or CHAT_MODEL_ID


In [18]:
# System message sent with every question. This is a plain string (it is never
# passed through str.format or an f-string), so braces must be single: doubled
# braces would reach the model verbatim and the example would not be valid JSON.
system_prompt = """
You are an expert biomedical researcher. For the Question asked, please provide your answer in the following JSON format. 
{
  "answer": "True"
}
OR
{
  "answer": "False"
}
"""


In [19]:
%%time

# Create the output directory up front: the final to_csv would otherwise fail
# on a missing folder only after every API call has already been made.
os.makedirs(SAVE_PATH, exist_ok=True)

question_df = pd.read_csv(QUESTION_PATH)
answer_list = []
# iterrows() is a generator with no length, so pass total explicitly to let
# tqdm render a progress bar with an ETA instead of a bare iteration counter.
for index, row in tqdm(question_df.iterrows(), total=len(question_df)):
    question = "Question: " + row["text"]
    output = get_GPT_response(question, system_prompt, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=temperature)
    # Keep the raw question, its reference label, and the model's raw reply.
    answer_list.append((row["text"], row["label"], output))
answer_df = pd.DataFrame(answer_list, columns=["question", "label", "llm_answer"])
answer_df.to_csv(os.path.join(SAVE_PATH, save_name), index=False, header=True)


322it [02:33,  2.10it/s]

CPU times: user 3.66 s, sys: 483 ms, total: 4.14 s
Wall time: 2min 33s





In [None]:
# %%time

# question_df = pd.read_csv(QUESTION_PATH)
# answer_list = []
# for index, row in tqdm(question_df.iterrows()):
#     question = "Question: "+ row["text"]
#     output = get_GPT_response(question, system_prompt, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=temperature)
#     answer_list.append((row["text"], row["correct_node"], output))                  
# answer_df = pd.DataFrame(answer_list, columns=["question", "correct_answer", "llm_answer"])
# answer_df.to_csv(os.path.join(SAVE_PATH, save_name), index=False, header=True)
