In [1]:
import pandas as pd
import openai
import json
import time
import matplotlib.pyplot as plt
from tqdm import tqdm  # Import tqdm for the progress bar
from sklearn.metrics import accuracy_score, classification_report, balanced_accuracy_score
from sklearn.metrics import balanced_accuracy_score
import numpy as np

In [2]:
#pip install openai

In [3]:
from openai import OpenAI

In [4]:
openai.__version__


'1.1.1'

### Read in annotated data

In [5]:
df = pd.read_csv("../data/data_splits_stratified/6-2-2_all_classes_enriched_with_kw/test.csv", index_col=0)

In [6]:
df.shape

(534, 8)

In [7]:
df.head()

Unnamed: 0_level_0,pmid,journal_name,title,abstract,keywords,accepted_label,multi_label,binary_label
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,12047012,Schizophrenia bulletin,Can clinical practice guide a research agenda?,Articles from this issue of the Bulletin indic...,,Non-systematic-review,1,0
2,28832188,Future medicinal chemistry,Dual/multitargeted xanthone derivatives for Al...,"To date, the current therapy for Alzheimer's d...",Alzheimer's disease| Aβ aggregation| anticholi...,Non-systematic-review,1,0
3,17678496,Expert review of neurotherapeutics,Benefits of occupational therapy in stroke reh...,Stroke is the largest single cause of severe p...,,Non-systematic-review,1,0
4,25649308,Annals of the New York Academy of Sciences,The promise of ketamine for treatment-resistan...,Major depressive disorder (MDD) is one of the ...,antidepressant| bipolar disorder| ketamine| ma...,Non-systematic-review,1,0
5,6312596,La semaine des hopitaux : organe fonde par l'A...,[Clinical and pathogenic aspects of secondary ...,The secondary hyperlipoproteinemias are freque...,,Non-systematic-review,1,0


In [8]:
# Combine the columns
# Implementing custom tags for the combination of journal name, title, and abstract
df['input_journal_title_abstract'] = '<journal>' + df['journal_name'] + '</journal>' + \
                                     '<title>' + df['title'] + '</title>' + \
                                     '<abstract>' + df['abstract'] + '</abstract>'

# Implementing custom tags for the combination of title and abstract only
df['input_title_abstract'] = '<title>' + df['title'] + '</title>' + \
                             '<abstract>' + df['abstract'] + '</abstract>'

In [9]:
df.head(5)

Unnamed: 0_level_0,pmid,journal_name,title,abstract,keywords,accepted_label,multi_label,binary_label,input_journal_title_abstract,input_title_abstract
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,12047012,Schizophrenia bulletin,Can clinical practice guide a research agenda?,Articles from this issue of the Bulletin indic...,,Non-systematic-review,1,0,<journal>Schizophrenia bulletin</journal><titl...,<title>Can clinical practice guide a research ...
2,28832188,Future medicinal chemistry,Dual/multitargeted xanthone derivatives for Al...,"To date, the current therapy for Alzheimer's d...",Alzheimer's disease| Aβ aggregation| anticholi...,Non-systematic-review,1,0,<journal>Future medicinal chemistry</journal><...,<title>Dual/multitargeted xanthone derivatives...
3,17678496,Expert review of neurotherapeutics,Benefits of occupational therapy in stroke reh...,Stroke is the largest single cause of severe p...,,Non-systematic-review,1,0,<journal>Expert review of neurotherapeutics</j...,<title>Benefits of occupational therapy in str...
4,25649308,Annals of the New York Academy of Sciences,The promise of ketamine for treatment-resistan...,Major depressive disorder (MDD) is one of the ...,antidepressant| bipolar disorder| ketamine| ma...,Non-systematic-review,1,0,<journal>Annals of the New York Academy of Sci...,<title>The promise of ketamine for treatment-r...
5,6312596,La semaine des hopitaux : organe fonde par l'A...,[Clinical and pathogenic aspects of secondary ...,The secondary hyperlipoproteinemias are freque...,,Non-systematic-review,1,0,<journal>La semaine des hopitaux : organe fond...,<title>[Clinical and pathogenic aspects of sec...


### Load key for the OpenAI API 

In [10]:
def load_pass(file_path, key_to_find):
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split("=")
            if len(parts) == 2 and parts[0] == key_to_find:
                found_password = parts[1]
                break
    if found_password:
        print("Found password.")
        return found_password
    else:
        print("Password not found for key:", key_to_find)

Note: You need to create a credentials.txt file with the following content:  
OPENAI=sk-77QXXXXXXXXXXXXXXXXXXXXXXXXXXX  
replace the value after the = sign with your API key.  
Make sure the credentials.txt is added to .gitignore, you don't want to put your password on Git!

In [11]:
openai.api_key = load_pass("./credentials.txt", "OPENAI")


Found password.


In [12]:
client = OpenAI(api_key=openai.api_key)

### Query GPT

To change the task the model is solving, you need to change the text of the prompt and the content text of the system role.  
To change the GPT model used, you need to change the text of the model name when initiating the openai API.  
The function gets as input the input_raw_text, that will be text for information extraction or classification.

In [13]:
import time

DEFAULT_TEMPERATURE = 0
DEFAULT_MAX_TOKENS = 500
DEFAULT_MODEL = "gpt-4-turbo-preview"

def query_gpt(input_raw_text, prompt_text, gpt_model="gpt-4-turbo-preview", temperature=0, max_retries=5, retry_delay=3):
    # CHANGE gpt_model to the desired model name, see https://platform.openai.com/docs/models (gpt-3.5-turbo and gpt-4-turbo-preview)
    
    # Add a delay at the beginning of the function to avoid overloading the API if there are multiple calls
    # time.sleep(10)  

    system_msg = f"""
    You are an expert assistant specialized in text classification of PubMed abstracts. """

    retries = 0
    while retries < max_retries:
        print("Trying to call OpenAI API...")
        try:
            completion = client.chat.completions.create(
                model=gpt_model,  
                response_format={"type": "json_object"},
                temperature=temperature,
                #max_tokens=2000,
                messages=[
                    {"role": "system", "content": system_msg},
                    {"role": "user", "content": prompt_text + input_raw_text}
                ]
            )
            return completion.choices[0].message.content
        except Exception as e:
            # Handle API error, e.g., retry or log
            print(f"OpenAI API returned an error: {e}")
            time.sleep(retry_delay)  # Wait before retrying
            retries += 1

    raise RuntimeError("Max retries reached. Unable to complete the API call.")


In [14]:
# Define a function to apply GPT queries with a progress bar
def apply_gpt_with_progress(data_series, prompt_text, model="gpt-4-turbo-preview"):
    results = []
    total_items = len(data_series)
    # Create a tqdm progress bar
    with tqdm(total=total_items, desc=f"Processing dataset") as pbar:
        for text in data_series:
            result = query_gpt(text, prompt_text, model)
            results.append(result)
            pbar.update(1)  # Update the progress bar

    return results

# Hierarchical: Read binary and further classify

In [15]:
# Parameters to select Binary Predictions file
prompt_ids_to_eval = ["P3"] #, "P2", "P3", "P4"
model = "gpt-4-turbo-preview"
file_to_save_suffix = "hierarchical"

# The predictions based on which prompt to use
prompt_id_to_use_for_binary = "P3"

In [16]:
# Create the list of prediction columns based on the prompt IDs
prediction_columns = [f'gpt_predictions_{prompt_id_to_use_for_binary}']
# Specify the basic columns to include in the DataFrame
basic_columns = ['pmid', 'input_journal_title_abstract', 'accepted_label', 'multi_label', 'binary_label']
# Combine basic columns with the dynamically generated prediction columns
columns_to_read = basic_columns + prediction_columns

In [17]:
target_file_for_eval = f"predictions/{model}_enriched_kw_test_outputs_{'_'.join(prompt_ids_to_eval)}_binary.csv"
df_binary = pd.read_csv(target_file_for_eval)[columns_to_read]
col_name_binary = f'{prediction_columns[0]}_binary'
df_binary.rename(columns={prediction_columns[0]: col_name_binary}, inplace=True)

In [18]:
df_binary.head()

Unnamed: 0,pmid,input_journal_title_abstract,accepted_label,multi_label,binary_label,gpt_predictions_P3_binary
0,12047012,<journal>Schizophrenia bulletin</journal><titl...,Non-systematic-review,1,0,OTHER
1,28832188,<journal>Future medicinal chemistry</journal><...,Non-systematic-review,1,0,OTHER
2,17678496,<journal>Expert review of neurotherapeutics</j...,Non-systematic-review,1,0,OTHER
3,25649308,<journal>Annals of the New York Academy of Sci...,Non-systematic-review,1,0,OTHER
4,6312596,<journal>La semaine des hopitaux : organe fond...,Non-systematic-review,1,0,OTHER


In [19]:
prediction_columns

['gpt_predictions_P3']

## Further classify studies depending on binary

In [20]:
df_animal = df_binary[df_binary[col_name_binary] == 'ANIMAL'] 
df_animal.head()

Unnamed: 0,pmid,input_journal_title_abstract,accepted_label,multi_label,binary_label,gpt_predictions_P3_binary
21,32147509,<journal>Neuroscience</journal><title>Neurobio...,Non-systematic-review,1,0,ANIMAL
33,16312938,<journal>Zhongguo zhen jiu = Chinese acupunctu...,Non-systematic-review,1,0,ANIMAL
71,23811310,<journal>Biochemical pharmacology</journal><ti...,Non-systematic-review,1,0,ANIMAL
157,33846423,<journal>Scientific reports</journal><title>ST...,Remaining,0,0,ANIMAL
264,11909745,<journal>Microbes and infection</journal><titl...,Remaining,0,0,ANIMAL


In [21]:
df_other = df_binary[df_binary[col_name_binary] == 'OTHER'] 
df_other.head()

Unnamed: 0,pmid,input_journal_title_abstract,accepted_label,multi_label,binary_label,gpt_predictions_P3_binary
0,12047012,<journal>Schizophrenia bulletin</journal><titl...,Non-systematic-review,1,0,OTHER
1,28832188,<journal>Future medicinal chemistry</journal><...,Non-systematic-review,1,0,OTHER
2,17678496,<journal>Expert review of neurotherapeutics</j...,Non-systematic-review,1,0,OTHER
3,25649308,<journal>Annals of the New York Academy of Sci...,Non-systematic-review,1,0,OTHER
4,6312596,<journal>La semaine des hopitaux : organe fond...,Non-systematic-review,1,0,OTHER


In [22]:
json_file_path = "./prompt_strategies_hierarchical_shirin.json"
# Load the JSON file
with open(json_file_path, 'r') as file:
    prompts_data = json.load(file)

In [23]:
# Add the IDs of Prompts that you want to test
prompt_ids_to_test = ["P2_HIERARCHY"] #, "P2", "P3", "P4"
model = "gpt-4-turbo-preview"
file_to_save_suffix = "hierarchical"

# PROCESS ANIMAL

for prompt in prompts_data["prompts"]:
    prompt_id = prompt["id"]
    prompt_text = prompt["text_animal"]

    if prompt_id in prompt_ids_to_test:
        # Apply GPT predictions
        df_animal[f'gpt_predictions_{prompt_id}_raw'] = apply_gpt_with_progress(df_animal['input_journal_title_abstract'], prompt_text, model)
        ## the below includes error handling in case the json formatting did not work as expected
        df_animal[f'gpt_predictions_{prompt_id}'] = df_animal[f'gpt_predictions_{prompt_id}_raw'].apply(
            lambda x: json.loads(x)['gpt_label'] if isinstance(x, str) and 'gpt_label' in json.loads(x) else x
        )
        df.to_csv(f"predictions/{model}_enriched_kw_test_outputs_{prompt_id_to_use_for_binary}_{'_'.join(prompt_ids_to_test)}_{file_to_save_suffix}.csv") # saving after each prompt strategy has been run, in order not to loose information in case a later strategy fails
    else:
        print(f"Skipping prompt {prompt_id}")

# PROCESS OTHER

for prompt in prompts_data["prompts"]:
    prompt_id = prompt["id"]
    prompt_text = prompt["text_other"]

    if prompt_id in prompt_ids_to_test:
        # Apply GPT predictions
        df_other[f'gpt_predictions_{prompt_id}_raw'] = apply_gpt_with_progress(df_other['input_journal_title_abstract'], prompt_text, model)
        ## the below includes error handling in case the json formatting did not work as expected
        df_other[f'gpt_predictions_{prompt_id}'] = df_other[f'gpt_predictions_{prompt_id}_raw'].apply(
            lambda x: json.loads(x)['gpt_label'] if isinstance(x, str) and 'gpt_label' in json.loads(x) else x
        )
        df.to_csv(f"predictions/{model}_enriched_kw_test_outputs_{prompt_id_to_use_for_binary}_{'_'.join(prompt_ids_to_test)}_{file_to_save_suffix}.csv") # saving after each prompt strategy has been run, in order not to loose information in case a later strategy fails
    else:
        print(f"Skipping prompt {prompt_id}")

Skipping prompt P1_HIERARCHY


Processing dataset:   0%|                                | 0/90 [00:00<?, ?it/s]

Trying to call OpenAI API...


Processing dataset:   1%|▎                       | 1/90 [00:01<02:39,  1.79s/it]

Trying to call OpenAI API...


Processing dataset:   2%|▌                       | 2/90 [00:02<02:03,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:   3%|▊                       | 3/90 [00:05<02:58,  2.05s/it]

Trying to call OpenAI API...


Processing dataset:   4%|█                       | 4/90 [00:07<02:32,  1.77s/it]

Trying to call OpenAI API...


Processing dataset:   6%|█▎                      | 5/90 [00:08<02:12,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:   7%|█▌                      | 6/90 [00:10<02:37,  1.87s/it]

Trying to call OpenAI API...


Processing dataset:   8%|█▊                      | 7/90 [00:12<02:20,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:   9%|██▏                     | 8/90 [00:13<02:02,  1.50s/it]

Trying to call OpenAI API...


Processing dataset:  10%|██▍                     | 9/90 [00:14<01:53,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  11%|██▌                    | 10/90 [00:15<01:50,  1.38s/it]

Trying to call OpenAI API...


Processing dataset:  12%|██▊                    | 11/90 [00:17<01:51,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  13%|███                    | 12/90 [00:18<01:40,  1.29s/it]

Trying to call OpenAI API...


Processing dataset:  14%|███▎                   | 13/90 [00:20<02:04,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  16%|███▌                   | 14/90 [00:22<02:11,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  17%|███▊                   | 15/90 [00:23<01:57,  1.57s/it]

Trying to call OpenAI API...


Processing dataset:  18%|████                   | 16/90 [00:25<02:00,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  19%|████▎                  | 17/90 [00:26<01:46,  1.46s/it]

Trying to call OpenAI API...


Processing dataset:  20%|████▌                  | 18/90 [00:28<01:49,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  21%|████▊                  | 19/90 [00:29<01:43,  1.46s/it]

Trying to call OpenAI API...


Processing dataset:  22%|█████                  | 20/90 [00:30<01:38,  1.40s/it]

Trying to call OpenAI API...


Processing dataset:  23%|█████▎                 | 21/90 [00:31<01:30,  1.31s/it]

Trying to call OpenAI API...


Processing dataset:  24%|█████▌                 | 22/90 [00:33<01:26,  1.27s/it]

Trying to call OpenAI API...


Processing dataset:  26%|█████▉                 | 23/90 [00:34<01:31,  1.37s/it]

Trying to call OpenAI API...


Processing dataset:  27%|██████▏                | 24/90 [00:36<01:40,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  28%|██████▍                | 25/90 [00:38<01:37,  1.50s/it]

Trying to call OpenAI API...


Processing dataset:  29%|██████▋                | 26/90 [00:39<01:34,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  30%|██████▉                | 27/90 [00:41<01:36,  1.53s/it]

Trying to call OpenAI API...


Processing dataset:  31%|███████▏               | 28/90 [00:43<01:44,  1.68s/it]

Trying to call OpenAI API...


Processing dataset:  32%|███████▍               | 29/90 [00:44<01:34,  1.55s/it]

Trying to call OpenAI API...


Processing dataset:  33%|███████▋               | 30/90 [00:45<01:34,  1.57s/it]

Trying to call OpenAI API...


Processing dataset:  34%|███████▉               | 31/90 [00:47<01:30,  1.53s/it]

Trying to call OpenAI API...


Processing dataset:  36%|████████▏              | 32/90 [00:51<02:17,  2.36s/it]

Trying to call OpenAI API...


Processing dataset:  37%|████████▍              | 33/90 [00:53<02:00,  2.11s/it]

Trying to call OpenAI API...


Processing dataset:  38%|████████▋              | 34/90 [00:55<01:59,  2.13s/it]

Trying to call OpenAI API...


Processing dataset:  39%|████████▉              | 35/90 [00:57<01:48,  1.98s/it]

Trying to call OpenAI API...


Processing dataset:  40%|█████████▏             | 36/90 [00:58<01:38,  1.82s/it]

Trying to call OpenAI API...


Processing dataset:  41%|█████████▍             | 37/90 [01:02<02:12,  2.50s/it]

Trying to call OpenAI API...


Processing dataset:  42%|█████████▋             | 38/90 [01:04<01:57,  2.26s/it]

Trying to call OpenAI API...


Processing dataset:  43%|█████████▉             | 39/90 [01:06<01:47,  2.11s/it]

Trying to call OpenAI API...


Processing dataset:  44%|██████████▏            | 40/90 [01:07<01:32,  1.85s/it]

Trying to call OpenAI API...


Processing dataset:  46%|██████████▍            | 41/90 [01:08<01:27,  1.79s/it]

Trying to call OpenAI API...


Processing dataset:  47%|██████████▋            | 42/90 [01:10<01:15,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  48%|██████████▉            | 43/90 [01:11<01:16,  1.63s/it]

Trying to call OpenAI API...


Processing dataset:  49%|███████████▏           | 44/90 [01:12<01:06,  1.46s/it]

Trying to call OpenAI API...


Processing dataset:  50%|███████████▌           | 45/90 [01:14<01:06,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  51%|███████████▊           | 46/90 [01:15<00:59,  1.35s/it]

Trying to call OpenAI API...


Processing dataset:  52%|████████████           | 47/90 [01:16<00:57,  1.35s/it]

Trying to call OpenAI API...


Processing dataset:  53%|████████████▎          | 48/90 [01:18<00:59,  1.42s/it]

Trying to call OpenAI API...


Processing dataset:  54%|████████████▌          | 49/90 [01:19<00:56,  1.37s/it]

Trying to call OpenAI API...


Processing dataset:  56%|████████████▊          | 50/90 [01:20<00:52,  1.32s/it]

Trying to call OpenAI API...


Processing dataset:  57%|█████████████          | 51/90 [01:22<00:50,  1.30s/it]

Trying to call OpenAI API...


Processing dataset:  58%|█████████████▎         | 52/90 [01:23<00:48,  1.28s/it]

Trying to call OpenAI API...


Processing dataset:  59%|█████████████▌         | 53/90 [01:24<00:48,  1.32s/it]

Trying to call OpenAI API...


Processing dataset:  60%|█████████████▊         | 54/90 [01:26<00:48,  1.36s/it]

Trying to call OpenAI API...


Processing dataset:  61%|██████████████         | 55/90 [01:27<00:46,  1.32s/it]

Trying to call OpenAI API...


Processing dataset:  62%|██████████████▎        | 56/90 [01:28<00:48,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  63%|██████████████▌        | 57/90 [01:30<00:46,  1.42s/it]

Trying to call OpenAI API...


Processing dataset:  64%|██████████████▊        | 58/90 [01:31<00:41,  1.30s/it]

Trying to call OpenAI API...


Processing dataset:  66%|███████████████        | 59/90 [01:33<00:44,  1.43s/it]

Trying to call OpenAI API...


Processing dataset:  67%|███████████████▎       | 60/90 [01:34<00:42,  1.40s/it]

Trying to call OpenAI API...


Processing dataset:  68%|███████████████▌       | 61/90 [01:36<00:41,  1.43s/it]

Trying to call OpenAI API...


Processing dataset:  69%|███████████████▊       | 62/90 [01:37<00:39,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  70%|████████████████       | 63/90 [01:39<00:45,  1.68s/it]

Trying to call OpenAI API...


Processing dataset:  71%|████████████████▎      | 64/90 [01:40<00:39,  1.53s/it]

Trying to call OpenAI API...


Processing dataset:  72%|████████████████▌      | 65/90 [01:42<00:35,  1.44s/it]

Trying to call OpenAI API...


Processing dataset:  73%|████████████████▊      | 66/90 [01:43<00:36,  1.50s/it]

Trying to call OpenAI API...


Processing dataset:  74%|█████████████████      | 67/90 [01:45<00:34,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  76%|█████████████████▍     | 68/90 [01:46<00:33,  1.53s/it]

Trying to call OpenAI API...


Processing dataset:  77%|█████████████████▋     | 69/90 [01:48<00:31,  1.50s/it]

Trying to call OpenAI API...


Processing dataset:  78%|█████████████████▉     | 70/90 [01:49<00:29,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  79%|██████████████████▏    | 71/90 [01:50<00:26,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  80%|██████████████████▍    | 72/90 [01:52<00:26,  1.47s/it]

Trying to call OpenAI API...


Processing dataset:  81%|██████████████████▋    | 73/90 [01:53<00:23,  1.37s/it]

Trying to call OpenAI API...


Processing dataset:  82%|██████████████████▉    | 74/90 [01:55<00:23,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  83%|███████████████████▏   | 75/90 [01:57<00:22,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  84%|███████████████████▍   | 76/90 [01:59<00:26,  1.87s/it]

Trying to call OpenAI API...


Processing dataset:  86%|███████████████████▋   | 77/90 [02:01<00:23,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  87%|███████████████████▉   | 78/90 [02:03<00:22,  1.87s/it]

Trying to call OpenAI API...


Processing dataset:  88%|████████████████████▏  | 79/90 [02:04<00:19,  1.74s/it]

Trying to call OpenAI API...


Processing dataset:  89%|████████████████████▍  | 80/90 [02:06<00:18,  1.83s/it]

Trying to call OpenAI API...


Processing dataset:  90%|████████████████████▋  | 81/90 [02:08<00:15,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:  91%|████████████████████▉  | 82/90 [02:09<00:13,  1.67s/it]

Trying to call OpenAI API...


Processing dataset:  92%|█████████████████████▏ | 83/90 [02:11<00:11,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  93%|█████████████████████▍ | 84/90 [02:13<00:10,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:  94%|█████████████████████▋ | 85/90 [02:14<00:07,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  96%|█████████████████████▉ | 86/90 [02:16<00:06,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  97%|██████████████████████▏| 87/90 [02:17<00:04,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  98%|██████████████████████▍| 88/90 [02:19<00:03,  1.59s/it]

Trying to call OpenAI API...


Processing dataset:  99%|██████████████████████▋| 89/90 [02:20<00:01,  1.49s/it]

Trying to call OpenAI API...


Processing dataset: 100%|███████████████████████| 90/90 [02:22<00:00,  1.58s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_animal[f'gpt_predictions_{prompt_id}_raw'] = apply_gpt_with_progress(df_animal['input_journal_title_abstract'], prompt_text, model)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_animal[f'gpt_predictions_{prompt_id}'] = df_animal[f'gpt_predictions_{prompt_id}_raw'].apply(


Skipping prompt P1_HIERARCHY


Processing dataset:   0%|                               | 0/444 [00:00<?, ?it/s]

Trying to call OpenAI API...


Processing dataset:   0%|                       | 1/444 [00:02<17:38,  2.39s/it]

Trying to call OpenAI API...


Processing dataset:   0%|                       | 2/444 [00:04<14:21,  1.95s/it]

Trying to call OpenAI API...


Processing dataset:   1%|▏                      | 3/444 [00:05<13:58,  1.90s/it]

Trying to call OpenAI API...


Processing dataset:   1%|▏                      | 4/444 [00:07<11:54,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:   1%|▎                      | 5/444 [00:08<12:31,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:   1%|▎                      | 6/444 [00:10<12:19,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:   2%|▎                      | 7/444 [00:12<12:54,  1.77s/it]

Trying to call OpenAI API...


Processing dataset:   2%|▍                      | 8/444 [00:14<13:16,  1.83s/it]

Trying to call OpenAI API...


Processing dataset:   2%|▍                      | 9/444 [00:16<12:34,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:   2%|▍                     | 10/444 [00:17<12:20,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:   2%|▌                     | 11/444 [00:19<12:50,  1.78s/it]

Trying to call OpenAI API...


Processing dataset:   3%|▌                     | 12/444 [00:20<11:36,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:   3%|▋                     | 13/444 [00:22<11:11,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:   3%|▋                     | 14/444 [00:24<11:46,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:   3%|▋                     | 15/444 [00:25<11:44,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:   4%|▊                     | 16/444 [00:27<12:08,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:   4%|▊                     | 17/444 [00:28<11:06,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:   4%|▉                     | 18/444 [00:30<12:07,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:   4%|▉                     | 19/444 [00:32<11:56,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:   5%|▉                     | 20/444 [00:34<11:57,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:   5%|█                     | 21/444 [00:35<11:25,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:   5%|█                     | 22/444 [00:36<10:44,  1.53s/it]

Trying to call OpenAI API...


Processing dataset:   5%|█▏                    | 23/444 [00:38<10:26,  1.49s/it]

Trying to call OpenAI API...


Processing dataset:   5%|█▏                    | 24/444 [00:39<10:27,  1.49s/it]

Trying to call OpenAI API...


Processing dataset:   6%|█▏                    | 25/444 [00:41<09:52,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:   6%|█▎                    | 26/444 [00:42<09:58,  1.43s/it]

Trying to call OpenAI API...


Processing dataset:   6%|█▎                    | 27/444 [00:44<12:00,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:   6%|█▍                    | 28/444 [00:46<10:58,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:   7%|█▍                    | 29/444 [00:48<12:18,  1.78s/it]

Trying to call OpenAI API...


Processing dataset:   7%|█▍                    | 30/444 [00:50<13:16,  1.92s/it]

Trying to call OpenAI API...


Processing dataset:   7%|█▌                    | 31/444 [00:52<12:25,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:   7%|█▌                    | 32/444 [00:53<11:50,  1.72s/it]

Trying to call OpenAI API...


Processing dataset:   7%|█▋                    | 33/444 [00:55<12:03,  1.76s/it]

Trying to call OpenAI API...


Processing dataset:   8%|█▋                    | 34/444 [00:57<11:21,  1.66s/it]

Trying to call OpenAI API...


Processing dataset:   8%|█▋                    | 35/444 [00:59<13:47,  2.02s/it]

Trying to call OpenAI API...


Processing dataset:   8%|█▊                    | 36/444 [01:01<12:56,  1.90s/it]

Trying to call OpenAI API...


Processing dataset:   8%|█▊                    | 37/444 [01:03<12:49,  1.89s/it]

Trying to call OpenAI API...


Processing dataset:   9%|█▉                    | 38/444 [01:04<11:26,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:   9%|█▉                    | 39/444 [01:06<12:33,  1.86s/it]

Trying to call OpenAI API...


Processing dataset:   9%|█▉                    | 40/444 [01:07<10:49,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:   9%|██                    | 41/444 [01:09<10:02,  1.49s/it]

Trying to call OpenAI API...


Processing dataset:   9%|██                    | 42/444 [01:10<08:45,  1.31s/it]

Trying to call OpenAI API...


Processing dataset:  10%|██▏                   | 43/444 [01:11<09:42,  1.45s/it]

Trying to call OpenAI API...


Processing dataset:  10%|██▏                   | 44/444 [01:13<09:51,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  10%|██▏                   | 45/444 [01:14<09:32,  1.43s/it]

Trying to call OpenAI API...


Processing dataset:  10%|██▎                   | 46/444 [01:16<09:55,  1.50s/it]

Trying to call OpenAI API...


Processing dataset:  11%|██▎                   | 47/444 [01:17<09:45,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  11%|██▍                   | 48/444 [01:20<11:17,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:  11%|██▍                   | 49/444 [01:21<10:51,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  11%|██▍                   | 50/444 [01:22<10:15,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:  11%|██▌                   | 51/444 [01:24<09:58,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  12%|██▌                   | 52/444 [01:25<10:10,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:  12%|██▋                   | 53/444 [01:27<09:30,  1.46s/it]

Trying to call OpenAI API...


Processing dataset:  12%|██▋                   | 54/444 [01:28<09:49,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  12%|██▋                   | 55/444 [01:30<09:39,  1.49s/it]

Trying to call OpenAI API...


Processing dataset:  13%|██▊                   | 56/444 [01:31<09:05,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  13%|██▊                   | 57/444 [01:34<11:31,  1.79s/it]

Trying to call OpenAI API...


Processing dataset:  13%|██▊                   | 58/444 [01:35<11:00,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:  13%|██▉                   | 59/444 [01:36<09:51,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  14%|██▉                   | 60/444 [01:38<09:37,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  14%|███                   | 61/444 [01:39<09:08,  1.43s/it]

Trying to call OpenAI API...


Processing dataset:  14%|███                   | 62/444 [01:40<09:03,  1.42s/it]

Trying to call OpenAI API...


Processing dataset:  14%|███                   | 63/444 [01:42<08:39,  1.36s/it]

Trying to call OpenAI API...


Processing dataset:  14%|███▏                  | 64/444 [01:44<10:14,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  15%|███▏                  | 65/444 [01:45<09:33,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  15%|███▎                  | 66/444 [01:47<09:46,  1.55s/it]

Trying to call OpenAI API...


Processing dataset:  15%|███▎                  | 67/444 [01:51<14:09,  2.25s/it]

Trying to call OpenAI API...


Processing dataset:  15%|███▎                  | 68/444 [01:52<12:11,  1.95s/it]

Trying to call OpenAI API...


Processing dataset:  16%|███▍                  | 69/444 [01:53<10:49,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  16%|███▍                  | 70/444 [01:54<10:02,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  16%|███▌                  | 71/444 [01:56<10:16,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  16%|███▌                  | 72/444 [01:58<09:49,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  16%|███▌                  | 73/444 [01:59<09:53,  1.60s/it]

Trying to call OpenAI API...


Processing dataset:  17%|███▋                  | 74/444 [02:01<10:36,  1.72s/it]

Trying to call OpenAI API...


Processing dataset:  17%|███▋                  | 75/444 [02:07<17:18,  2.82s/it]

Trying to call OpenAI API...


Processing dataset:  17%|███▊                  | 76/444 [02:08<15:06,  2.46s/it]

Trying to call OpenAI API...


Processing dataset:  17%|███▊                  | 77/444 [02:10<13:33,  2.22s/it]

Trying to call OpenAI API...


Processing dataset:  18%|███▊                  | 78/444 [02:12<12:50,  2.11s/it]

Trying to call OpenAI API...


Processing dataset:  18%|███▉                  | 79/444 [02:14<12:58,  2.13s/it]

Trying to call OpenAI API...


Processing dataset:  18%|███▉                  | 80/444 [02:16<12:08,  2.00s/it]

Trying to call OpenAI API...


Processing dataset:  18%|████                  | 81/444 [02:17<11:14,  1.86s/it]

Trying to call OpenAI API...


Processing dataset:  18%|████                  | 82/444 [02:19<11:22,  1.89s/it]

Trying to call OpenAI API...


Processing dataset:  19%|████                  | 83/444 [02:21<10:54,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  19%|████▏                 | 84/444 [02:22<10:11,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  19%|████▏                 | 85/444 [02:24<10:03,  1.68s/it]

Trying to call OpenAI API...


Processing dataset:  19%|████▎                 | 86/444 [02:25<09:35,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  20%|████▎                 | 87/444 [02:27<10:19,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  20%|████▎                 | 88/444 [02:29<09:47,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  20%|████▍                 | 89/444 [02:30<09:43,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  20%|████▍                 | 90/444 [02:32<09:41,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  20%|████▌                 | 91/444 [02:33<09:17,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  21%|████▌                 | 92/444 [02:37<12:15,  2.09s/it]

Trying to call OpenAI API...


Processing dataset:  21%|████▌                 | 93/444 [02:39<12:50,  2.19s/it]

Trying to call OpenAI API...


Processing dataset:  21%|████▋                 | 94/444 [02:41<12:12,  2.09s/it]

Trying to call OpenAI API...


Processing dataset:  21%|████▋                 | 95/444 [02:43<11:44,  2.02s/it]

Trying to call OpenAI API...


Processing dataset:  22%|████▊                 | 96/444 [02:45<11:24,  1.97s/it]

Trying to call OpenAI API...


Processing dataset:  22%|████▊                 | 97/444 [02:46<10:06,  1.75s/it]

Trying to call OpenAI API...


Processing dataset:  22%|████▊                 | 98/444 [02:47<09:43,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:  22%|████▉                 | 99/444 [02:49<09:03,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  23%|████▋                | 100/444 [02:50<08:47,  1.53s/it]

Trying to call OpenAI API...


Processing dataset:  23%|████▊                | 101/444 [02:52<09:16,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  23%|████▊                | 102/444 [02:54<08:56,  1.57s/it]

Trying to call OpenAI API...


Processing dataset:  23%|████▊                | 103/444 [02:56<09:43,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:  23%|████▉                | 104/444 [02:58<10:16,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  24%|████▉                | 105/444 [02:59<10:17,  1.82s/it]

Trying to call OpenAI API...


Processing dataset:  24%|█████                | 106/444 [03:01<09:15,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  24%|█████                | 107/444 [03:02<08:52,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  24%|█████                | 108/444 [03:03<08:15,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  25%|█████▏               | 109/444 [03:05<07:49,  1.40s/it]

Trying to call OpenAI API...


Processing dataset:  25%|█████▏               | 110/444 [03:06<08:11,  1.47s/it]

Trying to call OpenAI API...


Processing dataset:  25%|█████▎               | 111/444 [03:08<08:47,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  25%|█████▎               | 112/444 [03:10<08:51,  1.60s/it]

Trying to call OpenAI API...


Processing dataset:  25%|█████▎               | 113/444 [03:11<08:53,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  26%|█████▍               | 114/444 [03:13<08:22,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  26%|█████▍               | 115/444 [03:15<09:04,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  26%|█████▍               | 116/444 [03:16<09:01,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  26%|█████▌               | 117/444 [03:18<08:26,  1.55s/it]

Trying to call OpenAI API...


Processing dataset:  27%|█████▌               | 118/444 [03:19<08:25,  1.55s/it]

Trying to call OpenAI API...


Processing dataset:  27%|█████▋               | 119/444 [03:22<10:32,  1.95s/it]

Trying to call OpenAI API...


Processing dataset:  27%|█████▋               | 120/444 [03:23<09:20,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  27%|█████▋               | 121/444 [03:24<08:30,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  27%|█████▊               | 122/444 [03:25<07:34,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  28%|█████▊               | 123/444 [03:27<07:15,  1.36s/it]

Trying to call OpenAI API...


Processing dataset:  28%|█████▊               | 124/444 [03:29<09:00,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:  28%|█████▉               | 125/444 [03:30<07:51,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  28%|█████▉               | 126/444 [03:31<07:04,  1.34s/it]

Trying to call OpenAI API...


Processing dataset:  29%|██████               | 127/444 [03:33<08:35,  1.63s/it]

Trying to call OpenAI API...


Processing dataset:  29%|██████               | 128/444 [03:35<08:55,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:  29%|██████               | 129/444 [03:37<08:47,  1.68s/it]

Trying to call OpenAI API...


Processing dataset:  29%|██████▏              | 130/444 [03:39<09:01,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  30%|██████▏              | 131/444 [03:40<07:54,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  30%|██████▏              | 132/444 [03:43<09:53,  1.90s/it]

Trying to call OpenAI API...


Processing dataset:  30%|██████▎              | 133/444 [03:44<08:54,  1.72s/it]

Trying to call OpenAI API...


Processing dataset:  30%|██████▎              | 134/444 [03:45<08:33,  1.66s/it]

Trying to call OpenAI API...


Processing dataset:  30%|██████▍              | 135/444 [03:47<08:13,  1.60s/it]

Trying to call OpenAI API...


Processing dataset:  31%|██████▍              | 136/444 [03:48<07:47,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  31%|██████▍              | 137/444 [03:51<09:12,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  31%|██████▌              | 138/444 [03:52<08:41,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  31%|██████▌              | 139/444 [03:54<08:10,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  32%|██████▌              | 140/444 [03:56<09:26,  1.86s/it]

Trying to call OpenAI API...


Processing dataset:  32%|██████▋              | 141/444 [03:58<09:59,  1.98s/it]

Trying to call OpenAI API...


Processing dataset:  32%|██████▋              | 142/444 [04:00<09:27,  1.88s/it]

Trying to call OpenAI API...


Processing dataset:  32%|██████▊              | 143/444 [04:01<08:15,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  32%|██████▊              | 144/444 [04:03<08:05,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  33%|██████▊              | 145/444 [04:04<08:05,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  33%|██████▉              | 146/444 [04:06<07:56,  1.60s/it]

Trying to call OpenAI API...


Processing dataset:  33%|██████▉              | 147/444 [04:07<07:32,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  33%|███████              | 148/444 [04:08<07:03,  1.43s/it]

Trying to call OpenAI API...


Processing dataset:  34%|███████              | 149/444 [04:09<06:43,  1.37s/it]

Trying to call OpenAI API...


Processing dataset:  34%|███████              | 150/444 [04:11<07:06,  1.45s/it]

Trying to call OpenAI API...


Processing dataset:  34%|███████▏             | 151/444 [04:12<06:45,  1.38s/it]

Trying to call OpenAI API...


Processing dataset:  34%|███████▏             | 152/444 [04:14<06:30,  1.34s/it]

Trying to call OpenAI API...


Processing dataset:  34%|███████▏             | 153/444 [04:15<06:56,  1.43s/it]

Trying to call OpenAI API...


Processing dataset:  35%|███████▎             | 154/444 [04:17<07:11,  1.49s/it]

Trying to call OpenAI API...


Processing dataset:  35%|███████▎             | 155/444 [04:19<08:16,  1.72s/it]

Trying to call OpenAI API...


Processing dataset:  35%|███████▍             | 156/444 [04:21<08:25,  1.76s/it]

Trying to call OpenAI API...


Processing dataset:  35%|███████▍             | 157/444 [04:22<07:56,  1.66s/it]

Trying to call OpenAI API...


Processing dataset:  36%|███████▍             | 158/444 [04:24<07:34,  1.59s/it]

Trying to call OpenAI API...


Processing dataset:  36%|███████▌             | 159/444 [04:26<08:03,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  36%|███████▌             | 160/444 [04:28<08:16,  1.75s/it]

Trying to call OpenAI API...


Processing dataset:  36%|███████▌             | 161/444 [04:29<07:20,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:  36%|███████▋             | 162/444 [04:30<07:25,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  37%|███████▋             | 163/444 [04:33<08:20,  1.78s/it]

Trying to call OpenAI API...


Processing dataset:  37%|███████▊             | 164/444 [04:34<08:23,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  37%|███████▊             | 165/444 [04:36<07:19,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  37%|███████▊             | 166/444 [04:37<07:20,  1.59s/it]

Trying to call OpenAI API...


Processing dataset:  38%|███████▉             | 167/444 [04:39<07:57,  1.72s/it]

Trying to call OpenAI API...


Processing dataset:  38%|███████▉             | 168/444 [04:41<07:48,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  38%|███████▉             | 169/444 [04:42<07:25,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  38%|████████             | 170/444 [04:44<07:58,  1.75s/it]

Trying to call OpenAI API...


Processing dataset:  39%|████████             | 171/444 [04:46<08:07,  1.79s/it]

Trying to call OpenAI API...


Processing dataset:  39%|████████▏            | 172/444 [04:48<08:15,  1.82s/it]

Trying to call OpenAI API...


Processing dataset:  39%|████████▏            | 173/444 [04:49<07:34,  1.68s/it]

Trying to call OpenAI API...


Processing dataset:  39%|████████▏            | 174/444 [04:51<07:29,  1.66s/it]

Trying to call OpenAI API...


Processing dataset:  39%|████████▎            | 175/444 [04:53<07:09,  1.60s/it]

Trying to call OpenAI API...


Processing dataset:  40%|████████▎            | 176/444 [04:54<07:11,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  40%|████████▎            | 177/444 [04:56<07:12,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  40%|████████▍            | 178/444 [04:57<06:43,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  40%|████████▍            | 179/444 [04:59<06:47,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  41%|████████▌            | 180/444 [05:00<06:21,  1.44s/it]

Trying to call OpenAI API...


Processing dataset:  41%|████████▌            | 181/444 [05:01<06:03,  1.38s/it]

Trying to call OpenAI API...


Processing dataset:  41%|████████▌            | 182/444 [05:03<06:22,  1.46s/it]

Trying to call OpenAI API...


Processing dataset:  41%|████████▋            | 183/444 [05:04<06:02,  1.39s/it]

Trying to call OpenAI API...


Processing dataset:  41%|████████▋            | 184/444 [05:06<07:08,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  42%|████████▊            | 185/444 [05:09<08:09,  1.89s/it]

Trying to call OpenAI API...


Processing dataset:  42%|████████▊            | 186/444 [05:10<07:48,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  42%|████████▊            | 187/444 [05:13<08:31,  1.99s/it]

Trying to call OpenAI API...


Processing dataset:  42%|████████▉            | 188/444 [05:14<07:35,  1.78s/it]

Trying to call OpenAI API...


Processing dataset:  43%|████████▉            | 189/444 [05:16<07:45,  1.83s/it]

Trying to call OpenAI API...


Processing dataset:  43%|████████▉            | 190/444 [05:18<07:38,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  43%|█████████            | 191/444 [05:20<07:54,  1.88s/it]

Trying to call OpenAI API...


Processing dataset:  43%|█████████            | 192/444 [05:21<07:04,  1.68s/it]

Trying to call OpenAI API...


Processing dataset:  43%|█████████▏           | 193/444 [05:22<06:45,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  44%|█████████▏           | 194/444 [05:25<08:15,  1.98s/it]

Trying to call OpenAI API...


Processing dataset:  44%|█████████▏           | 195/444 [05:28<08:33,  2.06s/it]

Trying to call OpenAI API...


Processing dataset:  44%|█████████▎           | 196/444 [05:29<08:00,  1.94s/it]

Trying to call OpenAI API...


Processing dataset:  44%|█████████▎           | 197/444 [05:32<08:52,  2.16s/it]

Trying to call OpenAI API...


Processing dataset:  45%|█████████▎           | 198/444 [05:34<08:26,  2.06s/it]

Trying to call OpenAI API...


Processing dataset:  45%|█████████▍           | 199/444 [05:35<07:23,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  45%|█████████▍           | 200/444 [05:39<10:39,  2.62s/it]

Trying to call OpenAI API...


Processing dataset:  45%|█████████▌           | 201/444 [05:41<09:24,  2.32s/it]

Trying to call OpenAI API...


Processing dataset:  45%|█████████▌           | 202/444 [05:42<08:03,  2.00s/it]

Trying to call OpenAI API...


Processing dataset:  46%|█████████▌           | 203/444 [05:45<08:19,  2.07s/it]

Trying to call OpenAI API...


Processing dataset:  46%|█████████▋           | 204/444 [05:46<07:46,  1.95s/it]

Trying to call OpenAI API...


Processing dataset:  46%|█████████▋           | 205/444 [05:47<06:53,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  46%|█████████▋           | 206/444 [05:49<07:02,  1.78s/it]

Trying to call OpenAI API...


Processing dataset:  47%|█████████▊           | 207/444 [05:51<06:26,  1.63s/it]

Trying to call OpenAI API...


Processing dataset:  47%|█████████▊           | 208/444 [05:52<06:03,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  47%|█████████▉           | 209/444 [05:54<06:52,  1.75s/it]

Trying to call OpenAI API...


Processing dataset:  47%|█████████▉           | 210/444 [05:56<07:25,  1.90s/it]

Trying to call OpenAI API...


Processing dataset:  48%|█████████▉           | 211/444 [05:58<06:36,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  48%|██████████           | 212/444 [05:59<06:06,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  48%|██████████           | 213/444 [06:01<06:18,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  48%|██████████           | 214/444 [06:03<07:27,  1.95s/it]

Trying to call OpenAI API...


Processing dataset:  48%|██████████▏          | 215/444 [06:05<06:36,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  49%|██████████▏          | 216/444 [06:06<06:28,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  49%|██████████▎          | 217/444 [06:08<06:12,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  49%|██████████▎          | 218/444 [06:10<06:20,  1.68s/it]

Trying to call OpenAI API...


Processing dataset:  49%|██████████▎          | 219/444 [06:11<05:40,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  50%|██████████▍          | 220/444 [06:12<05:26,  1.46s/it]

Trying to call OpenAI API...


Processing dataset:  50%|██████████▍          | 221/444 [06:13<05:02,  1.36s/it]

Trying to call OpenAI API...


Processing dataset:  50%|██████████▌          | 222/444 [06:15<05:27,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  50%|██████████▌          | 223/444 [06:16<04:53,  1.33s/it]

Trying to call OpenAI API...


Processing dataset:  50%|██████████▌          | 224/444 [06:17<04:48,  1.31s/it]

Trying to call OpenAI API...


Processing dataset:  51%|██████████▋          | 225/444 [06:19<04:55,  1.35s/it]

Trying to call OpenAI API...


Processing dataset:  51%|██████████▋          | 226/444 [06:21<06:33,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  51%|██████████▋          | 227/444 [06:23<05:47,  1.60s/it]

Trying to call OpenAI API...


Processing dataset:  51%|██████████▊          | 228/444 [06:24<05:28,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  52%|██████████▊          | 229/444 [06:25<05:15,  1.47s/it]

Trying to call OpenAI API...


Processing dataset:  52%|██████████▉          | 230/444 [06:27<05:04,  1.42s/it]

Trying to call OpenAI API...


Processing dataset:  52%|██████████▉          | 231/444 [06:28<05:01,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  52%|██████████▉          | 232/444 [06:29<04:54,  1.39s/it]

Trying to call OpenAI API...


Processing dataset:  52%|███████████          | 233/444 [06:32<06:10,  1.75s/it]

Trying to call OpenAI API...


Processing dataset:  53%|███████████          | 234/444 [06:33<05:54,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:  53%|███████████          | 235/444 [06:35<05:46,  1.66s/it]

Trying to call OpenAI API...


Processing dataset:  53%|███████████▏         | 236/444 [06:36<05:27,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  53%|███████████▏         | 237/444 [06:38<05:42,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  54%|███████████▎         | 238/444 [06:39<05:16,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  54%|███████████▎         | 239/444 [06:41<05:19,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:  54%|███████████▎         | 240/444 [06:43<05:22,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  54%|███████████▍         | 241/444 [06:47<08:06,  2.40s/it]

Trying to call OpenAI API...


Processing dataset:  55%|███████████▍         | 242/444 [06:48<06:41,  1.99s/it]

Trying to call OpenAI API...


Processing dataset:  55%|███████████▍         | 243/444 [06:49<05:41,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  55%|███████████▌         | 244/444 [06:50<05:23,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  55%|███████████▌         | 245/444 [06:52<04:59,  1.50s/it]

Trying to call OpenAI API...


Processing dataset:  55%|███████████▋         | 246/444 [06:53<04:51,  1.47s/it]

Trying to call OpenAI API...


Processing dataset:  56%|███████████▋         | 247/444 [06:56<06:13,  1.90s/it]

Trying to call OpenAI API...


Processing dataset:  56%|███████████▋         | 248/444 [06:58<06:32,  2.00s/it]

Trying to call OpenAI API...


Processing dataset:  56%|███████████▊         | 249/444 [07:00<06:09,  1.89s/it]

Trying to call OpenAI API...


Processing dataset:  56%|███████████▊         | 250/444 [07:02<06:00,  1.86s/it]

Trying to call OpenAI API...


Processing dataset:  57%|███████████▊         | 251/444 [07:03<05:49,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  57%|███████████▉         | 252/444 [07:05<05:26,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  57%|███████████▉         | 253/444 [07:07<05:57,  1.87s/it]

Trying to call OpenAI API...


Processing dataset:  57%|████████████         | 254/444 [07:08<05:06,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  57%|████████████         | 255/444 [07:12<07:14,  2.30s/it]

Trying to call OpenAI API...


Processing dataset:  58%|████████████         | 256/444 [07:13<06:05,  1.94s/it]

Trying to call OpenAI API...


Processing dataset:  58%|████████████▏        | 257/444 [07:15<05:41,  1.83s/it]

Trying to call OpenAI API...


Processing dataset:  58%|████████████▏        | 258/444 [07:16<05:01,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  58%|████████████▎        | 259/444 [07:17<05:04,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  59%|████████████▎        | 260/444 [07:19<05:02,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  59%|████████████▎        | 261/444 [07:21<05:11,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  59%|████████████▍        | 262/444 [07:22<04:55,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  59%|████████████▍        | 263/444 [07:23<04:17,  1.42s/it]

Trying to call OpenAI API...


Processing dataset:  59%|████████████▍        | 264/444 [07:25<04:31,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  60%|████████████▌        | 265/444 [07:27<04:26,  1.49s/it]

Trying to call OpenAI API...


Processing dataset:  60%|████████████▌        | 266/444 [07:28<04:10,  1.41s/it]

Trying to call OpenAI API...


Processing dataset:  60%|████████████▋        | 267/444 [07:29<04:21,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  60%|████████████▋        | 268/444 [07:31<04:32,  1.55s/it]

Trying to call OpenAI API...


Processing dataset:  61%|████████████▋        | 269/444 [07:34<05:36,  1.92s/it]

Trying to call OpenAI API...


Processing dataset:  61%|████████████▊        | 270/444 [07:35<04:55,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  61%|████████████▊        | 271/444 [07:37<04:53,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  61%|████████████▊        | 272/444 [07:38<04:46,  1.67s/it]

Trying to call OpenAI API...


Processing dataset:  61%|████████████▉        | 273/444 [07:40<04:35,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  62%|████████████▉        | 274/444 [07:41<04:24,  1.55s/it]

Trying to call OpenAI API...


Processing dataset:  62%|█████████████        | 275/444 [07:43<04:16,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  62%|█████████████        | 276/444 [07:44<04:21,  1.55s/it]

Trying to call OpenAI API...


Processing dataset:  62%|█████████████        | 277/444 [07:46<04:23,  1.58s/it]

Trying to call OpenAI API...


Processing dataset:  63%|█████████████▏       | 278/444 [07:48<04:51,  1.75s/it]

Trying to call OpenAI API...


Processing dataset:  63%|█████████████▏       | 279/444 [07:50<04:58,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  63%|█████████████▏       | 280/444 [07:52<04:45,  1.74s/it]

Trying to call OpenAI API...


Processing dataset:  63%|█████████████▎       | 281/444 [07:53<04:31,  1.67s/it]

Trying to call OpenAI API...


Processing dataset:  64%|█████████████▎       | 282/444 [07:56<05:27,  2.02s/it]

Trying to call OpenAI API...


Processing dataset:  64%|█████████████▍       | 283/444 [07:58<05:37,  2.09s/it]

Trying to call OpenAI API...


Processing dataset:  64%|█████████████▍       | 284/444 [08:00<05:13,  1.96s/it]

Trying to call OpenAI API...


Processing dataset:  64%|█████████████▍       | 285/444 [08:01<04:46,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  64%|█████████████▌       | 286/444 [08:03<04:32,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  65%|█████████████▌       | 287/444 [08:04<04:11,  1.60s/it]

Trying to call OpenAI API...


Processing dataset:  65%|█████████████▌       | 288/444 [08:06<04:11,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  65%|█████████████▋       | 289/444 [08:07<04:11,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  65%|█████████████▋       | 290/444 [08:10<04:48,  1.87s/it]

Trying to call OpenAI API...


Processing dataset:  66%|█████████████▊       | 291/444 [08:12<04:35,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  66%|█████████████▊       | 292/444 [08:14<05:03,  2.00s/it]

Trying to call OpenAI API...


Processing dataset:  66%|█████████████▊       | 293/444 [08:16<04:40,  1.86s/it]

Trying to call OpenAI API...


Processing dataset:  66%|█████████████▉       | 294/444 [08:17<04:15,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  66%|█████████████▉       | 295/444 [08:18<03:57,  1.59s/it]

Trying to call OpenAI API...


Processing dataset:  67%|██████████████       | 296/444 [08:20<03:52,  1.57s/it]

Trying to call OpenAI API...


Processing dataset:  67%|██████████████       | 297/444 [08:21<03:54,  1.59s/it]

Trying to call OpenAI API...


Processing dataset:  67%|██████████████       | 298/444 [08:24<04:30,  1.85s/it]

Trying to call OpenAI API...


Processing dataset:  67%|██████████████▏      | 299/444 [08:25<04:10,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  68%|██████████████▏      | 300/444 [08:27<03:55,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  68%|██████████████▏      | 301/444 [08:29<04:03,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  68%|██████████████▎      | 302/444 [08:30<04:07,  1.74s/it]

Trying to call OpenAI API...


Processing dataset:  68%|██████████████▎      | 303/444 [08:32<04:01,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:  68%|██████████████▍      | 304/444 [08:34<04:06,  1.76s/it]

Trying to call OpenAI API...


Processing dataset:  69%|██████████████▍      | 305/444 [08:36<04:06,  1.78s/it]

Trying to call OpenAI API...


Processing dataset:  69%|██████████████▍      | 306/444 [08:37<04:01,  1.75s/it]

Trying to call OpenAI API...


Processing dataset:  69%|██████████████▌      | 307/444 [08:39<04:01,  1.76s/it]

Trying to call OpenAI API...


Processing dataset:  69%|██████████████▌      | 308/444 [08:41<04:03,  1.79s/it]

Trying to call OpenAI API...


Processing dataset:  70%|██████████████▌      | 309/444 [08:42<03:46,  1.68s/it]

Trying to call OpenAI API...


Processing dataset:  70%|██████████████▋      | 310/444 [08:45<04:00,  1.79s/it]

Trying to call OpenAI API...


Processing dataset:  70%|██████████████▋      | 311/444 [08:46<03:52,  1.75s/it]

Trying to call OpenAI API...


Processing dataset:  70%|██████████████▊      | 312/444 [08:48<04:06,  1.87s/it]

Trying to call OpenAI API...


Processing dataset:  70%|██████████████▊      | 313/444 [08:50<03:43,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:  71%|██████████████▊      | 314/444 [08:51<03:39,  1.69s/it]

Trying to call OpenAI API...


Processing dataset:  71%|██████████████▉      | 315/444 [08:53<03:35,  1.67s/it]

Trying to call OpenAI API...


Processing dataset:  71%|██████████████▉      | 316/444 [08:55<03:56,  1.85s/it]

Trying to call OpenAI API...


Processing dataset:  71%|██████████████▉      | 317/444 [08:58<04:21,  2.06s/it]

Trying to call OpenAI API...


Processing dataset:  72%|███████████████      | 318/444 [09:00<04:15,  2.03s/it]

Trying to call OpenAI API...


Processing dataset:  72%|███████████████      | 319/444 [09:02<04:28,  2.15s/it]

Trying to call OpenAI API...


Processing dataset:  72%|███████████████▏     | 320/444 [09:04<04:15,  2.06s/it]

Trying to call OpenAI API...


Processing dataset:  72%|███████████████▏     | 321/444 [09:06<04:01,  1.96s/it]

Trying to call OpenAI API...


Processing dataset:  73%|███████████████▏     | 322/444 [09:08<03:53,  1.91s/it]

Trying to call OpenAI API...


Processing dataset:  73%|███████████████▎     | 323/444 [09:10<04:28,  2.22s/it]

Trying to call OpenAI API...


Processing dataset:  73%|███████████████▎     | 324/444 [09:12<03:56,  1.97s/it]

Trying to call OpenAI API...


Processing dataset:  73%|███████████████▎     | 325/444 [09:14<03:52,  1.95s/it]

Trying to call OpenAI API...


Processing dataset:  73%|███████████████▍     | 326/444 [09:16<04:12,  2.14s/it]

Trying to call OpenAI API...


Processing dataset:  74%|███████████████▍     | 327/444 [09:18<04:04,  2.09s/it]

Trying to call OpenAI API...


Processing dataset:  74%|███████████████▌     | 328/444 [09:20<03:37,  1.88s/it]

Trying to call OpenAI API...


Processing dataset:  74%|███████████████▌     | 329/444 [09:21<03:20,  1.74s/it]

Trying to call OpenAI API...


Processing dataset:  74%|███████████████▌     | 330/444 [09:23<03:08,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  75%|███████████████▋     | 331/444 [09:24<03:01,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  75%|███████████████▋     | 332/444 [09:26<03:03,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  75%|███████████████▊     | 333/444 [09:27<02:52,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:  75%|███████████████▊     | 334/444 [09:29<02:57,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  75%|███████████████▊     | 335/444 [09:30<02:51,  1.57s/it]

Trying to call OpenAI API...


Processing dataset:  76%|███████████████▉     | 336/444 [09:32<02:55,  1.63s/it]

Trying to call OpenAI API...


Processing dataset:  76%|███████████████▉     | 337/444 [09:33<02:40,  1.50s/it]

Trying to call OpenAI API...


Processing dataset:  76%|███████████████▉     | 338/444 [09:35<02:43,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  76%|████████████████     | 339/444 [09:36<02:35,  1.48s/it]

Trying to call OpenAI API...


Processing dataset:  77%|████████████████     | 340/444 [09:38<02:38,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  77%|████████████████▏    | 341/444 [09:39<02:33,  1.49s/it]

Trying to call OpenAI API...


Processing dataset:  77%|████████████████▏    | 342/444 [09:40<02:21,  1.39s/it]

Trying to call OpenAI API...


Processing dataset:  77%|████████████████▏    | 343/444 [09:41<02:09,  1.28s/it]

Trying to call OpenAI API...


Processing dataset:  77%|████████████████▎    | 344/444 [09:43<02:18,  1.38s/it]

Trying to call OpenAI API...


Processing dataset:  78%|████████████████▎    | 345/444 [09:44<02:13,  1.35s/it]

Trying to call OpenAI API...


Processing dataset:  78%|████████████████▎    | 346/444 [09:45<02:03,  1.26s/it]

Trying to call OpenAI API...


Processing dataset:  78%|████████████████▍    | 347/444 [09:47<02:01,  1.25s/it]

Trying to call OpenAI API...


Processing dataset:  78%|████████████████▍    | 348/444 [09:48<02:15,  1.42s/it]

Trying to call OpenAI API...


Processing dataset:  79%|████████████████▌    | 349/444 [09:50<02:04,  1.31s/it]

Trying to call OpenAI API...


Processing dataset:  79%|████████████████▌    | 350/444 [09:51<02:02,  1.30s/it]

Trying to call OpenAI API...


Processing dataset:  79%|████████████████▌    | 351/444 [09:52<02:04,  1.33s/it]

Trying to call OpenAI API...


Processing dataset:  79%|████████████████▋    | 352/444 [09:54<02:01,  1.32s/it]

Trying to call OpenAI API...


Processing dataset:  80%|████████████████▋    | 353/444 [09:55<01:59,  1.31s/it]

Trying to call OpenAI API...


Processing dataset:  80%|████████████████▋    | 354/444 [09:56<01:57,  1.30s/it]

Trying to call OpenAI API...


Processing dataset:  80%|████████████████▊    | 355/444 [09:58<02:16,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  80%|████████████████▊    | 356/444 [10:00<02:13,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  80%|████████████████▉    | 357/444 [10:01<02:08,  1.47s/it]

Trying to call OpenAI API...


Processing dataset:  81%|████████████████▉    | 358/444 [10:02<01:58,  1.38s/it]

Trying to call OpenAI API...


Processing dataset:  81%|████████████████▉    | 359/444 [10:04<02:19,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  81%|█████████████████    | 360/444 [10:06<02:10,  1.55s/it]

Trying to call OpenAI API...


Processing dataset:  81%|█████████████████    | 361/444 [10:09<02:55,  2.12s/it]

Trying to call OpenAI API...


Processing dataset:  82%|█████████████████    | 362/444 [10:11<02:49,  2.06s/it]

Trying to call OpenAI API...


Processing dataset:  82%|█████████████████▏   | 363/444 [10:12<02:26,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  82%|█████████████████▏   | 364/444 [10:14<02:23,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  82%|█████████████████▎   | 365/444 [10:16<02:26,  1.86s/it]

Trying to call OpenAI API...


Processing dataset:  82%|█████████████████▎   | 366/444 [10:17<02:05,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  83%|█████████████████▎   | 367/444 [10:18<01:51,  1.44s/it]

Trying to call OpenAI API...


Processing dataset:  83%|█████████████████▍   | 368/444 [10:20<01:48,  1.43s/it]

Trying to call OpenAI API...


Processing dataset:  83%|█████████████████▍   | 369/444 [10:21<01:48,  1.44s/it]

Trying to call OpenAI API...


Processing dataset:  83%|█████████████████▌   | 370/444 [10:24<02:28,  2.00s/it]

Trying to call OpenAI API...


Processing dataset:  84%|█████████████████▌   | 371/444 [10:26<02:17,  1.88s/it]

Trying to call OpenAI API...


Processing dataset:  84%|█████████████████▌   | 372/444 [10:27<02:00,  1.67s/it]

Trying to call OpenAI API...


Processing dataset:  84%|█████████████████▋   | 373/444 [10:29<01:52,  1.59s/it]

Trying to call OpenAI API...


Processing dataset:  84%|█████████████████▋   | 374/444 [10:30<01:45,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  84%|█████████████████▋   | 375/444 [10:31<01:46,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  85%|█████████████████▊   | 376/444 [10:33<01:50,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  85%|█████████████████▊   | 377/444 [10:35<01:44,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:  85%|█████████████████▉   | 378/444 [10:37<01:49,  1.65s/it]

Trying to call OpenAI API...


Processing dataset:  85%|█████████████████▉   | 379/444 [10:39<01:53,  1.74s/it]

Trying to call OpenAI API...


Processing dataset:  86%|█████████████████▉   | 380/444 [10:40<01:53,  1.77s/it]

Trying to call OpenAI API...


Processing dataset:  86%|██████████████████   | 381/444 [10:42<01:57,  1.87s/it]

Trying to call OpenAI API...


Processing dataset:  86%|██████████████████   | 382/444 [10:45<02:07,  2.06s/it]

Trying to call OpenAI API...


Processing dataset:  86%|██████████████████   | 383/444 [10:47<01:59,  1.96s/it]

Trying to call OpenAI API...


Processing dataset:  86%|██████████████████▏  | 384/444 [10:48<01:46,  1.77s/it]

Trying to call OpenAI API...


Processing dataset:  87%|██████████████████▏  | 385/444 [10:49<01:36,  1.64s/it]

Trying to call OpenAI API...


Processing dataset:  87%|██████████████████▎  | 386/444 [10:51<01:28,  1.53s/it]

Trying to call OpenAI API...


Processing dataset:  87%|██████████████████▎  | 387/444 [10:52<01:16,  1.34s/it]

Trying to call OpenAI API...


Processing dataset:  87%|██████████████████▎  | 388/444 [10:53<01:21,  1.45s/it]

Trying to call OpenAI API...


Processing dataset:  88%|██████████████████▍  | 389/444 [10:56<01:41,  1.85s/it]

Trying to call OpenAI API...


Processing dataset:  88%|██████████████████▍  | 390/444 [10:57<01:32,  1.72s/it]

Trying to call OpenAI API...


Processing dataset:  88%|██████████████████▍  | 391/444 [10:59<01:25,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  88%|██████████████████▌  | 392/444 [11:00<01:18,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  89%|██████████████████▌  | 393/444 [11:02<01:16,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  89%|██████████████████▋  | 394/444 [11:03<01:16,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  89%|██████████████████▋  | 395/444 [11:05<01:14,  1.53s/it]

Trying to call OpenAI API...


Processing dataset:  89%|██████████████████▋  | 396/444 [11:06<01:16,  1.60s/it]

Trying to call OpenAI API...


Processing dataset:  89%|██████████████████▊  | 397/444 [11:09<01:21,  1.74s/it]

Trying to call OpenAI API...


Processing dataset:  90%|██████████████████▊  | 398/444 [11:12<01:50,  2.39s/it]

Trying to call OpenAI API...


Processing dataset:  90%|██████████████████▊  | 399/444 [11:14<01:37,  2.16s/it]

Trying to call OpenAI API...


Processing dataset:  90%|██████████████████▉  | 400/444 [11:16<01:28,  2.01s/it]

Trying to call OpenAI API...


Processing dataset:  90%|██████████████████▉  | 401/444 [11:17<01:18,  1.83s/it]

Trying to call OpenAI API...


Processing dataset:  91%|███████████████████  | 402/444 [11:19<01:12,  1.73s/it]

Trying to call OpenAI API...


Processing dataset:  91%|███████████████████  | 403/444 [11:20<01:06,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  91%|███████████████████  | 404/444 [11:21<01:00,  1.51s/it]

Trying to call OpenAI API...


Processing dataset:  91%|███████████████████▏ | 405/444 [11:23<00:56,  1.45s/it]

Trying to call OpenAI API...


Processing dataset:  91%|███████████████████▏ | 406/444 [11:24<00:59,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:  92%|███████████████████▎ | 407/444 [11:26<00:54,  1.47s/it]

Trying to call OpenAI API...


Processing dataset:  92%|███████████████████▎ | 408/444 [11:28<00:58,  1.63s/it]

Trying to call OpenAI API...


Processing dataset:  92%|███████████████████▎ | 409/444 [11:29<00:57,  1.63s/it]

Trying to call OpenAI API...


Processing dataset:  92%|███████████████████▍ | 410/444 [11:31<00:54,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  93%|███████████████████▍ | 411/444 [11:32<00:52,  1.59s/it]

Trying to call OpenAI API...


Processing dataset:  93%|███████████████████▍ | 412/444 [11:34<00:49,  1.56s/it]

Trying to call OpenAI API...


Processing dataset:  93%|███████████████████▌ | 413/444 [11:35<00:47,  1.52s/it]

Trying to call OpenAI API...


Processing dataset:  93%|███████████████████▌ | 414/444 [11:37<00:43,  1.46s/it]

Trying to call OpenAI API...


Processing dataset:  93%|███████████████████▋ | 415/444 [11:38<00:42,  1.47s/it]

Trying to call OpenAI API...


Processing dataset:  94%|███████████████████▋ | 416/444 [11:42<00:59,  2.12s/it]

Trying to call OpenAI API...


Processing dataset:  94%|███████████████████▋ | 417/444 [11:43<00:51,  1.90s/it]

Trying to call OpenAI API...


Processing dataset:  94%|███████████████████▊ | 418/444 [11:45<00:47,  1.84s/it]

Trying to call OpenAI API...


Processing dataset:  94%|███████████████████▊ | 419/444 [11:47<00:50,  2.04s/it]

Trying to call OpenAI API...


Processing dataset:  95%|███████████████████▊ | 420/444 [11:49<00:43,  1.82s/it]

Trying to call OpenAI API...


Processing dataset:  95%|███████████████████▉ | 421/444 [11:50<00:40,  1.74s/it]

Trying to call OpenAI API...


Processing dataset:  95%|███████████████████▉ | 422/444 [11:52<00:39,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  95%|████████████████████ | 423/444 [11:54<00:35,  1.70s/it]

Trying to call OpenAI API...


Processing dataset:  95%|████████████████████ | 424/444 [11:56<00:35,  1.77s/it]

Trying to call OpenAI API...


Processing dataset:  96%|████████████████████ | 425/444 [11:57<00:31,  1.67s/it]

Trying to call OpenAI API...


Processing dataset:  96%|████████████████████▏| 426/444 [11:58<00:27,  1.54s/it]

Trying to call OpenAI API...


Processing dataset:  96%|████████████████████▏| 427/444 [12:00<00:27,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  96%|████████████████████▏| 428/444 [12:02<00:27,  1.71s/it]

Trying to call OpenAI API...


Processing dataset:  97%|████████████████████▎| 429/444 [12:03<00:24,  1.61s/it]

Trying to call OpenAI API...


Processing dataset:  97%|████████████████████▎| 430/444 [12:05<00:22,  1.62s/it]

Trying to call OpenAI API...


Processing dataset:  97%|████████████████████▍| 431/444 [12:09<00:29,  2.30s/it]

Trying to call OpenAI API...


Processing dataset:  97%|████████████████████▍| 432/444 [12:11<00:25,  2.17s/it]

Trying to call OpenAI API...


Processing dataset:  98%|████████████████████▍| 433/444 [12:12<00:20,  1.88s/it]

Trying to call OpenAI API...


Processing dataset:  98%|████████████████████▌| 434/444 [12:14<00:17,  1.80s/it]

Trying to call OpenAI API...


Processing dataset:  98%|████████████████████▌| 435/444 [12:15<00:16,  1.82s/it]

Trying to call OpenAI API...


Processing dataset:  98%|████████████████████▌| 436/444 [12:17<00:14,  1.86s/it]

Trying to call OpenAI API...


Processing dataset:  98%|████████████████████▋| 437/444 [12:19<00:13,  1.90s/it]

Trying to call OpenAI API...


Processing dataset:  99%|████████████████████▋| 438/444 [12:21<00:10,  1.81s/it]

Trying to call OpenAI API...


Processing dataset:  99%|████████████████████▊| 439/444 [12:23<00:09,  1.82s/it]

Trying to call OpenAI API...


Processing dataset:  99%|████████████████████▊| 440/444 [12:25<00:07,  1.86s/it]

Trying to call OpenAI API...


Processing dataset:  99%|████████████████████▊| 441/444 [12:27<00:05,  1.95s/it]

Trying to call OpenAI API...


Processing dataset: 100%|████████████████████▉| 442/444 [12:29<00:03,  1.91s/it]

Trying to call OpenAI API...


Processing dataset: 100%|████████████████████▉| 443/444 [12:30<00:01,  1.77s/it]

Trying to call OpenAI API...


Processing dataset: 100%|█████████████████████| 444/444 [12:31<00:00,  1.69s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_other[f'gpt_predictions_{prompt_id}_raw'] = apply_gpt_with_progress(df_other['input_journal_title_abstract'], prompt_text, model)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_other[f'gpt_predictions_{prompt_id}'] = df_other[f'gpt_predictions_{prompt_id}_raw'].apply(


In [24]:
df_animal.head()

Unnamed: 0,pmid,input_journal_title_abstract,accepted_label,multi_label,binary_label,gpt_predictions_P3_binary,gpt_predictions_P2_HIERARCHY_raw,gpt_predictions_P2_HIERARCHY
21,32147509,<journal>Neuroscience</journal><title>Neurobio...,Non-systematic-review,1,0,ANIMAL,"{\n ""gpt_label"": ""Animal-other""\n}",Animal-other
33,16312938,<journal>Zhongguo zhen jiu = Chinese acupunctu...,Non-systematic-review,1,0,ANIMAL,"{\n ""gpt_label"": ""Animal-non-drug-interventio...",Animal-non-drug-intervention
71,23811310,<journal>Biochemical pharmacology</journal><ti...,Non-systematic-review,1,0,ANIMAL,"{\n ""gpt_label"": ""Animal-other""\n}",Animal-other
157,33846423,<journal>Scientific reports</journal><title>ST...,Remaining,0,0,ANIMAL,"{\n ""gpt_label"": ""Animal-other""\n}",Animal-other
264,11909745,<journal>Microbes and infection</journal><titl...,Remaining,0,0,ANIMAL,"{\n ""gpt_label"": ""Animal-other""\n}",Animal-other


In [25]:
df_other.head()

Unnamed: 0,pmid,input_journal_title_abstract,accepted_label,multi_label,binary_label,gpt_predictions_P3_binary,gpt_predictions_P2_HIERARCHY_raw,gpt_predictions_P2_HIERARCHY
0,12047012,<journal>Schizophrenia bulletin</journal><titl...,Non-systematic-review,1,0,OTHER,"{""gpt_label"": ""Non-systematic-review""}",Non-systematic-review
1,28832188,<journal>Future medicinal chemistry</journal><...,Non-systematic-review,1,0,OTHER,"{\n ""gpt_label"": ""Non-systematic-review""\n}",Non-systematic-review
2,17678496,<journal>Expert review of neurotherapeutics</j...,Non-systematic-review,1,0,OTHER,"{\n ""gpt_label"": ""Human, Non-RCT non-drug-int...","Human, Non-RCT non-drug-intervention"
3,25649308,<journal>Annals of the New York Academy of Sci...,Non-systematic-review,1,0,OTHER,"{\n ""gpt_label"": ""Non-systematic-review""\n}",Non-systematic-review
4,6312596,<journal>La semaine des hopitaux : organe fond...,Non-systematic-review,1,0,OTHER,"{\n ""gpt_label"": ""Non-systematic-review""\n}",Non-systematic-review


In [26]:
appended_df = pd.concat([df_animal, df_other], ignore_index=True)
appended_df.shape

(534, 8)

In [27]:
appended_df.head()

Unnamed: 0,pmid,input_journal_title_abstract,accepted_label,multi_label,binary_label,gpt_predictions_P3_binary,gpt_predictions_P2_HIERARCHY_raw,gpt_predictions_P2_HIERARCHY
0,32147509,<journal>Neuroscience</journal><title>Neurobio...,Non-systematic-review,1,0,ANIMAL,"{\n ""gpt_label"": ""Animal-other""\n}",Animal-other
1,16312938,<journal>Zhongguo zhen jiu = Chinese acupunctu...,Non-systematic-review,1,0,ANIMAL,"{\n ""gpt_label"": ""Animal-non-drug-interventio...",Animal-non-drug-intervention
2,23811310,<journal>Biochemical pharmacology</journal><ti...,Non-systematic-review,1,0,ANIMAL,"{\n ""gpt_label"": ""Animal-other""\n}",Animal-other
3,33846423,<journal>Scientific reports</journal><title>ST...,Remaining,0,0,ANIMAL,"{\n ""gpt_label"": ""Animal-other""\n}",Animal-other
4,11909745,<journal>Microbes and infection</journal><titl...,Remaining,0,0,ANIMAL,"{\n ""gpt_label"": ""Animal-other""\n}",Animal-other


In [28]:
appended_df.to_csv(f"predictions/{model}_enriched_kw_test_outputs_{prompt_id_to_use_for_binary}_{'_'.join(prompt_ids_to_test)}_{file_to_save_suffix}.csv") # saving after each prompt strategy has been run, in order not to loose information in case a later strategy fails