In [1]:
import torch
import numpy as np
from pathlib import Path

from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics import pairwise
import pandas as pd
from openai import OpenAI
import openai

from sklearn.metrics import accuracy_score, classification_report, balanced_accuracy_score
from sklearn.metrics import balanced_accuracy_score

import random

In [2]:
embeddings_data_path = Path("../data/embeddings/")
input_data_path = Path("../data/data_splits_stratified/6-2-2_all_classes_enriched_with_kw")

## Load Embeddings and Calculate Similarities

### Load 
-> each row represents the text from one sample embedded into a 768-demnsional vector

In [3]:
embeddings_train = np.load(
    embeddings_data_path / "embeddings_microsoft_BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext_train_ds.npy"
)
embeddings_test = np.load(
    embeddings_data_path / "embeddings_microsoft_BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext_test_ds.npy"
)

In [4]:
embeddings_train.shape

(1191, 768)

In [5]:
embeddings_test.shape

(404, 768)

### Compute similarity between test dataset elements to all train elements


In [6]:
dist_matrix = pairwise_distances(embeddings_test, embeddings_train, metric='sqeuclidean')
dist_matrix.shape

(404, 1191)

In [7]:
dist_matrix_cosine = pairwise.cosine_similarity(X=embeddings_test, Y=embeddings_train)
dist_matrix.shape

(404, 1191)

The matrix has the dimensions 404 (test data elements) x 1191 (train data elements). Each of the 1191 values per test row is the similarity score between the test element to the train elements.

In [8]:
dist_matrix_cosine

array([[0.98506499, 0.98830865, 0.98870777, ..., 0.98741564, 0.98696952,
        0.98295794],
       [0.99007619, 0.99129481, 0.9916276 , ..., 0.99085124, 0.99138424,
        0.98798618],
       [0.98900018, 0.99066293, 0.9915621 , ..., 0.98766824, 0.99073964,
        0.98834762],
       ...,
       [0.98882802, 0.9873871 , 0.98857115, ..., 0.9928928 , 0.98869445,
        0.98763254],
       [0.98709908, 0.987791  , 0.9864768 , ..., 0.9930578 , 0.98749872,
        0.98896695],
       [0.98904398, 0.98862152, 0.98700841, ..., 0.9863798 , 0.99105832,
        0.99154065]])

### Find closest neighbours from the train dataset to each test example

In [9]:
values, indices = torch.topk(-torch.from_numpy(dist_matrix_cosine), k=3, dim=-1)

In [10]:
indices.shape

torch.Size([404, 3])

The indices have the dimension 404 (test data elements) x 3 (top k=3 closest train data elements).

In [11]:
indices

tensor([[ 128, 1032,  413],
        [ 128, 1032,  413],
        [ 128,  413, 1032],
        ...,
        [ 128,   11, 1032],
        [  11,  128,  356],
        [ 128,  356, 1032]])

In [12]:
df_train = pd.read_csv(input_data_path/ 'train.csv')
df_test = pd.read_csv(input_data_path/ 'test.csv')


In [13]:
df_test[df_test['accepted_label'] == 'In-vitro-study']

Unnamed: 0,idx,pmid,journal_name,title,abstract,keywords,accepted_label,multi_label,binary_label
265,266,24036210,Cellular signalling,Homer1 knockdown protects dopamine neurons thr...,Homer1 protein is an important scaffold protei...,Calcium channel| Endoplasmic reticulum| Homer|...,In-vitro-study,8,0
266,267,31852295,"Artificial cells, nanomedicine, and biotechnology",Fyn gene silencing reduces oligodendrocytes ap...,This study aimed to investigate the effect of ...,ERK1/2| Epilepsy| Fyn| apoptosis| gene silenci...,In-vitro-study,8,0
267,268,14655759,"Brain pathology (Zurich, Switzerland)",TRAIL triggers apoptosis in human malignant gl...,Many malignant glioma cells express death rece...,,In-vitro-study,8,0
268,269,1720637,No to shinkei = Brain and nerve,[Immunohistochemical study of glial cytoplasmi...,"Recently, glial cytoplasmic inclusion (GCI) ha...",,In-vitro-study,8,0
269,270,34477946,Journal of cancer research and clinical oncology,Selective induction of rapid cytotoxic effect ...,PURPOSE: The mechanisms underlying anticancer ...,Brain tumor| Cell culture| Noninvasive treatme...,In-vitro-study,8,0
270,271,28923314,Journal of virological methods,A simple method for isolation of cell-associat...,A common method for cell-associated virus isol...,Cell-associated virus| Cell-free extract| Infe...,In-vitro-study,8,0
271,272,27030558,"Network (Bristol, England)",Molecular interaction studies of green tea cat...,Green tea catechins have extensively been stud...,Catechins| molecular docking| pharmacokinetic ...,In-vitro-study,8,0
272,273,26568330,"Experimental biology and medicine (Maywood, N.J.)",Penetration and intracellular uptake of poly(g...,Nanoparticle (NP) drug delivery systems may po...,Bionanoscience| brain| polymer nanoparticles| ...,In-vitro-study,8,0
273,274,23926278,"Journal of neurology, neurosurgery, and psychi...",Sera from patients with multifocal motor neuro...,"In multifocal motor neuropathy (MMN), the dest...",BLOOD-BRAIN BARRIER| IMMUNOLOGY| NEUROCHEMISTR...,In-vitro-study,8,0
274,275,22975132,Brain research,Expression of CD200 in alternative activation ...,CD200 is a glycoprotein that is expressed on t...,,In-vitro-study,8,0


In [14]:
df_test.iloc[246]

idx                                                             247
pmid                                                       37905374
journal_name                                      Cognitive science
title             Repeat After Me? Both Children With and Withou...
abstract          Linguistic repetitions in children are concept...
keywords          Autism| Echolalia| Individual differences| Lan...
accepted_label                                            Remaining
multi_label                                                       0
binary_label                                                      0
Name: 246, dtype: object

In [15]:
indices[246]

tensor([ 128,  356, 1032])

In [16]:
df_train.iloc[1032]

idx                                                            1033
pmid                                                       21356158
journal_name                         International psychogeriatrics
title             Animal-assisted activity and emotional status ...
abstract          BACKGROUND: Preliminary studies suggest benefi...
keywords                                                        NaN
accepted_label                  Human-non-RCT-non-drug-intervention
multi_label                                                       2
binary_label                                                      0
Name: 1032, dtype: object

In [17]:
df_test.groupby('accepted_label').size()

accepted_label
Animal-drug-intervention                30
Animal-non-drug-intervention            16
Animal-other                            31
Clinical-study-protocol                 12
Human-RCT-drug-intervention             15
Human-RCT-non-drug-intervention         18
Human-RCT-non-intervention              11
Human-case-report                       33
Human-non-RCT-drug-intervention         36
Human-non-RCT-non-drug-intervention     42
Human-systematic-review                 22
In-vitro-study                          21
Non-systematic-review                   75
Remaining                              172
dtype: int64

## Init OpenAI API

In [18]:
def load_pass(file_path, key_to_find):
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split("=")
            if len(parts) == 2 and parts[0] == key_to_find:
                found_password = parts[1]
                break
    if found_password:
        print("Found password.")
        return found_password
    else:
        print("Password not found for key:", key_to_find)

In [19]:
openai.api_key = load_pass("./credentials.txt", "OPENAI")
client = OpenAI(api_key=openai.api_key)

Found password.


## Create Prompts

In [20]:
df_train['input_journal_title_abstract'] = '<journal>' + df_train['journal_name'] + '</journal>' + \
                                         '<title>' + df_train['title'] + '</title>' + \
                                         '<abstract>' + df_train['abstract'] + '</abstract>'
df_test['input_journal_title_abstract'] = '<journal>' + df_test['journal_name'] + '</journal>' + \
                                         '<title>' + df_test['title'] + '</title>' + \
                                         '<abstract>' + df_test['abstract'] + '</abstract>'

In [21]:
df_test

Unnamed: 0,idx,pmid,journal_name,title,abstract,keywords,accepted_label,multi_label,binary_label,input_journal_title_abstract
0,1,12047012,Schizophrenia bulletin,Can clinical practice guide a research agenda?,Articles from this issue of the Bulletin indic...,,Non-systematic-review,1,0,<journal>Schizophrenia bulletin</journal><titl...
1,2,28832188,Future medicinal chemistry,Dual/multitargeted xanthone derivatives for Al...,"To date, the current therapy for Alzheimer's d...",Alzheimer's disease| Aβ aggregation| anticholi...,Non-systematic-review,1,0,<journal>Future medicinal chemistry</journal><...
2,3,17678496,Expert review of neurotherapeutics,Benefits of occupational therapy in stroke reh...,Stroke is the largest single cause of severe p...,,Non-systematic-review,1,0,<journal>Expert review of neurotherapeutics</j...
3,4,25649308,Annals of the New York Academy of Sciences,The promise of ketamine for treatment-resistan...,Major depressive disorder (MDD) is one of the ...,antidepressant| bipolar disorder| ketamine| ma...,Non-systematic-review,1,0,<journal>Annals of the New York Academy of Sci...
4,5,6312596,La semaine des hopitaux : organe fonde par l'A...,[Clinical and pathogenic aspects of secondary ...,The secondary hyperlipoproteinemias are freque...,,Non-systematic-review,1,0,<journal>La semaine des hopitaux : organe fond...
...,...,...,...,...,...,...,...,...,...,...
529,530,24507711,Journal of clinical neuroscience : official jo...,Validation of the Chinese version of the NUCOG...,We aimed to develop and validate a Chinese ver...,Cognition| Dementia| Epilepsy| Mini-Mental Sta...,Human-RCT-non-intervention,13,0,<journal>Journal of clinical neuroscience : of...
530,531,28063795,The Lancet. Infectious diseases,Effect of new tuberculosis diagnostic technolo...,BACKGROUND: Inadequate case detection results ...,,Human-RCT-non-intervention,13,0,<journal>The Lancet. Infectious diseases</jour...
531,532,35488938,European child & adolescent psychiatry,The impact of school-based screening on servic...,Early detection and intervention can counterac...,Adolescents| Mental health problems| Risk beha...,Human-RCT-non-intervention,13,0,<journal>European child & adolescent psychiatr...
532,533,16158009,Dementia and geriatric cognitive disorders,Validating the DemTect with 18-fluoro-2-deoxy-...,OBJECTIVES: The first study to validate the di...,,Human-RCT-non-intervention,13,0,<journal>Dementia and geriatric cognitive diso...


In [22]:
def create_prompt(df_train, df_test, test_index, example_indices):
    # Start the prompt with a task description (optional)
    prompt = "Classify this text, choosing one of these labels: Clinical-study-protocol, Human-systematic-review, Non-systematic-review, Human-RCT-non-drug-intervention, Human-RCT-drug-intervention, Human-RCT-non-intervention, Human-case-report, Human-non-RCT-non-drug-intervention, Human-non-RCT-drug-intervention, Animal-systematic-review, Animal-drug-intervention, Animal-non-drug-intervention, Animal-other, In-vitro-study, Remaining. Respond in json format with the key: gpt_label.\n\n"
    
    # Add examples from df_train
    for idx in example_indices:
        example_text = df_train.loc[idx, 'input_journal_title_abstract']
        example_label = df_train.loc[idx, 'accepted_label']
        prompt += f"Text: \"{example_text}\"\nCategory: {example_label}\n\n"
    
    # Add the test text needing classification
    test_text = df_test.loc[test_index, 'input_journal_title_abstract']
    prompt += f"Text: \"{test_text}\"\nCategory: "
    
    return prompt

In [23]:
example_i = 0
example_prompt = create_prompt(df_train, df_test, example_i, indices[example_i].tolist())
#example_prompt

In [24]:
len(example_prompt.split())

599

In [25]:
import time
from tqdm.auto import tqdm

DEFAULT_TEMPERATURE = 0

def create_prompt(df_train, example_indices, input_raw_text):
    prompt = "Classify this text, choosing one of these labels: Clinical-study-protocol, Human-systematic-review, Non-systematic-review, Human-RCT-non-drug-intervention, Human-RCT-drug-intervention, Human-RCT-non-intervention, Human-case-report, Human-non-RCT-non-drug-intervention, Human-non-RCT-drug-intervention, Animal-systematic-review, Animal-drug-intervention, Animal-non-drug-intervention, Animal-other, In-vitro-study, Remaining. Respond in json format with the key: gpt_label.\n\n"
    for idx in example_indices:
        example_text = df_train.loc[idx, 'input_journal_title_abstract']
        example_label = df_train.loc[idx, 'accepted_label']
        prompt += f"Text: \"{example_text}\"\nCategory: {example_label}\n\n"
    prompt += f"Text: \"{input_raw_text}\"\nCategory: "
    return prompt

def query_gpt(df_train, input_raw_text, example_indices, gpt_model="gpt-3.5-turbo", temperature=DEFAULT_TEMPERATURE, max_retries=5, retry_delay=3):
    prompt_text = create_prompt(df_train, example_indices, input_raw_text)
    system_msg = f"You are an expert assistant specialized in text classification of PubMed abstracts."

    retries = 0
    while retries < max_retries:
        print("Trying to call OpenAI API...")
        try:
            completion = client.chat.completions.create(
                model=gpt_model,  
                response_format={"type": "json_object"},
                temperature=temperature,
                messages=[
                    {"role": "system", "content": system_msg},
                    {"role": "user", "content": prompt_text}
                ]
            )
            return completion.choices[0].message.content
        except Exception as e:
            print(f"OpenAI API returned an error: {e}")
            time.sleep(retry_delay)
            retries += 1

    raise RuntimeError("Max retries reached. Unable to complete the API call.")

def apply_gpt_with_progress(df_train, test_data_series, example_indices_tensor=None, num_samples=3, use_random=False, model="gpt-3.5-turbo"):
    results = []
    total_items = len(test_data_series)
    with tqdm(total=total_items, desc="Processing dataset") as pbar:
        for i, text in enumerate(test_data_series):
            if use_random:
                example_indices = random.sample(range(len(df_train)), num_samples)
            else:
                example_indices = example_indices_tensor[i].tolist()
            print("Retrieved in-context learning examples with idx: ", example_indices)
            result = query_gpt(df_train, text, example_indices, model)
            results.append(result)
            pbar.update(1)
    return results


  from .autonotebook import tqdm as notebook_tqdm


In [26]:
# Example usage:
df_test[f'gpt_predictions_in_context'] = apply_gpt_with_progress(df_train, df_test['input_journal_title_abstract'], indices)

Processing dataset:   0%|                               | 0/534 [00:00<?, ?it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:   0%|                       | 1/534 [00:02<21:40,  2.44s/it]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:   0%|                       | 2/534 [00:03<12:48,  1.44s/it]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   1%|▏                      | 3/534 [00:03<09:54,  1.12s/it]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:   1%|▏                      | 4/534 [00:04<09:31,  1.08s/it]

Retrieved in-context learning examples with idx:  [413, 356, 128]
Trying to call OpenAI API...


Processing dataset:   1%|▏                      | 5/534 [00:05<08:39,  1.02it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:   1%|▎                      | 6/534 [00:06<07:45,  1.13it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:   1%|▎                      | 7/534 [00:07<07:20,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:   1%|▎                      | 8/534 [00:08<07:17,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:   2%|▍                      | 9/534 [00:08<06:56,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:   2%|▍                     | 10/534 [00:09<07:31,  1.16it/s]

Retrieved in-context learning examples with idx:  [356, 128, 11]
Trying to call OpenAI API...


Processing dataset:   2%|▍                     | 11/534 [00:10<06:53,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   2%|▍                     | 12/534 [00:11<07:29,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   2%|▌                     | 13/534 [00:12<07:20,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:   3%|▌                     | 14/534 [00:13<07:21,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:   3%|▌                     | 15/534 [00:13<07:09,  1.21it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   3%|▋                     | 16/534 [00:14<07:26,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:   3%|▋                     | 17/534 [00:15<07:52,  1.09it/s]

Retrieved in-context learning examples with idx:  [128, 191, 413]
Trying to call OpenAI API...


Processing dataset:   3%|▋                     | 18/534 [00:16<07:26,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:   4%|▊                     | 19/534 [00:17<07:48,  1.10it/s]

Retrieved in-context learning examples with idx:  [191, 128, 1032]
Trying to call OpenAI API...


Processing dataset:   4%|▊                     | 20/534 [00:18<07:32,  1.14it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:   4%|▊                     | 21/534 [00:19<07:25,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:   4%|▉                     | 22/534 [00:20<07:45,  1.10it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:   4%|▉                     | 23/534 [00:21<07:45,  1.10it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:   4%|▉                     | 24/534 [00:21<07:26,  1.14it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:   5%|█                     | 25/534 [00:22<07:09,  1.18it/s]

Retrieved in-context learning examples with idx:  [413, 1032, 128]
Trying to call OpenAI API...


Processing dataset:   5%|█                     | 26/534 [00:23<06:48,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:   5%|█                     | 27/534 [00:24<06:27,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:   5%|█▏                    | 28/534 [00:24<06:19,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:   5%|█▏                    | 29/534 [00:25<06:11,  1.36it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   6%|█▏                    | 30/534 [00:26<06:14,  1.35it/s]

Retrieved in-context learning examples with idx:  [191, 984, 65]
Trying to call OpenAI API...


Processing dataset:   6%|█▎                    | 31/534 [00:27<06:32,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:   6%|█▎                    | 32/534 [00:27<06:24,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:   6%|█▎                    | 33/534 [00:28<06:21,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   6%|█▍                    | 34/534 [00:29<06:26,  1.29it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:   7%|█▍                    | 35/534 [00:30<06:28,  1.29it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   7%|█▍                    | 36/534 [00:30<06:11,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:   7%|█▌                    | 37/534 [00:31<06:20,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   7%|█▌                    | 38/534 [00:32<06:43,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:   7%|█▌                    | 39/534 [00:33<06:28,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   7%|█▋                    | 40/534 [00:33<06:06,  1.35it/s]

Retrieved in-context learning examples with idx:  [191, 697, 990]
Trying to call OpenAI API...


Processing dataset:   8%|█▋                    | 41/534 [00:34<06:31,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   8%|█▋                    | 42/534 [00:35<06:14,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:   8%|█▊                    | 43/534 [00:36<06:08,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   8%|█▊                    | 44/534 [00:37<06:28,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:   8%|█▊                    | 45/534 [00:37<06:23,  1.27it/s]

Retrieved in-context learning examples with idx:  [356, 128, 11]
Trying to call OpenAI API...


Processing dataset:   9%|█▉                    | 46/534 [00:38<06:11,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:   9%|█▉                    | 47/534 [00:39<06:26,  1.26it/s]

Retrieved in-context learning examples with idx:  [356, 128, 413]
Trying to call OpenAI API...


Processing dataset:   9%|█▉                    | 48/534 [00:40<06:20,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:   9%|██                    | 49/534 [00:40<06:08,  1.32it/s]

Retrieved in-context learning examples with idx:  [413, 128, 356]
Trying to call OpenAI API...


Processing dataset:   9%|██                    | 50/534 [00:41<05:47,  1.39it/s]

Retrieved in-context learning examples with idx:  [191, 128, 11]
Trying to call OpenAI API...


Processing dataset:  10%|██                    | 51/534 [00:42<05:56,  1.36it/s]

Retrieved in-context learning examples with idx:  [413, 128, 1032]
Trying to call OpenAI API...


Processing dataset:  10%|██▏                   | 52/534 [00:43<05:58,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  10%|██▏                   | 53/534 [00:44<06:23,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  10%|██▏                   | 54/534 [00:45<07:18,  1.09it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  10%|██▎                   | 55/534 [00:46<06:55,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  10%|██▎                   | 56/534 [00:46<06:57,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  11%|██▎                   | 57/534 [00:47<07:03,  1.13it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  11%|██▍                   | 58/534 [00:48<06:36,  1.20it/s]

Retrieved in-context learning examples with idx:  [191, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  11%|██▍                   | 59/534 [00:49<06:06,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  11%|██▍                   | 60/534 [00:49<05:51,  1.35it/s]

Retrieved in-context learning examples with idx:  [356, 11, 128]
Trying to call OpenAI API...


Processing dataset:  11%|██▌                   | 61/534 [00:50<05:54,  1.33it/s]

Retrieved in-context learning examples with idx:  [356, 128, 413]
Trying to call OpenAI API...


Processing dataset:  12%|██▌                   | 62/534 [00:51<05:32,  1.42it/s]

Retrieved in-context learning examples with idx:  [1032, 128, 413]
Trying to call OpenAI API...


Processing dataset:  12%|██▌                   | 63/534 [00:51<05:41,  1.38it/s]

Retrieved in-context learning examples with idx:  [356, 128, 413]
Trying to call OpenAI API...


Processing dataset:  12%|██▋                   | 64/534 [00:52<05:30,  1.42it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  12%|██▋                   | 65/534 [00:53<06:17,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  12%|██▋                   | 66/534 [00:54<06:23,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  13%|██▊                   | 67/534 [00:55<06:51,  1.14it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  13%|██▊                   | 68/534 [00:56<07:15,  1.07it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:  13%|██▊                   | 69/534 [00:57<08:07,  1.05s/it]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  13%|██▉                   | 70/534 [00:59<09:11,  1.19s/it]

Retrieved in-context learning examples with idx:  [128, 356, 66]
Trying to call OpenAI API...


Processing dataset:  13%|██▉                   | 71/534 [01:00<08:15,  1.07s/it]

Retrieved in-context learning examples with idx:  [128, 191, 1114]
Trying to call OpenAI API...


Processing dataset:  13%|██▉                   | 72/534 [01:00<07:22,  1.04it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  14%|███                   | 73/534 [01:01<06:41,  1.15it/s]

Retrieved in-context learning examples with idx:  [356, 128, 11]
Trying to call OpenAI API...


Processing dataset:  14%|███                   | 74/534 [01:02<06:06,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  14%|███                   | 75/534 [01:03<06:14,  1.22it/s]

Retrieved in-context learning examples with idx:  [191, 697, 990]
Trying to call OpenAI API...


Processing dataset:  14%|███▏                  | 76/534 [01:04<06:35,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  14%|███▏                  | 77/534 [01:04<06:34,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  15%|███▏                  | 78/534 [01:05<07:02,  1.08it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  15%|███▎                  | 79/534 [01:07<07:15,  1.05it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  15%|███▎                  | 80/534 [01:07<07:09,  1.06it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  15%|███▎                  | 81/534 [01:08<06:51,  1.10it/s]

Retrieved in-context learning examples with idx:  [356, 128, 413]
Trying to call OpenAI API...


Processing dataset:  15%|███▍                  | 82/534 [01:09<06:44,  1.12it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  16%|███▍                  | 83/534 [01:10<06:27,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  16%|███▍                  | 84/534 [01:11<06:25,  1.17it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  16%|███▌                  | 85/534 [01:12<06:15,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  16%|███▌                  | 86/534 [01:13<06:53,  1.08it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  16%|███▌                  | 87/534 [01:13<06:24,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  16%|███▋                  | 88/534 [01:14<06:04,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  17%|███▋                  | 89/534 [01:15<06:06,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  17%|███▋                  | 90/534 [01:16<06:03,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  17%|███▋                  | 91/534 [01:17<06:09,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  17%|███▊                  | 92/534 [01:17<05:44,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  17%|███▊                  | 93/534 [01:18<05:37,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  18%|███▊                  | 94/534 [01:19<05:41,  1.29it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1000]
Trying to call OpenAI API...


Processing dataset:  18%|███▉                  | 95/534 [01:20<05:49,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  18%|███▉                  | 96/534 [01:20<05:38,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:  18%|███▉                  | 97/534 [01:21<05:27,  1.33it/s]

Retrieved in-context learning examples with idx:  [356, 11, 128]
Trying to call OpenAI API...


Processing dataset:  18%|████                  | 98/534 [01:22<05:29,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  19%|████                  | 99/534 [01:23<05:22,  1.35it/s]

Retrieved in-context learning examples with idx:  [191, 128, 739]
Trying to call OpenAI API...


Processing dataset:  19%|███▉                 | 100/534 [01:23<05:47,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  19%|███▉                 | 101/534 [01:24<05:21,  1.35it/s]

Retrieved in-context learning examples with idx:  [191, 128, 356]
Trying to call OpenAI API...


Processing dataset:  19%|████                 | 102/534 [01:25<05:26,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 191, 1000]
Trying to call OpenAI API...


Processing dataset:  19%|████                 | 103/534 [01:26<05:41,  1.26it/s]

Retrieved in-context learning examples with idx:  [191, 65, 739]
Trying to call OpenAI API...


Processing dataset:  19%|████                 | 104/534 [01:27<05:39,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  20%|████▏                | 105/534 [01:27<05:30,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  20%|████▏                | 106/534 [01:28<05:30,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  20%|████▏                | 107/534 [01:29<05:26,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  20%|████▏                | 108/534 [01:30<05:36,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  20%|████▎                | 109/534 [01:30<05:18,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  21%|████▎                | 110/534 [01:31<05:23,  1.31it/s]

Retrieved in-context learning examples with idx:  [11, 128, 356]
Trying to call OpenAI API...


Processing dataset:  21%|████▎                | 111/534 [01:32<05:17,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  21%|████▍                | 112/534 [01:33<06:05,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  21%|████▍                | 113/534 [01:34<06:00,  1.17it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  21%|████▍                | 114/534 [01:34<05:42,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  22%|████▌                | 115/534 [01:35<05:19,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  22%|████▌                | 116/534 [01:36<05:21,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  22%|████▌                | 117/534 [01:38<07:33,  1.09s/it]

Retrieved in-context learning examples with idx:  [128, 1032, 191]
Trying to call OpenAI API...


Processing dataset:  22%|████▋                | 118/534 [01:39<06:57,  1.00s/it]

Retrieved in-context learning examples with idx:  [191, 128, 4]
Trying to call OpenAI API...


Processing dataset:  22%|████▋                | 119/534 [01:39<06:38,  1.04it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  22%|████▋                | 120/534 [01:40<06:12,  1.11it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  23%|████▊                | 121/534 [01:41<06:08,  1.12it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  23%|████▊                | 122/534 [01:42<05:58,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 11, 4]
Trying to call OpenAI API...


Processing dataset:  23%|████▊                | 123/534 [01:43<06:03,  1.13it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  23%|████▉                | 124/534 [01:43<05:41,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  23%|████▉                | 125/534 [01:44<05:20,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:  24%|████▉                | 126/534 [01:45<05:34,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:  24%|████▉                | 127/534 [01:46<06:06,  1.11it/s]

Retrieved in-context learning examples with idx:  [128, 191, 1032]
Trying to call OpenAI API...


Processing dataset:  24%|█████                | 128/534 [01:47<05:33,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  24%|█████                | 129/534 [01:47<05:18,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  24%|█████                | 130/534 [01:48<05:19,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 11, 1032]
Trying to call OpenAI API...


Processing dataset:  25%|█████▏               | 131/534 [01:49<05:12,  1.29it/s]

Retrieved in-context learning examples with idx:  [191, 128, 140]
Trying to call OpenAI API...


Processing dataset:  25%|█████▏               | 132/534 [01:50<05:14,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  25%|█████▏               | 133/534 [01:51<05:13,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  25%|█████▎               | 134/534 [01:51<04:54,  1.36it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  25%|█████▎               | 135/534 [01:52<05:06,  1.30it/s]

Retrieved in-context learning examples with idx:  [11, 356, 128]
Trying to call OpenAI API...


Processing dataset:  25%|█████▎               | 136/534 [01:53<05:06,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  26%|█████▍               | 137/534 [01:53<04:50,  1.37it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  26%|█████▍               | 138/534 [01:54<04:46,  1.38it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  26%|█████▍               | 139/534 [01:55<04:53,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  26%|█████▌               | 140/534 [01:56<04:55,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:  26%|█████▌               | 141/534 [01:57<04:54,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 356, 191]
Trying to call OpenAI API...


Processing dataset:  27%|█████▌               | 142/534 [01:57<04:46,  1.37it/s]

Retrieved in-context learning examples with idx:  [128, 191, 413]
Trying to call OpenAI API...


Processing dataset:  27%|█████▌               | 143/534 [01:58<04:46,  1.36it/s]

Retrieved in-context learning examples with idx:  [128, 356, 191]
Trying to call OpenAI API...


Processing dataset:  27%|█████▋               | 144/534 [01:59<04:51,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  27%|█████▋               | 145/534 [01:59<04:49,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  27%|█████▋               | 146/534 [02:00<04:58,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  28%|█████▊               | 147/534 [02:01<05:03,  1.28it/s]

Retrieved in-context learning examples with idx:  [413, 356, 128]
Trying to call OpenAI API...


Processing dataset:  28%|█████▊               | 148/534 [02:02<04:56,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 356, 841]
Trying to call OpenAI API...


Processing dataset:  28%|█████▊               | 149/534 [02:03<04:48,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:  28%|█████▉               | 150/534 [02:03<04:33,  1.41it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  28%|█████▉               | 151/534 [02:04<04:31,  1.41it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  28%|█████▉               | 152/534 [02:05<04:31,  1.41it/s]

Retrieved in-context learning examples with idx:  [191, 11, 356]
Trying to call OpenAI API...


Processing dataset:  29%|██████               | 153/534 [02:05<04:31,  1.41it/s]

Retrieved in-context learning examples with idx:  [128, 356, 191]
Trying to call OpenAI API...


Processing dataset:  29%|██████               | 154/534 [02:06<05:20,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  29%|██████               | 155/534 [02:07<05:03,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 11, 1032]
Trying to call OpenAI API...


Processing dataset:  29%|██████▏              | 156/534 [02:08<04:52,  1.29it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  29%|██████▏              | 157/534 [02:09<04:46,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  30%|██████▏              | 158/534 [02:09<04:42,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  30%|██████▎              | 159/534 [02:10<04:43,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  30%|██████▎              | 160/534 [02:11<04:53,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  30%|██████▎              | 161/534 [02:12<04:43,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  30%|██████▎              | 162/534 [02:12<04:42,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  31%|██████▍              | 163/534 [02:13<04:27,  1.39it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  31%|██████▍              | 164/534 [02:14<04:25,  1.40it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  31%|██████▍              | 165/534 [02:14<04:20,  1.41it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  31%|██████▌              | 166/534 [02:15<04:43,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:  31%|██████▌              | 167/534 [02:16<04:36,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  31%|██████▌              | 168/534 [02:17<04:32,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 356, 191]
Trying to call OpenAI API...


Processing dataset:  32%|██████▋              | 169/534 [02:17<04:20,  1.40it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  32%|██████▋              | 170/534 [02:18<04:08,  1.46it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  32%|██████▋              | 171/534 [02:19<04:31,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  32%|██████▊              | 172/534 [02:20<04:49,  1.25it/s]

Retrieved in-context learning examples with idx:  [413, 356, 128]
Trying to call OpenAI API...


Processing dataset:  32%|██████▊              | 173/534 [02:20<04:23,  1.37it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  33%|██████▊              | 174/534 [02:21<04:15,  1.41it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  33%|██████▉              | 175/534 [02:22<04:15,  1.40it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  33%|██████▉              | 176/534 [02:22<04:09,  1.43it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  33%|██████▉              | 177/534 [02:23<04:37,  1.29it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  33%|███████              | 178/534 [02:24<04:35,  1.29it/s]

Retrieved in-context learning examples with idx:  [128, 191, 413]
Trying to call OpenAI API...


Processing dataset:  34%|███████              | 179/534 [02:25<04:22,  1.35it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  34%|███████              | 180/534 [02:26<04:21,  1.35it/s]

Retrieved in-context learning examples with idx:  [356, 128, 413]
Trying to call OpenAI API...


Processing dataset:  34%|███████              | 181/534 [02:28<06:50,  1.16s/it]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  34%|███████▏             | 182/534 [02:28<05:51,  1.00it/s]

Retrieved in-context learning examples with idx:  [128, 191, 984]
Trying to call OpenAI API...


Processing dataset:  34%|███████▏             | 183/534 [02:29<05:31,  1.06it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  34%|███████▏             | 184/534 [02:30<05:17,  1.10it/s]

Retrieved in-context learning examples with idx:  [356, 413, 11]
Trying to call OpenAI API...


Processing dataset:  35%|███████▎             | 185/534 [02:31<05:07,  1.14it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  35%|███████▎             | 186/534 [02:32<05:03,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  35%|███████▎             | 187/534 [02:32<04:53,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  35%|███████▍             | 188/534 [02:33<04:38,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  35%|███████▍             | 189/534 [02:34<04:32,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  36%|███████▍             | 190/534 [02:35<04:31,  1.27it/s]

Retrieved in-context learning examples with idx:  [356, 128, 413]
Trying to call OpenAI API...


Processing dataset:  36%|███████▌             | 191/534 [02:35<04:14,  1.35it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  36%|███████▌             | 192/534 [02:36<04:12,  1.35it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  36%|███████▌             | 193/534 [02:37<04:16,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  36%|███████▋             | 194/534 [02:38<05:35,  1.01it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  37%|███████▋             | 195/534 [02:39<04:57,  1.14it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  37%|███████▋             | 196/534 [02:40<04:50,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  37%|███████▋             | 197/534 [02:41<05:16,  1.06it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  37%|███████▊             | 198/534 [02:42<05:03,  1.11it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  37%|███████▊             | 199/534 [02:42<04:43,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  37%|███████▊             | 200/534 [02:43<04:22,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  38%|███████▉             | 201/534 [02:44<04:32,  1.22it/s]

Retrieved in-context learning examples with idx:  [356, 128, 413]
Trying to call OpenAI API...


Processing dataset:  38%|███████▉             | 202/534 [02:45<04:30,  1.23it/s]

Retrieved in-context learning examples with idx:  [1032, 413, 128]
Trying to call OpenAI API...


Processing dataset:  38%|███████▉             | 203/534 [02:46<04:40,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  38%|████████             | 204/534 [02:47<04:57,  1.11it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  38%|████████             | 205/534 [02:48<04:47,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  39%|████████             | 206/534 [02:48<04:42,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  39%|████████▏            | 207/534 [02:49<04:18,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  39%|████████▏            | 208/534 [02:50<04:09,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  39%|████████▏            | 209/534 [02:50<04:08,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  39%|████████▎            | 210/534 [02:51<04:07,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  40%|████████▎            | 211/534 [02:52<04:13,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  40%|████████▎            | 212/534 [02:53<04:15,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  40%|████████▍            | 213/534 [02:54<04:26,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  40%|████████▍            | 214/534 [02:55<04:19,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  40%|████████▍            | 215/534 [02:55<04:19,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  40%|████████▍            | 216/534 [02:56<04:23,  1.21it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  41%|████████▌            | 217/534 [02:57<04:13,  1.25it/s]

Retrieved in-context learning examples with idx:  [11, 413, 128]
Trying to call OpenAI API...


Processing dataset:  41%|████████▌            | 218/534 [02:58<04:03,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  41%|████████▌            | 219/534 [02:59<04:08,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  41%|████████▋            | 220/534 [02:59<04:08,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 191, 413]
Trying to call OpenAI API...


Processing dataset:  41%|████████▋            | 221/534 [03:00<04:12,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  42%|████████▋            | 222/534 [03:01<04:06,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  42%|████████▊            | 223/534 [03:02<03:55,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  42%|████████▊            | 224/534 [03:02<03:51,  1.34it/s]

Retrieved in-context learning examples with idx:  [11, 128, 4]
Trying to call OpenAI API...


Processing dataset:  42%|████████▊            | 225/534 [03:03<03:50,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  42%|████████▉            | 226/534 [03:04<03:49,  1.34it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  43%|████████▉            | 227/534 [03:05<03:50,  1.33it/s]

Retrieved in-context learning examples with idx:  [356, 128, 11]
Trying to call OpenAI API...


Processing dataset:  43%|████████▉            | 228/534 [03:05<03:43,  1.37it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  43%|█████████            | 229/534 [03:06<03:35,  1.41it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  43%|█████████            | 230/534 [03:07<03:31,  1.44it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  43%|█████████            | 231/534 [03:07<03:41,  1.37it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  43%|█████████            | 232/534 [03:08<03:43,  1.35it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  44%|█████████▏           | 233/534 [03:09<03:40,  1.36it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  44%|█████████▏           | 234/534 [03:10<03:35,  1.39it/s]

Retrieved in-context learning examples with idx:  [191, 128, 413]
Trying to call OpenAI API...


Processing dataset:  44%|█████████▏           | 235/534 [03:10<03:28,  1.43it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  44%|█████████▎           | 236/534 [03:11<03:26,  1.44it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  44%|█████████▎           | 237/534 [03:12<03:35,  1.38it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  45%|█████████▎           | 238/534 [03:12<03:29,  1.41it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  45%|█████████▍           | 239/534 [03:13<03:21,  1.47it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  45%|█████████▍           | 240/534 [03:14<03:23,  1.45it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  45%|█████████▍           | 241/534 [03:15<03:33,  1.37it/s]

Retrieved in-context learning examples with idx:  [128, 191, 413]
Trying to call OpenAI API...


Processing dataset:  45%|█████████▌           | 242/534 [03:16<04:07,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  46%|█████████▌           | 243/534 [03:16<03:50,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  46%|█████████▌           | 244/534 [03:17<03:47,  1.27it/s]

Retrieved in-context learning examples with idx:  [191, 356, 11]
Trying to call OpenAI API...


Processing dataset:  46%|█████████▋           | 245/534 [03:18<03:59,  1.21it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  46%|█████████▋           | 246/534 [03:19<03:49,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  46%|█████████▋           | 247/534 [03:19<03:32,  1.35it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  46%|█████████▊           | 248/534 [03:20<03:23,  1.41it/s]

Retrieved in-context learning examples with idx:  [191, 128, 413]
Trying to call OpenAI API...


Processing dataset:  47%|█████████▊           | 249/534 [03:21<03:15,  1.45it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  47%|█████████▊           | 250/534 [03:24<06:25,  1.36s/it]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  47%|█████████▊           | 251/534 [03:24<05:29,  1.16s/it]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  47%|█████████▉           | 252/534 [03:25<04:56,  1.05s/it]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  47%|█████████▉           | 253/534 [03:26<04:21,  1.08it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  48%|█████████▉           | 254/534 [03:26<04:02,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  48%|██████████           | 255/534 [03:27<03:59,  1.17it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  48%|██████████           | 256/534 [03:28<03:51,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  48%|██████████           | 257/534 [03:29<03:51,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  48%|██████████▏          | 258/534 [03:30<03:35,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  49%|██████████▏          | 259/534 [03:31<04:00,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  49%|██████████▏          | 260/534 [03:31<03:42,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  49%|██████████▎          | 261/534 [03:32<03:37,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  49%|██████████▎          | 262/534 [03:33<03:28,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1114]
Trying to call OpenAI API...


Processing dataset:  49%|██████████▎          | 263/534 [03:33<03:27,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 1114, 1000]
Trying to call OpenAI API...


Processing dataset:  49%|██████████▍          | 264/534 [03:34<03:35,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 356, 65]
Trying to call OpenAI API...


Processing dataset:  50%|██████████▍          | 265/534 [03:35<03:21,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 1114, 356]
Trying to call OpenAI API...


Processing dataset:  50%|██████████▍          | 266/534 [03:36<03:26,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  50%|██████████▌          | 267/534 [03:37<03:28,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 191, 1114]
Trying to call OpenAI API...


Processing dataset:  50%|██████████▌          | 268/534 [03:37<03:32,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  50%|██████████▌          | 269/534 [03:38<03:24,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  51%|██████████▌          | 270/534 [03:39<03:23,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 191, 413]
Trying to call OpenAI API...


Processing dataset:  51%|██████████▋          | 271/534 [03:40<03:22,  1.30it/s]

Retrieved in-context learning examples with idx:  [11, 128, 140]
Trying to call OpenAI API...


Processing dataset:  51%|██████████▋          | 272/534 [03:40<03:18,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 191, 31]
Trying to call OpenAI API...


Processing dataset:  51%|██████████▋          | 273/534 [03:41<03:21,  1.29it/s]

Retrieved in-context learning examples with idx:  [191, 128, 984]
Trying to call OpenAI API...


Processing dataset:  51%|██████████▊          | 274/534 [03:42<03:24,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  51%|██████████▊          | 275/534 [03:43<03:14,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  52%|██████████▊          | 276/534 [03:43<03:14,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  52%|██████████▉          | 277/534 [03:44<03:10,  1.35it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  52%|██████████▉          | 278/534 [03:45<03:14,  1.32it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  52%|██████████▉          | 279/534 [03:46<03:12,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  52%|███████████          | 280/534 [03:47<04:11,  1.01it/s]

Retrieved in-context learning examples with idx:  [128, 191, 140]
Trying to call OpenAI API...


Processing dataset:  53%|███████████          | 281/534 [03:48<04:13,  1.00s/it]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  53%|███████████          | 282/534 [03:50<04:29,  1.07s/it]

Retrieved in-context learning examples with idx:  [128, 356, 984]
Trying to call OpenAI API...


Processing dataset:  53%|███████████▏         | 283/534 [03:50<04:01,  1.04it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  53%|███████████▏         | 284/534 [03:51<03:34,  1.17it/s]

Retrieved in-context learning examples with idx:  [11, 191, 739]
Trying to call OpenAI API...


Processing dataset:  53%|███████████▏         | 285/534 [03:52<03:20,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  54%|███████████▏         | 286/534 [03:52<03:16,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  54%|███████████▎         | 287/534 [03:53<03:22,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  54%|███████████▎         | 288/534 [03:54<03:20,  1.23it/s]

Retrieved in-context learning examples with idx:  [356, 11, 413]
Trying to call OpenAI API...


Processing dataset:  54%|███████████▎         | 289/534 [03:55<03:30,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  54%|███████████▍         | 290/534 [03:56<03:41,  1.10it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  54%|███████████▍         | 291/534 [03:57<03:31,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  55%|███████████▍         | 292/534 [03:58<03:42,  1.09it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 11]
Trying to call OpenAI API...


Processing dataset:  55%|███████████▌         | 293/534 [03:58<03:21,  1.20it/s]

Retrieved in-context learning examples with idx:  [128, 11, 1032]
Trying to call OpenAI API...


Processing dataset:  55%|███████████▌         | 294/534 [03:59<03:12,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  55%|███████████▌         | 295/534 [04:00<03:08,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  55%|███████████▋         | 296/534 [04:01<03:06,  1.28it/s]

Retrieved in-context learning examples with idx:  [11, 128, 413]
Trying to call OpenAI API...


Processing dataset:  56%|███████████▋         | 297/534 [04:02<03:15,  1.21it/s]

Retrieved in-context learning examples with idx:  [128, 413, 191]
Trying to call OpenAI API...


Processing dataset:  56%|███████████▋         | 298/534 [04:02<03:09,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  56%|███████████▊         | 299/534 [04:03<03:04,  1.27it/s]

Retrieved in-context learning examples with idx:  [191, 65, 436]
Trying to call OpenAI API...


Processing dataset:  56%|███████████▊         | 300/534 [04:04<03:09,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  56%|███████████▊         | 301/534 [04:05<03:01,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  57%|███████████▉         | 302/534 [04:06<03:14,  1.19it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  57%|███████████▉         | 303/534 [04:06<03:03,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  57%|███████████▉         | 304/534 [04:07<02:55,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  57%|███████████▉         | 305/534 [04:08<02:58,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  57%|████████████         | 306/534 [04:09<02:53,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  57%|████████████         | 307/534 [04:09<02:42,  1.39it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 11]
Trying to call OpenAI API...


Processing dataset:  58%|████████████         | 308/534 [04:10<03:02,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  58%|████████████▏        | 309/534 [04:11<02:55,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  58%|████████████▏        | 310/534 [04:12<02:53,  1.29it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  58%|████████████▏        | 311/534 [04:13<03:06,  1.19it/s]

Retrieved in-context learning examples with idx:  [128, 191, 65]
Trying to call OpenAI API...


Processing dataset:  58%|████████████▎        | 312/534 [04:13<02:51,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  59%|████████████▎        | 313/534 [04:14<02:55,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  59%|████████████▎        | 314/534 [04:15<02:56,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  59%|████████████▍        | 315/534 [04:16<02:50,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  59%|████████████▍        | 316/534 [04:16<02:44,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 11]
Trying to call OpenAI API...


Processing dataset:  59%|████████████▍        | 317/534 [04:17<03:00,  1.21it/s]

Retrieved in-context learning examples with idx:  [128, 11, 65]
Trying to call OpenAI API...


Processing dataset:  60%|████████████▌        | 318/534 [04:18<02:51,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 11, 1032]
Trying to call OpenAI API...


Processing dataset:  60%|████████████▌        | 319/534 [04:19<02:41,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  60%|████████████▌        | 320/534 [04:20<02:43,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  60%|████████████▌        | 321/534 [04:20<02:51,  1.24it/s]

Retrieved in-context learning examples with idx:  [356, 11, 128]
Trying to call OpenAI API...


Processing dataset:  60%|████████████▋        | 322/534 [04:21<02:52,  1.23it/s]

Retrieved in-context learning examples with idx:  [11, 356, 413]
Trying to call OpenAI API...


Processing dataset:  60%|████████████▋        | 323/534 [04:22<02:52,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  61%|████████████▋        | 324/534 [04:24<04:15,  1.22s/it]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  61%|████████████▊        | 325/534 [04:25<04:02,  1.16s/it]

Retrieved in-context learning examples with idx:  [413, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  61%|████████████▊        | 326/534 [04:26<03:34,  1.03s/it]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  61%|████████████▊        | 327/534 [04:27<03:18,  1.04it/s]

Retrieved in-context learning examples with idx:  [356, 413, 128]
Trying to call OpenAI API...


Processing dataset:  61%|████████████▉        | 328/534 [04:28<03:11,  1.07it/s]

Retrieved in-context learning examples with idx:  [11, 128, 356]
Trying to call OpenAI API...


Processing dataset:  62%|████████████▉        | 329/534 [04:28<03:01,  1.13it/s]

Retrieved in-context learning examples with idx:  [356, 11, 413]
Trying to call OpenAI API...


Processing dataset:  62%|████████████▉        | 330/534 [04:29<02:56,  1.15it/s]

Retrieved in-context learning examples with idx:  [11, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  62%|█████████████        | 331/534 [04:30<02:57,  1.15it/s]

Retrieved in-context learning examples with idx:  [356, 413, 128]
Trying to call OpenAI API...


Processing dataset:  62%|█████████████        | 332/534 [04:31<02:55,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  62%|█████████████        | 333/534 [04:32<02:45,  1.22it/s]

Retrieved in-context learning examples with idx:  [191, 31, 336]
Trying to call OpenAI API...


Processing dataset:  63%|█████████████▏       | 334/534 [04:33<02:44,  1.22it/s]

Retrieved in-context learning examples with idx:  [1032, 413, 356]
Trying to call OpenAI API...


Processing dataset:  63%|█████████████▏       | 335/534 [04:33<02:48,  1.18it/s]

Retrieved in-context learning examples with idx:  [11, 356, 413]
Trying to call OpenAI API...


Processing dataset:  63%|█████████████▏       | 336/534 [04:34<02:47,  1.19it/s]

Retrieved in-context learning examples with idx:  [413, 128, 1032]
Trying to call OpenAI API...


Processing dataset:  63%|█████████████▎       | 337/534 [04:35<02:38,  1.24it/s]

Retrieved in-context learning examples with idx:  [413, 356, 191]
Trying to call OpenAI API...


Processing dataset:  63%|█████████████▎       | 338/534 [04:36<02:33,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  63%|█████████████▎       | 339/534 [04:36<02:28,  1.32it/s]

Retrieved in-context learning examples with idx:  [191, 128, 356]
Trying to call OpenAI API...


Processing dataset:  64%|█████████████▎       | 340/534 [04:37<02:32,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  64%|█████████████▍       | 341/534 [04:38<02:31,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  64%|█████████████▍       | 342/534 [04:39<02:32,  1.26it/s]

Retrieved in-context learning examples with idx:  [191, 128, 413]
Trying to call OpenAI API...


Processing dataset:  64%|█████████████▍       | 343/534 [04:40<02:31,  1.26it/s]

Retrieved in-context learning examples with idx:  [128, 191, 1000]
Trying to call OpenAI API...


Processing dataset:  64%|█████████████▌       | 344/534 [04:41<02:34,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  65%|█████████████▌       | 345/534 [04:41<02:33,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  65%|█████████████▌       | 346/534 [04:42<02:44,  1.14it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  65%|█████████████▋       | 347/534 [04:43<02:31,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  65%|█████████████▋       | 348/534 [04:44<02:28,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  65%|█████████████▋       | 349/534 [04:45<02:40,  1.15it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  66%|█████████████▊       | 350/534 [04:46<02:36,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  66%|█████████████▊       | 351/534 [04:47<02:45,  1.10it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  66%|█████████████▊       | 352/534 [04:47<02:32,  1.19it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  66%|█████████████▉       | 353/534 [04:48<02:39,  1.14it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  66%|█████████████▉       | 354/534 [04:49<02:28,  1.21it/s]

Retrieved in-context learning examples with idx:  [128, 697, 739]
Trying to call OpenAI API...


Processing dataset:  66%|█████████████▉       | 355/534 [04:50<02:32,  1.17it/s]

Retrieved in-context learning examples with idx:  [128, 1114, 321]
Trying to call OpenAI API...


Processing dataset:  67%|██████████████       | 356/534 [04:51<02:25,  1.22it/s]

Retrieved in-context learning examples with idx:  [191, 4, 984]
Trying to call OpenAI API...


Processing dataset:  67%|██████████████       | 357/534 [04:51<02:23,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  67%|██████████████       | 358/534 [04:52<02:15,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  67%|██████████████       | 359/534 [04:53<02:19,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  67%|██████████████▏      | 360/534 [04:54<02:20,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  68%|██████████████▏      | 361/534 [04:55<02:19,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  68%|██████████████▏      | 362/534 [04:56<02:35,  1.11it/s]

Retrieved in-context learning examples with idx:  [128, 1114, 191]
Trying to call OpenAI API...


Processing dataset:  68%|██████████████▎      | 363/534 [04:57<02:30,  1.14it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  68%|██████████████▎      | 364/534 [04:57<02:26,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 413, 11]
Trying to call OpenAI API...


Processing dataset:  68%|██████████████▎      | 365/534 [04:58<02:23,  1.18it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  69%|██████████████▍      | 366/534 [04:59<02:17,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 191, 356]
Trying to call OpenAI API...


Processing dataset:  69%|██████████████▍      | 367/534 [05:00<02:15,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  69%|██████████████▍      | 368/534 [05:01<02:35,  1.07it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  69%|██████████████▌      | 369/534 [05:02<02:33,  1.07it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  69%|██████████████▌      | 370/534 [05:03<02:21,  1.16it/s]

Retrieved in-context learning examples with idx:  [128, 191, 413]
Trying to call OpenAI API...


Processing dataset:  69%|██████████████▌      | 371/534 [05:03<02:13,  1.22it/s]

Retrieved in-context learning examples with idx:  [128, 191, 65]
Trying to call OpenAI API...


Processing dataset:  70%|██████████████▋      | 372/534 [05:04<02:08,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  70%|██████████████▋      | 373/534 [05:05<02:05,  1.29it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  70%|██████████████▋      | 374/534 [05:06<02:04,  1.28it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  70%|██████████████▋      | 375/534 [05:06<02:01,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  70%|██████████████▊      | 376/534 [05:07<01:59,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 191, 1032]
Trying to call OpenAI API...


Processing dataset:  71%|██████████████▊      | 377/534 [05:08<02:00,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  71%|██████████████▊      | 378/534 [05:09<01:57,  1.33it/s]

Retrieved in-context learning examples with idx:  [128, 356, 413]
Trying to call OpenAI API...


Processing dataset:  71%|██████████████▉      | 379/534 [05:09<01:59,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  71%|██████████████▉      | 380/534 [05:10<02:05,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  71%|██████████████▉      | 381/534 [05:11<02:15,  1.13it/s]

Retrieved in-context learning examples with idx:  [128, 413, 1032]
Trying to call OpenAI API...


Processing dataset:  72%|███████████████      | 382/534 [05:12<02:02,  1.24it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  72%|███████████████      | 383/534 [05:13<02:00,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  72%|███████████████      | 384/534 [05:14<01:58,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 191, 1032]
Trying to call OpenAI API...


Processing dataset:  72%|███████████████▏     | 385/534 [05:14<02:00,  1.23it/s]

Retrieved in-context learning examples with idx:  [128, 413, 356]
Trying to call OpenAI API...


Processing dataset:  72%|███████████████▏     | 386/534 [05:15<01:56,  1.27it/s]

Retrieved in-context learning examples with idx:  [191, 128, 65]
Trying to call OpenAI API...


Processing dataset:  72%|███████████████▏     | 387/534 [05:16<01:57,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 413]
Trying to call OpenAI API...


Processing dataset:  73%|███████████████▎     | 388/534 [05:17<01:51,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  73%|███████████████▎     | 389/534 [05:17<01:46,  1.36it/s]

Retrieved in-context learning examples with idx:  [128, 1032, 356]
Trying to call OpenAI API...


Processing dataset:  73%|███████████████▎     | 390/534 [05:18<01:53,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  73%|███████████████▍     | 391/534 [05:19<01:49,  1.30it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  73%|███████████████▍     | 392/534 [05:20<01:59,  1.19it/s]

Retrieved in-context learning examples with idx:  [128, 11, 356]
Trying to call OpenAI API...


Processing dataset:  74%|███████████████▍     | 393/534 [05:21<02:06,  1.12it/s]

Retrieved in-context learning examples with idx:  [128, 11, 191]
Trying to call OpenAI API...


Processing dataset:  74%|███████████████▍     | 394/534 [05:22<02:06,  1.11it/s]

Retrieved in-context learning examples with idx:  [128, 191, 11]
Trying to call OpenAI API...


Processing dataset:  74%|███████████████▌     | 395/534 [05:23<01:57,  1.19it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  74%|███████████████▌     | 396/534 [05:23<01:49,  1.26it/s]

Retrieved in-context learning examples with idx:  [191, 128, 11]
Trying to call OpenAI API...


Processing dataset:  74%|███████████████▌     | 397/534 [05:24<01:51,  1.22it/s]

Retrieved in-context learning examples with idx:  [191, 65, 140]
Trying to call OpenAI API...


Processing dataset:  75%|███████████████▋     | 398/534 [05:25<01:47,  1.27it/s]

Retrieved in-context learning examples with idx:  [128, 11, 413]
Trying to call OpenAI API...


Processing dataset:  75%|███████████████▋     | 399/534 [05:26<01:43,  1.30it/s]

Retrieved in-context learning examples with idx:  [11, 128, 191]
Trying to call OpenAI API...


Processing dataset:  75%|███████████████▋     | 400/534 [05:26<01:42,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 356, 11]
Trying to call OpenAI API...


Processing dataset:  75%|███████████████▊     | 401/534 [05:27<01:41,  1.31it/s]

Retrieved in-context learning examples with idx:  [128, 11, 1032]
Trying to call OpenAI API...


Processing dataset:  75%|███████████████▊     | 402/534 [05:28<01:39,  1.33it/s]

Retrieved in-context learning examples with idx:  [11, 128, 356]
Trying to call OpenAI API...


Processing dataset:  75%|███████████████▊     | 403/534 [05:29<01:45,  1.25it/s]

Retrieved in-context learning examples with idx:  [128, 356, 1032]
Trying to call OpenAI API...


Processing dataset:  76%|███████████████▉     | 404/534 [05:30<01:46,  1.22it/s]


IndexError: index 404 is out of bounds for dimension 0 with size 404

In [None]:
df_test.to_csv(f"predictions/{model}_enriched_kw_test_outputs_in_context_ICL.csv") #save k-nearest-neihgbor_ICL to csv

df_test[f'gpt_predictions_in_context_random'] = apply_gpt_with_progress(df_train, df_test['input_journal_title_abstract'], use_random=True)

In [None]:
df_test.to_csv(f"predictions/{model}_enriched_kw_test_outputs_random_ICL.csv") #save random_ICL to csv

## Evaluate

In [None]:
df_test_to_eval = df_test.copy()
df_test_to_eval.head()

In [None]:
prompt_ids_to_test = ["in_context", "in_context_random"]

In [None]:
labels = ["Human-systematic-review", "Human-RCT-drug-intervention", "Human-RCT-non-drug-intervention", "Human-RCT-non-intervention", "Human-case-report", "Human-non-RCT-drug-intervention", "Human-non-RCT-non-drug-intervention", "Animal-systematic-review", "Animal-drug-intervention", "Animal-non-drug-intervention", "Animal-other", "Non-systematic-review", "In-vitro-study", "Clinical-study-protocol", "Remaining"]

label_to_numerical = {label: i for i, label in enumerate(labels)}
label_to_numerical["label missing"] = -1

In [None]:
Map predictions to numerical

In [None]:
label_to_numerical = {
    'Remaining': 0,
    'Non-systematic-review': 1,
    'Human-non-RCT-non-drug-intervention': 2,
    'Human-non-RCT-drug-intervention': 3,
    'Human-case-report': 4,
    'Animal-other': 5,
    'Animal-drug-intervention': 6,
    'Human-systematic-review': 7,
    'In-vitro-study': 8,
    'Human-RCT-non-drug-intervention': 9,
    'Animal-non-drug-intervention': 10,
    'Human-RCT-drug-intervention': 11,
    'Clinical-study-protocol': 12,
    'Human-RCT-non-intervention': 13
}

In [None]:
numerical_to_label = {v: f"{v} - {k}" for k, v in label_to_numerical.items()}

In [None]:
import difflib

def map_label_fuzzy(label, label_dict):
    #label = label.lower().replace('-', '').replace('_', ' ').strip()
    best_match = difflib.get_close_matches(label, label_dict.keys(), n=1, cutoff=0.6)
    if best_match:
        #print(f'{label} matched to {best_match[0]}')
        return label_dict[best_match[0]]
    return -1

In [None]:
map_label_fuzzy("RCT-drug-intervention", label_to_numerical)

In [None]:
def map_label_to_numerical(label, label_dict):
    # Check if label is a dictionary
    if isinstance(label, dict):
        # Extract the label with the highest score/probability
        highest_label = max(normalized_label, key=label.get)
        return label_dict.get(highest_label, -1)
    else:
         # Normalize label
        normalized_label = label.replace(',', '').strip().replace(' ', '-')#.strip()
        # Directly map string labels to numerical IDs
        numerical_label = label_dict.get(label, -1)
        # Fuzzy match if no direct mapping possible
        if numerical_label == -1:
            numerical_label = map_label_fuzzy(normalized_label, label_dict)
        # If fuzzy match did not work, check if the label string contains the key 'label' and use it to split the string; keep everything to the right as the potential label
        if numerical_label == -1:
            if 'label' in label:
                label_part = label.split('label')[1]
                numerical_label = map_label_fuzzy(label_part, label_dict)
        return numerical_label
        
# Convert accepted labels to numerical
df['accepted_label_numerical'] = df['accepted_label'].apply(lambda x: map_label_to_numerical(x, label_to_numerical))


for col in prediction_columns:
    df[f'{col}_numerical'] = df[col].apply(lambda x: map_label_to_numerical(x, label_to_numerical))

df.head()

In [None]:
Important: some labels from GPT could not be mapped to a target label

In [None]:
rows_with_minus_one = df[(df == -1).any(axis=1)]
rows_with_minus_one

In [None]:
def evaluate_predictions(df, target_label_col, prompt_ids_to_eval, model, eval_type, label_to_numerical, numerical_to_label): ## Initialize a list to hold DataFrame for each report and summary statistics
    report_dfs = []
    summary_stats = []

    # Iterate over each GPT prediction column
    for prompt_id in prompt_ids_to_eval:
        print("Evaluating ", prompt_id)
        prediction_col = f'gpt_predictions_{prompt_id}_clean'

    df_test_to_eval[prediction_col] = df_test_to_eval[f'gpt_predictions_{prompt_id}'].apply(
            lambda x: json.loads(x)['gpt_label'] if isinstance(x, str) and 'gpt_label' in json.loads(x) else x)
    
        # Map GPT predictions to numerical values
        df_test_to_eval[f'{prediction_col}_numerical'] = df_test_to_eval[prediction_col].apply(map_label_to_numerical)

        # Extract arrays for evaluation
        y_true = df_test_to_eval['accepted_label_numerical'].values
        y_pred = df_test_to_eval[f'{prediction_col}_numerical'].values
    
        # Calculate metrics
        accuracy = accuracy_score(y_true, y_pred)
        accuracy_balanced = balanced_accuracy_score(y_true, y_pred)
        report = classification_report(y_true, y_pred, output_dict=True, zero_division=0, labels=range(len(labels)), target_names=labels)
    
        # Create DataFrame from report
        report_df = pd.DataFrame(report).transpose()
        report_df['Prompt ID'] = prompt_id  # Add column to indicate the prompt ID
        report_dfs.append(report_df)
    
        # Extract summary statistics (average precision, recall, F1)
        summary = report_df.loc['weighted avg', ['precision', 'recall', 'f1-score']].to_dict()
        summary['Prompt ID'] = prompt_id
        summary_stats.append(summary)

        # Plotting confusion matrix
        plt.figure(figsize=(10, 6))
        ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=range(len(label_to_numerical)), yticklabels=range(len(label_to_numerical)))
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=13)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontsize=13)
        plt.title(f'Confusion Matrix for Model {model} and ICL-Prompt {prompt_id}', fontsize=14)
        plt.xlabel('Predicted Labels', fontsize=13)
        plt.ylabel('True Labels', fontsize=13)

        # Add an inset with label mapping
        textstr = '\n'.join([f'{v}: {k}' for k, v in label_to_numerical.items()])
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        ax.text(1.16, 1.0, textstr, transform=ax.transAxes, fontsize=10, verticalalignment='top', bbox=props)
        plt.tight_layout()
        plt.savefig(f'plots/confusion_matrix_{model}_enriched_kw_test_{prompt_id}_{eval_type}_ICL.pdf')  # Save to PDF

        # Combine all report DataFrames
        all_reports_df = pd.concat(report_dfs)

        # Create a summary table for average precision, recall, and F1-score
        summary_df = pd.DataFrame(summary_stats)

        # Save results to CSV files
        all_reports_df.to_csv(f"evaluations/{model}_enriched_kw_test_per_class_{'_'.join(prompt_ids_to_eval)}_{eval_type}_ICL.csv")
        summary_df.to_csv(f"evaluations/{model}_enriched_kw_test_summary_{'_'.join(prompt_ids_to_eval)}_{eval_type}_ICL.csv")

In [None]:
all_reports_df

In [None]:
summary_df

In [None]:
print(summary_df)