In [70]:
import ast
import csv
import io
import json
import os
import random
from typing import Any, Dict, List, Tuple

import numpy as np
import pandas as pd
from openai import OpenAI

from src.squadv2_instructions import explanation_icl_input, explanation_instruction, normal_icl_input, normal_instruction
from src.utils.general_utils import white_space_fix

In [58]:
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [59]:
def process_squadv2_dataset(
    file_name: str, experiment_type: str, instruction_template: str, input_template: str, output_template: str
) -> Tuple[List[str], List[str], List[str], List[str], str]:
    """Read and pre-process the squadv2 dataset for my application."""

    dataset = pd.read_csv(file_name, sep="\t").to_dict(orient="records")
    if experiment_type == "normal_no_icl":
        instruction = normal_instruction
    elif experiment_type == "explanation_no_icl":
        instruction = explanation_instruction

    formed_instruction = instruction_template.format(instruction=instruction)

    next_example_number = 11 if "_no_" not in experiment_type else -1
    squad_inputs = []
    squad_outputs = []
    gold_outputs = []
    squad_ids = []
    idx = 0
    for row in dataset:
        idx += 1
        context = row["context"]
        question = row["question"]
        gold_answers = ast.literal_eval(row["answers"])
        context = white_space_fix(context)
        question = white_space_fix(question)
        if experiment_type == "normal_no_icl":
            user_final_message = f"Passage: {context}"
            user_final_message += f"\nQuestion: {question}"
            user_final_message += "\nFinal Answer: "
        elif experiment_type == "explanation_no_icl":
            user_final_message = f"Passage: {context}"
            user_final_message += f"\nQuestion: {question}"
            user_final_message += "\nExplanations and Thought Process and Final Answer: "
        formed_input = input_template.format(input=user_final_message)
        squad_input = formed_input
        squad_inputs.append(squad_input)
        squad_ids.append(str(idx))

        gold_outputs.append("_@_".join(gold_answers))
        gold_answer = random.choice(gold_answers)
        if experiment_type == "normal_no_icl":
            squad_output = output_template.format(output=f"Final Answer: {gold_answer}")
            squad_outputs.append(squad_output)
    return squad_inputs, squad_ids, squad_outputs, gold_outputs, formed_instruction

In [60]:
# my gpt4-omini chat templates.
instruction_template = "<s> You will output a json object containing the following information:\n{instruction} </s>"
input_template = "<s> {input} </s>"
output_template = "<s> {output} </s>"
input_file = "../data/0.1-shot-datasets/squad/original_validation_part3.tsv"
experiment_type = "explanation_no_icl"
# read the input data.
squad_inputs, squad_ids, _, gold_outputs, formed_instruction = process_squadv2_dataset(
    input_file, experiment_type, instruction_template, input_template, output_template
)

row_id_to_answer_mapper = {row_id: gold_outputs[idx] for idx, row_id in enumerate(squad_ids)}
# print(row_id_to_answer_mapper)
print(len(squad_inputs))
print(len(squad_ids))
print(len(gold_outputs))
print(squad_inputs[0])
print(squad_ids[0])
print(gold_outputs[0])
print(formed_instruction)

4037
4037
4037
<s> Passage: The system of bureaucracy created by Kublai Khan reflected various cultures in the empire, including that of the Han Chinese, Khitans, Jurchens, Mongols, and Tibetan Buddhists. While the official terminology of the institutions may indicate the government structure was almost purely that of native Chinese dynasties, the Yuan bureaucracy actually consisted of a mix of elements from different cultures. The Chinese-style elements of the bureaucracy mainly came from the native Tang, Song, as well as Khitan Liao and Jurchen Jin dynasties. Chinese advisers such as Liu Bingzhong and Yao Shu gave strong influence to Kublai's early court, and the central government administration was established within the first decade of Kublai's reign. This government adopted the traditional Chinese tripartite division of authority among civil, military, and censorial offices, including the Central Secretariat (Zhongshu Sheng) to manage civil affairs, the Privy Council (Chinese: 樞密

In [61]:
tasks = []

for index, squad_input in enumerate(squad_inputs):
    task = {
        "custom_id": f"task-squadv2-{experiment_type}-{squad_ids[index]}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4o-mini",
            "temperature": 0.001,
            "top_p": 0.9,
            "n": 1,
            "max_tokens": 256,
            "seed": 42,
            "logprobs": True,
            "stop": ["</s>"],
            "response_format": {"type": "json_object"},
            "messages": [{"role": "system", "content": formed_instruction}, {"role": "user", "content": squad_input}],
        },
    }
    tasks.append(task)

In [62]:
print(tasks[10])

{'custom_id': 'task-squadv2-explanation_no_icl-11', 'method': 'POST', 'url': '/v1/chat/completions', 'body': {'model': 'gpt-4o-mini', 'temperature': 0.001, 'top_p': 0.9, 'n': 1, 'max_tokens': 256, 'seed': 42, 'logprobs': True, 'stop': ['</s>'], 'response_format': {'type': 'json_object'}, 'messages': [{'role': 'system', 'content': '<s> You will output a json object containing the following information:\nThis task is about writing a correct answer for the reading comprehension task. Based on the information provided in a given passage, you should identify the shortest continuous text span from the passage that serves as an answer to the given question. Avoid answers that are incorrect or have incomplete justification. Generate your explanations and thought process before generating the final answer. If you cannot find the answer from the passage for the given question, then generate the <no_answer> tag as the final answer. </s>'}, {'role': 'user', 'content': '<s> Passage: While the exist

In [63]:
file_id = input_file.rstrip(".tsv").replace(".", "").replace("/", "_")
print(file_id)
file_name = f"../data/openai_batch_squadv2.{experiment_type}.{file_id}.jsonl"

with open(file_name, "w") as file:
    for obj in tasks:
        file.write(json.dumps(obj) + "\n")

_data_01-shot-datasets_squad_original_validation_part3


In [64]:
batch_file = client.files.create(file=open(file_name, "rb"), purpose="batch")

In [65]:
print(batch_file)

FileObject(id='file-TexCWKicXEGGeHQZGOpAOtwA', bytes=7747679, created_at=1721833381, filename='openai_batch_squadv2.explanation_no_icl._data_01-shot-datasets_squad_original_validation_part3.jsonl', object='file', purpose='batch', status='processed', status_details=None)


In [66]:
batch_job = client.batches.create(input_file_id=batch_file.id, endpoint="/v1/chat/completions", completion_window="24h")

In [69]:
batch_job = client.batches.retrieve(batch_job.id)
print(batch_job)

Batch(id='batch_IX22tvuT6KqkIGNQXwoceRqE', completion_window='24h', created_at=1721833384, endpoint='/v1/chat/completions', input_file_id='file-TexCWKicXEGGeHQZGOpAOtwA', object='batch', status='in_progress', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1721919784, failed_at=None, finalizing_at=None, in_progress_at=1721833388, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=4037))


In [68]:
result_file_id = batch_job.output_file_id
result = client.files.content(result_file_id).content

ValueError: Expected a non-empty value for `file_id` but received None

In [48]:
result_file_name = f"../data/openai_batch_squadv2.{experiment_type}.{file_id}.results.jsonl"

with open(result_file_name, "wb") as file:
    file.write(result)

In [49]:
# Read data and save in the correct format.
# Loading data from saved file
results = []
with open(result_file_name, "r") as file:
    for line in file:
        # Parsing the JSON string into a dict and appending to the list of results
        json_object = json.loads(line.strip())
        results.append(json_object)

In [50]:
result = results[0]["response"]["body"]["choices"][0]["message"]["content"]

In [51]:
print(result)


{"explanation": "The passage discusses various ctenophores and their feeding habits. It specifically mentions that members of the genus Haeckelia incorporate nematocysts from jellyfish into their tentacles. However, it does not provide information about Pleurobrachia incorporating anything into their tentacles. Therefore, since the passage does not mention what Pleurobrachia incorporates into their tentacles, the answer is <no_answer>.", "final_answer": "<no_answer>"}


In [52]:
print(results[100]["response"]["body"]["choices"][0]["message"]["content"])


{"explanation": "The passage states that Bill Aken made his first TV appearance playing guitar on the old country-western show at The Fresno Barn. This directly answers the question about the show on which he made his television debut.", "final_answer": "The Fresno Barn"}


In [53]:
print([each["logprob"] for each in results[100]["response"]["body"]["choices"][0]["logprobs"]["content"]])

[-0.051726244, -0.8501451, -0.20377287, -0.004627003, -0.16022788, -1.6882126e-05, -0.5010923, -0.16225983, -0.63491267, -0.0046973573, -0.35759595, -5.6769813e-06, -0.004808061, -0.0014078516, -4.723352e-06, -0.00075542775, -0.018155914, -7.89631e-07, -0.17613155, -3.202099e-05, -0.0013624972, -0.2633079, -0.15924208, -4.365741e-06, -9.0883464e-07, 0.0, -6.704273e-07, -0.0010581758, -0.070725225, -3.1281633e-07, 0.0, -0.0007828262, -0.32516602, -0.88980186, -0.024234312, -5.6769813e-06, -0.000107715314, -0.4178135, -0.75545716, -0.019023685, -0.6219828, 0.0, -0.043739893, -0.016116716, -3.5313153e-06, -0.011317071, -3.1281633e-07, -0.30381453, -5.1808798e-05, -0.31489024, -2.9994528e-05, -1.4855664e-05, -3.1281633e-07, -0.07989125, -0.0019476758, 0.0, -0.0011714138]


In [54]:
print(results[100]["custom_id"].removeprefix("task-squadv2-normal_no_icl-"))

task-squadv2-explanation_no_icl-101


In [56]:
output_file = (
    f"/scratch/ssd004/scratch/snajafi/gpt4-omini-predictions/original_validation_part2.{experiment_type}.gtp4-omini.tsv"
)

with io.open(output_file, mode="w", encoding="utf-8") as out_fp:
    writer = csv.writer(out_fp, quotechar='"', quoting=csv.QUOTE_ALL)
    headers = ["potential_answer", "prediction_score", "row_id", "gold_answer"]
    writer.writerow(headers)
    for idx, result in enumerate(results):
        potential_answer = result["response"]["body"]["choices"][0]["message"]["content"]
        logprobs = [
            each["logprob"] for each in result["response"]["body"]["choices"][0]["logprobs"]["content"] if each["logprob"] > 0.0
        ]
        prediction_score = np.mean(logprobs) if len(logprobs) > 0 else 0.0
        row_id = result["custom_id"].removeprefix(f"task-squadv2-{experiment_type}-")
        to_write = [potential_answer, prediction_score, row_id, row_id_to_answer_mapper[row_id]]
        writer.writerow(to_write)