In [None]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', None)
from tqdm import tqdm
import random
from pprint import pprint
import re
import copy
import json

In [None]:
import time

from openai import OpenAI

# Pause (in seconds) used by gpt4() when backing off before a retry.
SLEEP_TIME = 1

# Single shared API client for the whole notebook. Credentials are resolved by
# the OpenAI SDK itself (typically the OPENAI_API_KEY environment variable).
client = OpenAI()

def gpt4(input_text, prior_messages=None, max_retries=10):
    """Send one user turn to GPT-4 Turbo and return the reply plus the updated history.

    Args:
        input_text: Content of the new user message.
        prior_messages: Optional chat history (a list of ``{"role", "content"}``
            dicts) to continue from. It is deep-copied, so the caller's list is
            never mutated. When omitted, the conversation starts from a default
            system prompt.
        max_retries: Number of times a failed request is retried before the
            last error is re-raised. Pass ``None`` to retry indefinitely.

    Returns:
        A ``(output_text, messages)`` tuple: the assistant's reply and the full
        message list including the new user and assistant turns.

    Raises:
        Exception: Whatever the client raised on the final attempt, once
            ``max_retries`` retries have been used up.
    """
    gpt4_kwargs = {
        "model": "gpt-4-turbo",
        "temperature": 0,
    }
    # Cap on back-off growth: the delay never exceeds SLEEP_TIME * 2**6.
    max_backoff_doublings = 6

    if prior_messages is None:
        messages = [{"role": "system", "content": "You are a helpful assistant."}]
    else:
        messages = copy.deepcopy(prior_messages)
    messages.append({"role": "user", "content": input_text})

    failures = 0
    while True:
        try:
            response = client.chat.completions.create(messages=messages, **gpt4_kwargs)
            break
        except Exception as e:
            failures += 1
            print("-"*30)
            print(e)
            if max_retries is not None and failures > max_retries:
                # A persistent failure (bad credentials, invalid request, ...)
                # must surface instead of looping forever.
                print("-"*30)
                raise
            # Back off after every failure, not only rate limits, so a failing
            # endpoint is never hit in a tight loop.
            delay = SLEEP_TIME * 2 ** min(failures - 1, max_backoff_doublings)
            print(f"Sleep for {delay}......")
            time.sleep(delay)
            print("-"*30)
    output_text = response.choices[0].message.content
    messages.append({"role": "assistant", "content": output_text})
    return output_text, messages

In [None]:
def load_data(path):
    """Read every sheet of an Excel workbook into memory.

    Args:
        path: Location of the workbook; passed straight to ``pd.ExcelFile``.

    Returns:
        A dict mapping each sheet name to the ``DataFrame`` parsed from it,
        in the order given by the workbook's ``sheet_names``.
    """
    # The context manager closes the underlying file handle once all sheets
    # have been parsed, even if parsing one of them raises.
    with pd.ExcelFile(path) as workbook:
        return {sheet: workbook.parse(sheet) for sheet in workbook.sheet_names}

In [None]:
# Parse every sheet of the annotation workbook into a dict keyed by sheet name.
# The path is relative, so the workbook must sit in the notebook's working directory.
raw_data = load_data("Answerability Annotation.xlsx")

In [None]:
# Prompt sent to the model for sub-question coverage judgments. Three
# placeholders are filled in before sending: $FEW-SHOT-EXAMPLES, $PASSAGE and
# $LIST_OF_QUESTIONS. The JSON shape shown to the model must itself be
# well-formed ("key": value), because the reply is parsed with json.loads.
prompt_template = """You are given a passage and a list of questions. For each question, determine if there exists any part of the passage that can answer the question.
If the question is answered, identify the text span that answers the question; if the question is unanswered, just return "None".

Organize your response in JSON-format, shaped as the following:

[
    {
        "question": QUESTION_1,
        "judgment": "answered",
        "text_span": YOUR_IDENTIFIED_TEXT_SPAN
    },
    {
        "question": QUESTION_2,
        "judgment": "unanswered",
        "text_span": "None"
    },
    ...
]

Place the JSON-format response between <answer> and </answer> tags.

Here are a few examples you can use for reference:

$FEW-SHOT-EXAMPLES

Passage: $PASSAGE
List of questions: $LIST_OF_QUESTIONS
Response in JSON-format: """

In [None]:
# Sheets (tabs) of the annotation workbook whose rows are turned into few-shot demonstrations.
HARD = ['Article1', 'Article2', 'Article3', 'Article4', 'Article5']


def prepare_few_shot_examples():
    """Render the annotated sheets listed in ``HARD`` as few-shot prompt text.

    Reads the module-level ``raw_data`` mapping. For every tab in ``HARD`` and
    for each of the two systems ("You.com", "Perplexity.ai") one example is
    produced, consisting of the passage, the list of sub-questions, and the
    gold JSON response built from the ``<system>-judgment`` column.

    The passage is the system's cell in the first row with the ``[``/``]``
    markers removed. For rows judged "answered", the text between the first
    ``[`` and the following ``]`` in that row's cell is the answer span.
    NOTE(review): this presumes every row of a sheet holds the same response
    with a different span bracketed -- confirm against the workbook.

    Returns:
        All examples joined by blank lines, as a single string.

    Raises:
        AssertionError: If a judgment is neither "answered" nor "unanswered".
        ValueError: If a row is judged "answered" but has no ``[span]`` marked.
    """
    all_examples = list()
    for tab in HARD:
        df = raw_data[tab]
        list_of_questions = df['Sub-Question'].tolist()
        # Identical for both systems, so serialise it once per tab.
        questions_json = json.dumps(list_of_questions, indent=4)
        for rag_sys in ["You.com", "Perplexity.ai"]:
            # Positional access: do not rely on the index containing the label 0.
            passage = df[rag_sys].iloc[0].replace("[", "").replace("]", "")
            example_response = list()
            for row_idx, row in df.iterrows():
                question = row["Sub-Question"]
                judgment = row[f"{rag_sys}-judgment"]
                assert judgment in ("answered", "unanswered"), (
                    f"{tab} / {rag_sys} / row {row_idx}: unexpected judgment {judgment!r}"
                )
                if judgment == "unanswered":
                    text_span = "None"
                else:
                    annotated = row[rag_sys]
                    start = annotated.find("[")
                    end = annotated.find("]", start + 1) if start != -1 else -1
                    if start == -1 or end == -1:
                        raise ValueError(
                            f"{tab} / {rag_sys} / row {row_idx}: judged 'answered' "
                            "but no [span] is marked in the response"
                        )
                    text_span = annotated[start + 1:end]
                example_response.append({"question": question, "judgment": judgment, "text_span": text_span})
            # Assemble the text directly rather than by chained placeholder
            # replacement, so a passage that happens to contain "$..." tokens
            # is left exactly as written.
            example = (
                f"Passage: {passage}\n"
                f"List of questions: {questions_json}\n"
                f"Response in JSON-format: {json.dumps(example_response, indent=4)}"
            )
            all_examples.append(example)
    return "\n\n".join(all_examples)

# Built once at module level; subq_coverage_measurement() splices this text into every prompt.
few_shot_examples = prepare_few_shot_examples()

def subq_coverage_measurement(passage, list_of_questions):
    """Ask the model which of the given questions the passage answers.

    Fills ``prompt_template`` with the few-shot examples, the passage and the
    JSON-encoded question list, sends it through ``gpt4`` and parses the reply.

    Args:
        passage: Text to be checked against the questions.
        list_of_questions: JSON-serialisable list of question strings.

    Returns:
        The parsed JSON reply (per the prompt, a list of dicts with
        "question", "judgment" and "text_span"), or ``None`` when the reply
        is missing or is not valid JSON.
    """
    substitutions = {
        "$FEW-SHOT-EXAMPLES": few_shot_examples,
        "$PASSAGE": passage,
        "$LIST_OF_QUESTIONS": json.dumps(list_of_questions, indent=4),
    }
    # Fill all placeholders in a single pass so that placeholder-like text
    # inside a substituted value (e.g. a passage mentioning "$LIST_OF_QUESTIONS")
    # is never expanded a second time.
    placeholder_pattern = "|".join(re.escape(key) for key in substitutions)
    prompt = re.sub(placeholder_pattern, lambda match: substitutions[match.group(0)], prompt_template)

    raw_pred, _ = gpt4(prompt)
    if not isinstance(raw_pred, str):
        # The API may return no text content; treat that as an unparseable reply.
        return None

    # Prefer the payload between the <answer> tags, ignoring any text the model
    # wrote around them; fall back to simply stripping stray tags.
    tagged = re.search(r"<answer>(.*?)</answer>", raw_pred, flags=re.DOTALL)
    if tagged is not None:
        payload = tagged.group(1)
    else:
        payload = raw_pred.replace("<answer>", "").replace("</answer>", "")

    try:
        parsed_pred = json.loads(payload.strip())
    except ValueError:
        # json.JSONDecodeError subclasses ValueError; anything else (including
        # KeyboardInterrupt) is deliberately left to propagate.
        parsed_pred = None
    return parsed_pred