In [8]:
import pprint
from utils import get_llm_response, load_json, write_json

### Belief and prediction generation prompts


##### Answer-conflicts

In [None]:
import re
import json

def get_dict(text):
    """Extract the JSON object embedded in an LLM response.

    Args:
        text: Raw response text expected to contain a ``{...}`` JSON object,
            possibly surrounded by extra prose.

    Returns:
        The substring holding the JSON object, still as text (callers pass it
        to ``json.loads``).

    Raises:
        IndexError: If ``text`` contains no ``{...}`` span at all.
    """
    # Greedy span from the first "{" to the last "}" in the response.
    match = re.search(r'{[\s\S]*}', text)
    if match is None:
        raise IndexError(f"no JSON object found in LLM response: {text[:200]!r}")
    try:
        # Parse exactly one JSON value starting at the first brace, so braces in
        # trailing commentary are not glued onto the object.
        _, end = json.JSONDecoder().raw_decode(text, match.start())
    except json.JSONDecodeError:
        # Nothing parseable at the first brace: hand back the greedy span and let
        # the caller's json.loads report the problem.
        return match.group(0)
    return text[match.start():end]

# Closed-book prompt template: asks for a short answer as a JSON object.
# The doubled braces become literal braces once str.format fills {question}.
belief_prompt = "\n".join([
    'Give a short answer to the following question. Your answer should be in the format '
    '{{"Explanation": "ANSWER EXPLANATION", "Answer": "ANSWER TEXT"}}:',
    "Q: {question}",
    "A:",
])
def get_model_belief(question, model="llama3-70b-instruct"):
    """Ask ``model`` the question with no evidence and return its parsed JSON reply.

    Args:
        question: Question text substituted into ``belief_prompt``.
        model: Model identifier forwarded to ``get_llm_response``.

    Returns:
        The object decoded by ``json.loads`` from the JSON span of the response.
    """
    filled_prompt = belief_prompt.format(question=question)
    raw_reply = get_llm_response(filled_prompt, model=model)
    json_text = get_dict(raw_reply)
    return json.loads(json_text)

# Evidence-grounded prompt template: two evidence passages, then the question.
# The doubled braces become literal braces once str.format fills the fields.
predict_prompt = "\n".join([
    'Given two related evidence, provide a short answer to the following question. '
    'Your answer should be in the format '
    '{{"Explanation": "ANSWER EXPLANATION", "Answer": "ANSWER TEXT"}}:',
    "Evidence 1: {e1}",
    "Evidence 2: {e2}",
    "Q: {question}",
    "A:",
])
def get_model_prediction(question, e1, e2, model="llama3-70b-instruct"):
    """Ask ``model`` the question alongside two evidence passages and parse its JSON reply.

    Args:
        question: Question text substituted into ``predict_prompt``.
        e1: Text inserted as "Evidence 1".
        e2: Text inserted as "Evidence 2".
        model: Model identifier forwarded to ``get_llm_response``.

    Returns:
        The object decoded by ``json.loads`` from the JSON span of the response.
    """
    filled_prompt = predict_prompt.format(e1=e1, e2=e2, question=question)
    raw_reply = get_llm_response(filled_prompt, model=model)
    json_text = get_dict(raw_reply)
    return json.loads(json_text)

In [None]:
import pandas as pd
# Load the answer-conflict annotations and keep only the rows whose
# `data_label` and `annotator_majority` columns both equal 1.
df = (
    pd.read_csv("../annotations/answer_conflict_annotations.csv")
    .loc[lambda frame: frame['data_label'] == 1]
    .loc[lambda frame: frame['annotator_majority'] == 1]
)
df

In [None]:
import os
from tqdm import tqdm
os.makedirs("results", exist_ok=True)

# Upper bound on attempts per LLM call, so a persistent failure (bad credentials,
# unknown model name, ...) surfaces as an error instead of retrying forever.
MAX_LLM_ATTEMPTS = 10


def _retry_llm_call(label, call, max_attempts=MAX_LLM_ATTEMPTS):
    """Run ``call()`` until it succeeds, giving up after ``max_attempts`` failures.

    Every failure is printed as ``label`` followed by the exception.

    Raises:
        RuntimeError: Once all attempts have failed (chained to the last error).
    """
    last_error = None
    for _ in range(max_attempts):
        try:
            return call()
        except Exception as e:
            print(label, e)
            last_error = e
    raise RuntimeError(
        f"{label}: giving up after {max_attempts} failed attempts"
    ) from last_error


# For every model: collect its closed-book belief and its evidence-grounded
# prediction for each annotated instance, then write one JSON file per model.
for model in ["llama3-8b-instruct", "llama3-70b-instruct", "claude-v3-haiku", "claude-v3-sonnet"]:
    prompt_records = []
    for item in tqdm(df.to_dict(orient="records")):
        # NOTE(review): eval() executes whatever is stored in these CSV cells. If the
        # columns only ever hold Python literals, ast.literal_eval would be the safer
        # drop-in -- confirm against the annotation files before switching.
        pairs = eval(item['pairs'])

        # pairs[0] is "<question>?<evidence 1>": split at the first question mark.
        # Raises ValueError if pairs[0] contains no "?".
        q = pairs[0]
        q_end = q.index("?")+1
        q = q[: q_end]
        e1 = pairs[0][q_end:].strip()
        # Assumes pairs[1] starts with the same question text -- TODO confirm.
        e2 = pairs[1][q_end:].strip()

        answer_pairs = eval(item['answer_pairs'])
        a1 = answer_pairs[0]
        a2 = answer_pairs[1]

        # Flag, for each answer, whether it appears among the ground truths.
        ground_truths = eval(item['ground_truths'])
        is_gt = [a1 in ground_truths, a2 in ground_truths]

        types = eval(item['annotated_type'])
        instance_id = item['instance_id']

        # Failures (API errors, malformed JSON in the reply) are retried a bounded
        # number of times.
        belief = _retry_llm_call("Belief", lambda: get_model_belief(q, model=model))
        prediction = _retry_llm_call("Predict", lambda: get_model_prediction(q, e1, e2, model))

        prompt_records.append({
            "question": q,
            "e1": (a1, is_gt[0], e1),
            "e2": (a2, is_gt[1], e2),
            "types": types,
            "llm_belief": belief,
            "llm_predict": prediction,
            "model": model,
            "instance_id": instance_id
        })
    write_json(f"results/{model}.json", prompt_records)

##### Factoid-conflicts

In [12]:
import pandas as pd
from utils import get_llm_response, load_json, write_json
# Load the factoid-conflict annotations (no row filtering is applied here).
factoid_df = pd.read_csv(
    filepath_or_buffer="../annotations/factoid_conflict_annotations.csv",
)

In [15]:
import re
import json

def get_dict(text):
    """Extract the JSON object embedded in an LLM response.

    Args:
        text: Raw response text expected to contain a ``{...}`` JSON object,
            possibly surrounded by extra prose.

    Returns:
        The substring holding the JSON object, still as text (callers pass it
        to ``json.loads``).

    Raises:
        IndexError: If ``text`` contains no ``{...}`` span at all.
    """
    # Greedy span from the first "{" to the last "}" in the response.
    match = re.search(r'{[\s\S]*}', text)
    if match is None:
        raise IndexError(f"no JSON object found in LLM response: {text[:200]!r}")
    try:
        # Parse exactly one JSON value starting at the first brace, so braces in
        # trailing commentary are not glued onto the object.
        _, end = json.JSONDecoder().raw_decode(text, match.start())
    except json.JSONDecodeError:
        # Nothing parseable at the first brace: hand back the greedy span and let
        # the caller's json.loads report the problem.
        return match.group(0)
    return text[match.start():end]

# Closed-book yes/no prompt template for the factoid-conflict section.
# This rebinds the `belief_prompt` name defined earlier in the notebook.
# The doubled braces become literal braces once str.format fills {question}.
belief_prompt = "\n".join([
    'Give a yes/no answer to the following question. Your answer should be in the format '
    '{{"Explanation": "ANSWER EXPLANATION", "Answer": "YES or NO or UNKNOWN"}}:',
    "Q: {question}",
    "A:",
])
def get_model_belief(question, model="llama3-70b-instruct"):
    """Ask ``model`` the yes/no question with no evidence and return its parsed JSON reply.

    Args:
        question: Question text substituted into ``belief_prompt``.
        model: Model identifier forwarded to ``get_llm_response``.

    Returns:
        The object decoded by ``json.loads`` from the JSON span of the response.
    """
    filled_prompt = belief_prompt.format(question=question)
    raw_reply = get_llm_response(filled_prompt, model=model)
    json_text = get_dict(raw_reply)
    return json.loads(json_text)

# Evidence-grounded yes/no prompt template for the factoid-conflict section.
# This rebinds the `predict_prompt` name defined earlier in the notebook.
# The doubled braces become literal braces once str.format fills the fields.
predict_prompt = "\n".join([
    'Given two related evidence, provide a yes/no answer to the following question. '
    'Your answer should be in the format '
    '{{"Explanation": "ANSWER EXPLANATION", "Answer": "YES or NO or UNKNOWN"}}:',
    "Evidence 1: {e1}",
    "Evidence 2: {e2}",
    "Q: {question}",
    "A:",
])
def get_model_prediction(question, e1, e2, model="llama3-70b-instruct"):
    """Ask ``model`` the yes/no question alongside two evidence passages and parse its JSON reply.

    Args:
        question: Question text substituted into ``predict_prompt``.
        e1: Text inserted as "Evidence 1".
        e2: Text inserted as "Evidence 2".
        model: Model identifier forwarded to ``get_llm_response``.

    Returns:
        The object decoded by ``json.loads`` from the JSON span of the response.
    """
    filled_prompt = predict_prompt.format(e1=e1, e2=e2, question=question)
    raw_reply = get_llm_response(filled_prompt, model=model)
    json_text = get_dict(raw_reply)
    return json.loads(json_text)

In [None]:
import os
from tqdm import tqdm
os.makedirs("factoid_results", exist_ok=True)

# Upper bound on attempts per LLM call, so a persistent failure (bad credentials,
# unknown model name, ...) surfaces as an error instead of retrying forever.
MAX_LLM_ATTEMPTS = 10


def _retry_llm_call(label, call, max_attempts=MAX_LLM_ATTEMPTS):
    """Run ``call()`` until it succeeds, giving up after ``max_attempts`` failures.

    Every failure is printed as ``label`` followed by the exception.

    Raises:
        RuntimeError: Once all attempts have failed (chained to the last error).
    """
    last_error = None
    for _ in range(max_attempts):
        try:
            return call()
        except Exception as e:
            print(label, e)
            last_error = e
    raise RuntimeError(
        f"{label}: giving up after {max_attempts} failed attempts"
    ) from last_error


# For every model: collect its closed-book belief and its evidence-grounded
# prediction for each factoid-conflict instance, then write one JSON file per model.
for model in ["claude-v3-haiku", "claude-v3-sonnet", "llama3-8b-instruct", "llama3-70b-instruct"]:
    prompt_records = []
    for item in tqdm(factoid_df.to_dict(orient="records")):
        # NOTE(review): eval() executes whatever is stored in these CSV cells. If the
        # columns only ever hold Python literals, ast.literal_eval would be the safer
        # drop-in -- confirm against the annotation files before switching.
        pairs = eval(item['pairs'])

        # pairs[0] is the question followed by evidence 1: split just after the first "?".
        # Raises ValueError if pairs[0] contains no "?".
        # NOTE(review): the answer-conflict loop slices at index("?")+1; the +2 here also
        # keeps the character following the question mark in `q` -- confirm this is
        # intended for the factoid data.
        q = pairs[0]
        q_end = q.index("?")+2
        q = q[: q_end]
        e1 = pairs[0][q_end:].strip()
        # Assumes pairs[1] starts with the same question text -- TODO confirm.
        e2 = pairs[1][q_end:].strip()

        a1, a2 = eval(item['answer_pairs'])

        factoid_pairs = eval(item['factoids_pairs'])
        af1 = factoid_pairs["factoids_1"]
        af2 = factoid_pairs["factoids_2"]

        # Stored as-is (not eval'd), unlike the other annotation columns.
        ground_truths = item['ground_truths']

        types = eval(item['annotated_type'])
        instance_id = item['instance_id']

        # Failures (API errors, malformed JSON in the reply) are retried a bounded
        # number of times.
        belief = _retry_llm_call("Belief", lambda: get_model_belief(q, model=model))
        prediction = _retry_llm_call("Predict", lambda: get_model_prediction(q, e1, e2, model))

        prompt_records.append({
            "question": q,
            "e1": (a1, af1,  e1),
            "e2": (a2, af2,  e2),
            "a": ground_truths,
            "types": types,
            "llm_belief": belief,
            "llm_predict": prediction,
            "model": model,
            "instance_id": instance_id
        })
    write_json(f"factoid_results/{model}.json", prompt_records)