In [1]:
import gc
import os
import json
import torch
import transformers
import pandas as pd
from tqdm import tqdm



In [4]:
def get_response(model, tok, messages, max_new_tokens=1):
    """Greedy-decode a reply to a chat and return only the newly generated text.

    Args:
        model: object providing ``generate(...)`` and a ``device`` attribute.
        tok: tokenizer providing ``apply_chat_template``, ``decode``,
            ``convert_tokens_to_ids`` and ``eos_token_id``.
        messages: list of ``{"role": ..., "content": ...}`` chat messages.
        max_new_tokens: generation budget (the default of 1 fits a '0'/'1' verdict).

    Returns:
        The decoded continuation, without the prompt tokens and with special
        tokens skipped.
    """
    # Always stop on the tokenizer's own EOS. The "<|eot_id|>" end-of-turn marker
    # is added only when the vocabulary really contains it: for other tokenizers
    # the lookup yields None or the unknown-token id, and neither may be used as
    # a stop token.
    terminators = [tok.eos_token_id]
    eot_id = tok.convert_tokens_to_ids("<|eot_id|>")
    if (
        eot_id is not None
        and eot_id != getattr(tok, "unk_token_id", None)
        and eot_id not in terminators
    ):
        terminators.append(eot_id)

    msg_tokenized = tok.apply_chat_template(messages, add_generation_prompt=True, return_tensors='pt')
    output_ids = model.generate(
        msg_tokenized.to(model.device),
        max_new_tokens=max_new_tokens,
        eos_token_id=terminators,
        do_sample=False,  # deterministic decoding
        pad_token_id=tok.eos_token_id,
    )
    # The returned sequence starts with the prompt; keep only what follows it.
    prompt_len = msg_tokenized.shape[-1]
    return tok.decode(output_ids[0][prompt_len:], skip_special_tokens=True)


# --- Run configuration -------------------------------------------------------
device = 'cuda:0'
topic_name = 'places_landmark'  # health_medication
model_ls = [
    'meta-llama/Meta-Llama-3.1-8B-Instruct',
    'mistralai/Mistral-7B-Instruct-v0.3',
    'google/gemma-2-9b-it',
    'lmsys/vicuna-7b-v1.5',
    'chavinlo/alpaca-native',
    'meta-llama/Meta-Llama-3-8B-Instruct',
]
model_id = model_ls[1]
# Tag used in file and column names: repo name only, lower-cased, '-' -> '_'.
model_id_format = model_id.rsplit('/', 1)[-1].lower().replace('-', '_')
print(model_id_format)

# --- Collect the topic names from the question files -------------------------
# `df` and `df_wh` stay bound to the last file read once the loop finishes.
topic_ls = []
for filename in os.listdir("../data/questions/all_3_types/"):
    if not filename.endswith(".json"):
        continue
    df = pd.read_json("../data/questions/all_3_types/" + filename, lines=True)
    df_wh = df.loc[df.type == 'wh'].copy()
    domain_topic_name = filename.replace('_questions.json', '')
    topic_ls.append(domain_topic_name)
print(topic_ls)

mistral_7b_instruct_v0.3


In [5]:
# System prompt for the answering model: ask for a terse answer only.
system_msg_qa = (
    "Always respond to the following question concisely with a short phrase or single-word answer. "
    "Do not repeat the question or provide additional context. "
)

# Earlier, topic-specific prompt variants (kept for reference):
# MC_content = "The following question's topic is about " + topic_name + ". Choose the only correct option for the multiple choice problem. (Answer 'A', 'B', 'C' or 'D')(Don't explain)"
# yes_no_content = "The following question's topic is about " + topic_name + ". Only need to answer 'Yes' or 'No', and don't explain"
# Wh_content = "The following question's topic is about " + topic_name + ". Directly give me the answer in 'phrase' or 'word' format. Don't give me a sentence or explain"

# Tokenizer and weights for the model under test, moved to the configured device.
tok_qa = transformers.AutoTokenizer.from_pretrained(model_id)
model_qa = transformers.AutoModelForCausalLM.from_pretrained(model_id)
model_qa = model_qa.to(device)

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [31]:
# Show the column names of the wh-question frame currently bound to `df_wh`.
df_wh.columns

Index(['type', 'subject', 'relation', 'object', 'question', 'label'], dtype='object')

In [6]:
# Get response for wh questions
# For every topic file: ask the QA model each wh-question, save the answers as a
# per-topic CSV, then write one combined CSV in which each row carries its own topic.
print(model_id_format)

output_col = f"output_{model_id_format}"
questions_dir = "../data/questions/all_3_types/"
per_topic_dir = f"../data/questions/wh_only/{model_id_format}"
os.makedirs(per_topic_dir, exist_ok=True)

topic_frames = []
for filename in sorted(os.listdir(questions_dir)):  # sorted -> same order on every run
    if not filename.endswith(".json"):
        continue
    df = pd.read_json(f"{questions_dir}{filename}", lines=True)
    df_wh = df[df.type == 'wh'].copy()
    domain_topic_name = filename.replace('_questions.json', '')
    # if domain_topic_name != 'places_landmark':
    #     continue
    print(f"file name: {filename}")

    ls_output = []
    for i in tqdm(df_wh.index):
        question = df_wh.loc[i, 'question']
        user_msg_qa = f'Question: {question}. Answer:'
        messages_qa = [{"role": "system", "content": system_msg_qa}, {"role": "user", "content": user_msg_qa}]
        ls_output.append(get_response(model_qa, tok_qa, messages_qa, max_new_tokens=16))

    df_wh[output_col] = ls_output
    df_wh.to_csv(f"{per_topic_dir}/{domain_topic_name}.csv", index=False)

    # Tag this topic's rows before combining. Setting the column on the combined
    # frame instead would overwrite the topic of all previously added rows.
    topic_frames.append(df_wh.assign(topic=domain_topic_name))

# Select columns by name: `topic` is appended last by assign(), so renaming the
# columns positionally would shift every label by one.
all_topics_columns = ['topic', 'type', 'subject', 'relation', 'object', 'question', 'label', output_col]
if topic_frames:
    # One concat at the end instead of growing the frame inside the loop.
    df_all_topics = pd.concat(topic_frames, axis=0, ignore_index=True)[all_topics_columns]
else:
    df_all_topics = pd.DataFrame(columns=all_topics_columns)
df_all_topics.to_csv(f"../data/questions/wh_only/all_topics_{model_id_format}.csv", index=False)

# Drop the QA model and release cached GPU memory before the judge model is loaded.
del model_qa
gc.collect()
torch.cuda.empty_cache()

mistral_7b_instruct_v0.3
file name: places_city_questions.json


100%|██████████| 500/500 [03:19<00:00,  2.50it/s]


df_wh.shape: (500, 7)
file name: human_actor_questions.json


100%|██████████| 500/500 [03:31<00:00,  2.36it/s]


df_wh.shape: (500, 7)
file name: art_sculpture_questions.json


100%|██████████| 500/500 [03:40<00:00,  2.27it/s]


df_wh.shape: (500, 7)
file name: art_literary_questions.json


100%|██████████| 500/500 [03:53<00:00,  2.14it/s]


df_wh.shape: (500, 7)
file name: places_landmark_questions.json


100%|██████████| 500/500 [03:30<00:00,  2.37it/s]


df_wh.shape: (500, 7)
file name: health_treatment_questions.json


100%|██████████| 116/116 [01:00<00:00,  1.90it/s]


df_wh.shape: (116, 7)
file name: health_medication_questions.json


100%|██████████| 314/314 [02:48<00:00,  1.87it/s]


df_wh.shape: (314, 7)
file name: event_sport_questions.json


100%|██████████| 500/500 [03:17<00:00,  2.53it/s]


df_wh.shape: (500, 7)
file name: event_history_questions.json


100%|██████████| 500/500 [04:16<00:00,  1.95it/s]


df_wh.shape: (500, 7)
file name: human_writer_questions.json


100%|██████████| 500/500 [03:36<00:00,  2.31it/s]


df_wh.shape: (500, 7)
file name: health_disease_questions.json


100%|██████████| 500/500 [04:16<00:00,  1.95it/s]


df_wh.shape: (500, 7)
file name: places_country_questions.json


100%|██████████| 500/500 [02:44<00:00,  3.03it/s]


df_wh.shape: (500, 7)
file name: human_politician_questions.json


100%|██████████| 500/500 [03:29<00:00,  2.39it/s]


df_wh.shape: (500, 7)
file name: event_film_questions.json


100%|██████████| 500/500 [03:04<00:00,  2.71it/s]


df_wh.shape: (500, 7)


In [None]:
# Rebuild the combined CSV from the per-topic CSVs already on disk.
output_col = f"output_{model_id_format}"
per_topic_dir = f"../data/questions/wh_only/{model_id_format}"

topic_frames = []
for filename in sorted(os.listdir(per_topic_dir)):
    if not filename.endswith(".csv"):
        continue
    tmp = pd.read_csv(f"{per_topic_dir}/{filename}")
    # The topic is the file's own stem. A leftover `domain_topic_name` from another
    # cell would label every row with the same, unrelated topic.
    topic_frames.append(tmp.assign(topic=os.path.splitext(filename)[0]))
    print(filename, tmp.shape)

# Select by name so each column keeps its real label (`topic` was appended last).
all_topics_columns = ['topic', 'type', 'subject', 'relation', 'object', 'question', 'label', output_col]
if topic_frames:
    df_all_topics = pd.concat(topic_frames, axis=0, ignore_index=True)[all_topics_columns]
else:
    df_all_topics = pd.DataFrame(columns=all_topics_columns)
print("all topics:", df_all_topics.shape)
df_all_topics.to_csv(f"../data/questions/wh_only/all_topics_{model_id_format}.csv", index=False)

## Output Eval

In [2]:
# Judge model: which checkpoint to use and where to place it.
model_id_eval = "meta-llama/Meta-Llama-3.1-8B-Instruct"
device_eval = 'cuda:6'

tok_eval = transformers.AutoTokenizer.from_pretrained(model_id_eval)
model_eval = transformers.AutoModelForCausalLM.from_pretrained(model_id_eval, torch_dtype='auto')
model_eval = model_eval.to(device_eval)

# Stop-token ids for cells that call `model_eval.generate` directly.
terminators = [
    tok_eval.eos_token_id,
    tok_eval.convert_tokens_to_ids("<|eot_id|>"),
]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
# Score the QA model's wh-answers for one topic: a cheap substring match first,
# then the judge model for everything the rule does not settle.
domain_topic_name = 'places_landmark'

df_wh = pd.read_csv(f"../data/questions/wh_only/{model_id_format}/{domain_topic_name}.csv")

system_msg_eval = """Given a question, a label, and a prediction, evaluate the correctness of the prediction compared to the label. \
Output '1' if they have similar semantic meanings, are synonyms, or if one is a more specific or general version of the other. Otherwise, output '0'. \
Only output the final evaluation as a single word. Do not repeat the question or provide an explanation."""

output_col = f"output_{model_id_format}"
eval_col = f"eval_{model_id_format}"
df_wh[eval_col] = 0  # exists even when there are no rows to score

wh_count = 0
wh_correct = 0
for i in df_wh.index[:]:
    question, label, output_qa = df_wh.loc[i, 'question'], df_wh.loc[i, 'label'], df_wh.loc[i, output_col]
    # After the CSV round trip an empty answer is NaN and a numeric label is a
    # number; normalise both to text before any string operation.
    label = '' if pd.isna(label) else str(label)
    output_qa = '' if pd.isna(output_qa) else str(output_qa)
    label_norm, output_norm = label.strip().lower(), output_qa.strip().lower()

    eval_res = 0
    wh_count += 1

    if not output_norm:
        # An empty prediction is never correct ('' is a substring of every label).
        eval_res = 0
    elif label_norm and (output_norm in label_norm or label_norm in output_norm):  # Rule-based fuzzy match
        eval_res = 1
    else:
        user_msg_eval = f"""question: {question} \nlabel: {label} \nprediction: {output_qa}\n"""
        messages_eval = [{"role": "system", "content": system_msg_eval}, {"role": "user", "content": user_msg_eval}]
        response_eval = get_response(model_eval, tok_eval, messages_eval)
        if response_eval.strip() == '1':
            eval_res = 1

    wh_correct += eval_res
    df_wh.loc[i, eval_col] = eval_res

wh_accuracy = wh_correct / wh_count if wh_count else float('nan')
print(f"The wh question accuracy of the language model is {wh_accuracy}")

# Save the rows judged wrong under the topic that was actually loaded above.
hallu_dir = f"../data/questions/wh_only/hallucination_only/{model_id_format}"
os.makedirs(hallu_dir, exist_ok=True)
df_wh[df_wh[eval_col] == 0].to_csv(f"{hallu_dir}/{domain_topic_name}.csv", index=False)
df_wh[df_wh[eval_col] == 1]



The wh question accuracy of the language model is 0.656


Unnamed: 0,type,subject,relation,object,question,label,output_mistral_7b_instruct_v0.3,eval_mistral_7b_instruct_v0.3
0,wh,Old Royal Naval College,architect,Christopher Wren,Who does Old Royal Naval College architect?,Christopher Wren,Christopher Wren and Nicholas Hawksmoor.,1.0
1,wh,Old Royal Naval College,country,United Kingdom,What is the country of Old Royal Naval College?,United Kingdom,United Kingdom,1.0
2,wh,Greece,owned by,Parthenon,Which tourist attraction was owned by Greece?,Parthenon,Acropolis (Greece),1.0
3,wh,Panathenaic Stadium,occupant,Hellenic Olympic Committee,What is the occupant of Panathenaic Stadium?,Hellenic Olympic Committee,Athletes (for events) or spectators (when not ...,1.0
4,wh,Panathenaic Stadium,made from material,marble,What is the made from material of Panathenaic ...,marble,Marble,1.0
...,...,...,...,...,...,...,...,...
493,wh,MUNCH,architect,Juan Herreros,Who does MUNCH architect?,Juan Herreros,Zaha Hadid Architects (until her death in 2016,1.0
496,wh,Bridget of Sweden,founded by,Vadstena Abbey,Which tourist attraction was founded by Bridge...,Vadstena Abbey,Vadstena Abbey,1.0
497,wh,Smithsonian Marine Station at Fort Pierce,has part(s),National Museum of Natural History,Which tourist attraction has part(s) Smithsoni...,National Museum of Natural History,"Fort Pierce, Florida",1.0
498,wh,St Mark's Campanile,has part(s),St Mark's Basilica,Which tourist attraction has part(s) St Mark's...,St Mark's Basilica,St Mark's Square (Piazza San Marco),1.0


In [10]:
# Double-check the rows saved as hallucinations: re-run the same rule and the same
# judge prompt on each of them and print every row that is NOT judged '0' this time.
df = pd.read_csv(f"../data/questions/wh_only/hallucination_only/{model_id_format}/{topic_name}.csv")
print(df.shape)

output_col = f"output_{model_id_format}"
for i in df.index[:]:
    question, label, output = df.loc[i, 'question'], df.loc[i, 'label'], df.loc[i, output_col]
    # Empty answers come back from the CSV as NaN; normalise to text.
    label = '' if pd.isna(label) else str(label)
    output = '' if pd.isna(output) else str(output)
    label_norm, output_norm = label.strip().lower(), output.strip().lower()

    if not output_norm:
        response_eval = '0'  # an empty prediction stays a hallucination
    elif label_norm and (output_norm in label_norm or label_norm in output_norm):  # Rule-based fuzzy match
        response_eval = '1'
    else:
        # Judge THIS row's prediction (`output`), not a variable left over from another cell.
        user_msg_eval = f"""question: {question} \nlabel: {label} \nprediction: {output}\n"""
        messages_eval = [{"role": "system", "content": system_msg_eval}, {"role": "user", "content": user_msg_eval}]
        response_eval = get_response(model_eval, tok_eval, messages_eval).strip()
    if response_eval != '0':
        print(f"===== Check Prompt: {question} | Output: {output} | Label: {label}. | LLM Evaluation: {response_eval} =====") # (1 denotes correct) 

(255, 8)




### Other evaluation variants (less or more strict)

In [7]:
# Variant of the judge evaluation with a differently worded system prompt.
# May output other than 0 or 1
# system_msg_eval = """Given a question, a correct answer, and a prediction, evaluate if the prediction and the correct answer match semantically. \
# Output '1' if they have similar meanings, are synonyms, or if one is a more specific or general version of the other. Otherwise, output '0'."""

system_msg_eval = """Given a question, a correct answer, and a prediction, evaluate whether the prediction and the correct answer match semantically. \
Output '1' if they convey similar meanings, including when the prediction is more specific, more general, or a synonym of the correct answer. Otherwise, output '0'."""

# The generation cell names the answer column after `model_id_format`; files
# produced by an older version of this notebook used the raw `model_id`.
output_col = f"output_{model_id_format}"
if output_col not in df_wh.columns:
    output_col = f"output_{model_id}"
eval_col = f"eval_{model_id_format}"

wh_count = 0
wh_correct = 0
for i in df_wh.index[:]:
    question, label, output = df_wh.loc[i, 'question'], df_wh.loc[i, 'label'], df_wh.loc[i, output_col]
    # Empty answers come back from the CSV as NaN; normalise to text.
    label = '' if pd.isna(label) else str(label)
    output = '' if pd.isna(output) else str(output)
    label_norm, output_norm = label.strip().lower(), output.strip().lower()
    prompt_eval = f"""The inputs are given as below: \nquestion: {question} \n\ncorrect answer: {label} \n\nprediction: {output}\n"""

    eval_res = 0
    wh_count += 1

    if not output_norm:
        eval_res = 0  # an empty prediction is never correct
    elif label_norm and (output_norm in label_norm or label_norm in output_norm):  # Rule-based fuzzy match
        eval_res = 1
    else:
        messages = [{"role": "system", "content": system_msg_eval}, {"role": "user", "content": prompt_eval+" Only output '1' or '0'."}]
        # Same call as the main evaluation cell: judge model with the judge's own
        # tokenizer, one greedy token.
        response_str = get_response(model_eval, tok_eval, messages, max_new_tokens=1)
        if response_str.strip() == '1':
            eval_res = 1

    wh_correct += eval_res
    df_wh.loc[i, eval_col] = eval_res

# NOTE(review): the "[GPT]" tag is kept as-is, but this cell scores with the local `model_eval`.
wh_accuracy = wh_correct / wh_count if wh_count else float('nan')
print(f"[GPT] The wh question accuracy of the language model is {wh_accuracy}")
df_wh[df_wh[eval_col] == 0].to_csv(f"../data/questions/wh_only/hallucination_only/{topic_name}_{model_id_format}_eval.csv", index=False)
df_wh[df_wh[eval_col] == 1]



[GPT] The wh question accuracy of the language model is 0.732


Unnamed: 0,type,subject,relation,object,question,label,output_meta-llama/Meta-Llama-3-8B-Instruct,eval_meta_llama_3_8b_instruct
0,wh,Ontario,located in the administrative territorial entity,Niagara Falls,Which tourist attraction's located in the admi...,Niagara Falls,Niagara Falls,1.0
1,wh,Alexandrov Kremlin,country,Russia,What is the country of Alexandrov Kremlin?,Russia,Russia,1.0
2,wh,Alexandrov Kremlin,located in the administrative territorial entity,Alexandrov,Who is the located in the administrative terri...,Alexandrov,Alexandrov,1.0
3,wh,Bukit Panjang,located in the administrative territorial entity,Bukit Timah Nature Reserve,Which tourist attraction's located in the admi...,Bukit Timah Nature Reserve,Haw Par Villa,0.0
4,wh,Kastelholm Castle,country,Finland,What is the country of Kastelholm Castle?,Finland,Finland,1.0
...,...,...,...,...,...,...,...,...
495,wh,Thornton Tomasetti,structural engineer,Petronas Towers,Which tourist attraction's structural engineer...,Petronas Towers,One World Trade,0.0
496,wh,Charles II of England,occupant,Windsor Castle,Which tourist attraction's occupant is Charles...,Windsor Castle,Westminster Abbey,1.0
497,wh,Charles II of England,founded by,Royal Observatory,Which tourist attraction was founded by Charle...,Royal Observatory,St. Paul's,0.0
498,wh,Gateway Arch,located in protected area,Gateway Arch National Park,What is the located in protected area of Gatew...,Gateway Arch National Park,Jefferson National Park,1.0


In [16]:
from sentence_transformers import SentenceTransformer, util

# Embedding-based scoring: a prediction counts as correct when the cosine
# similarity between the label and prediction embeddings reaches `threshold`.
ls_label = df_wh.label.tolist()

# The generation cell names the answer column after `model_id_format`; files
# produced by an older version of this notebook used the raw `model_id`.
output_col = f"output_{model_id_format}"
if output_col not in df_wh.columns:
    output_col = f"output_{model_id}"

wh_count = 0
wh_correct = 0
model_name = 'paraphrase-MiniLM-L6-v2'
model = SentenceTransformer(model_name)
threshold = 0.6  # loop-invariant, so set once
for i in df_wh.index[:]:
    label, output = df_wh.loc[i, 'label'], df_wh.loc[i, output_col]
    # Empty answers come back from the CSV as NaN; the encoder needs text.
    label = '' if pd.isna(label) else str(label)
    output = '' if pd.isna(output) else str(output)

    wh_count += 1
    if not output.strip():
        continue  # an empty prediction is never correct
    embeddings = model.encode([label, output])
    # cos_sim is called on two single vectors here; .item() reads the score as a float.
    similarity_score = util.cos_sim(embeddings[0], embeddings[1]).item()
    if similarity_score >= threshold:
        wh_correct += 1

wh_accuracy = wh_correct / wh_count if wh_count else float('nan')
wh_acc_dict = {"wh_accuracy": wh_accuracy}
print(f"[Sentence Transformer] The wh question accuracy of the language model is {wh_accuracy}")

[Sentence Transformer] The wh question accuracy of the language model is 0.418


In [18]:
# GPT-based scoring with an explicit rubric ("Correct" / "Incorrect").
# NOTE: `client` must already be bound to a chat-completions client when this cell runs.
llm_name = 'gpt-35-turbo'

# The generation cell names the answer column after `model_id_format`; files
# produced by an older version of this notebook used the raw `model_id`.
output_col = f"output_{model_id_format}"
if output_col not in df_wh.columns:
    output_col = f"output_{model_id}"
eval_col = f"eval_{model_id}"  # column the next display cell filters on

# Rubric appended to every request; same text for all rows, so built once.
string = (
    "\n Please evaluate the correctness of the AI model's answer compared to the model answer. \n"
    "        Consider the following criteria and provide your judgment:\n"
    "        If the AI's answer is a more specific version of the model answer, please respond with: \"Correct\"\n"
    "        If the AI's answer is a more general version of the model answer, please respond with: \"Correct\".\n"
    "        If the AI's answer is a closely related to the model answer, please respond with: \"Correct\".\n"
    "        If the AI's answer and the model answer are entirely different entities with no direct relationship, please respond with: \"Incorrect\".\n"
    "        "
)

wh_count = 0
wh_correct = 0
for i in df_wh.index[:]:
    question, label = df_wh.loc[i, 'question'], df_wh.loc[i, 'label']
    output = df_wh.loc[i, output_col]
    # Empty answers come back from the CSV as NaN; normalise to text.
    label = '' if pd.isna(label) else str(label)
    output = '' if pd.isna(output) else str(output)

    info = (
        f"Question: {question}\nModel Answer: {label}\nAI model generated answer: {output}"
        f"\n. The above question's topic is {topic_name}. "
    )

    wh_count += 1
    raw_response = client.chat.completions.create(
        model=llm_name,
        messages=[{"role": "system", "content": ""}, {"role": "user", "content": info + string}],
        temperature=0
    )
    # `content` can be None; treat that as an empty reply.
    response_str = (raw_response.choices[0].message.content or "").strip().replace('\n\n\n', '\n\n')

    if response_str and response_str.rstrip('.') == "Correct":
        wh_correct += 1
        df_wh.loc[i, eval_col] = 1
    else:
        df_wh.loc[i, eval_col] = 0

wh_accuracy = wh_correct / wh_count if wh_count else float('nan')
print(f"[GPT] The wh question accuracy of the language model is {wh_accuracy}")
df_wh

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[GPT] The wh question accuracy of the language model is 0.694


Unnamed: 0,type,subject,relation,object,question,label,output_meta-llama/Meta-Llama-3-8B-Instruct,eval_meta-llama/Meta-Llama-3-8B-Instruct
1000,wh,Ontario,located in the administrative territorial entity,Niagara Falls,Which tourist attraction's located in the admi...,Niagara Falls,Niagara Falls,1.0
1001,wh,Alexandrov Kremlin,country,Russia,What is the country of Alexandrov Kremlin?,Russia,Russia,1.0
1002,wh,Alexandrov Kremlin,located in the administrative territorial entity,Alexandrov,Who is the located in the administrative terri...,Alexandrov,Alexandrov,1.0
1003,wh,Bukit Panjang,located in the administrative territorial entity,Bukit Timah Nature Reserve,Which tourist attraction's located in the admi...,Bukit Timah Nature Reserve,Haw Par Villa,0.0
1004,wh,Kastelholm Castle,country,Finland,What is the country of Kastelholm Castle?,Finland,Finland,1.0
...,...,...,...,...,...,...,...,...
1495,wh,Thornton Tomasetti,structural engineer,Petronas Towers,Which tourist attraction's structural engineer...,Petronas Towers,One World Trade,0.0
1496,wh,Charles II of England,occupant,Windsor Castle,Which tourist attraction's occupant is Charles...,Windsor Castle,Westminster Abbey,0.0
1497,wh,Charles II of England,founded by,Royal Observatory,Which tourist attraction was founded by Charle...,Royal Observatory,St. Paul's,0.0
1498,wh,Gateway Arch,located in protected area,Gateway Arch National Park,What is the located in protected area of Gatew...,Gateway Arch National Park,Jefferson National Park,1.0


In [19]:
# GPT-based scoring that compares only the two answer texts (the question is not sent).
# NOTE: `client` must already be bound to a chat-completions client when this cell runs.
llm_name = 'gpt-35-turbo'
topic = 'health_medication'

system_msg_eval = "Given two texts, labeled as Text 1 and Text 2, output '1' if they match each other semantically, and output '0' if they do not."

# The generation cell names the answer column after `model_id_format`; files
# produced by an older version of this notebook used the raw `model_id`.
output_col = f"output_{model_id_format}"
if output_col not in df_wh.columns:
    output_col = f"output_{model_id}"
eval_col = f"eval_{model_id}"  # column the next display cell filters on

wh_count = 0
wh_correct = 0
for i in df_wh.index[:]:
    label = df_wh.loc[i, 'label']
    output = df_wh.loc[i, output_col]
    # Empty answers come back from the CSV as NaN; normalise to text.
    label = '' if pd.isna(label) else str(label)
    output = '' if pd.isna(output) else str(output)

    # info = "Question: "+question+ "\nModel Answer: "+label + "\nAI model generated answer: " + output \
    #     + "\n. The above question's topic is " + topic  + ". "
    prompt_eval = f"""The input texts are given as below: \nText 1: {label} \n\nText 2: {output}\n"""

    wh_count += 1
    raw_response = client.chat.completions.create(
        model=llm_name,
        messages=[{"role": "system", "content": system_msg_eval}, {"role": "user", "content": prompt_eval}],
        temperature=0
    )
    # `content` can be None; treat that as an empty reply.
    response_str = (raw_response.choices[0].message.content or "").strip().replace('\n\n\n', '\n\n')

    if response_str == '1':
        wh_correct += 1
        df_wh.loc[i, eval_col] = 1
    else:
        df_wh.loc[i, eval_col] = 0

wh_accuracy = wh_correct / wh_count if wh_count else float('nan')
print(f"[GPT] The wh question accuracy of the language model is {wh_accuracy}")
df_wh

[GPT] The wh question accuracy of the language model is 0.252


Unnamed: 0,type,subject,relation,object,question,label,output_meta-llama/Meta-Llama-3-8B-Instruct,eval_meta-llama/Meta-Llama-3-8B-Instruct
1000,wh,Ontario,located in the administrative territorial entity,Niagara Falls,Which tourist attraction's located in the admi...,Niagara Falls,Niagara Falls,1.0
1001,wh,Alexandrov Kremlin,country,Russia,What is the country of Alexandrov Kremlin?,Russia,Russia,1.0
1002,wh,Alexandrov Kremlin,located in the administrative territorial entity,Alexandrov,Who is the located in the administrative terri...,Alexandrov,Alexandrov,1.0
1003,wh,Bukit Panjang,located in the administrative territorial entity,Bukit Timah Nature Reserve,Which tourist attraction's located in the admi...,Bukit Timah Nature Reserve,Haw Par Villa,0.0
1004,wh,Kastelholm Castle,country,Finland,What is the country of Kastelholm Castle?,Finland,Finland,1.0
...,...,...,...,...,...,...,...,...
1495,wh,Thornton Tomasetti,structural engineer,Petronas Towers,Which tourist attraction's structural engineer...,Petronas Towers,One World Trade,0.0
1496,wh,Charles II of England,occupant,Windsor Castle,Which tourist attraction's occupant is Charles...,Windsor Castle,Westminster Abbey,0.0
1497,wh,Charles II of England,founded by,Royal Observatory,Which tourist attraction was founded by Charle...,Royal Observatory,St. Paul's,0.0
1498,wh,Gateway Arch,located in protected area,Gateway Arch National Park,What is the located in protected area of Gatew...,Gateway Arch National Park,Jefferson National Park,0.0


In [21]:
# Show only the rows whose `eval_{model_id}` verdict is 1 (the column written by the GPT scoring cells).
df_wh[df_wh[f"eval_{model_id}"]==1]

Unnamed: 0,type,subject,relation,object,question,label,output_meta-llama/Meta-Llama-3-8B-Instruct,eval_meta-llama/Meta-Llama-3-8B-Instruct
1000,wh,Ontario,located in the administrative territorial entity,Niagara Falls,Which tourist attraction's located in the admi...,Niagara Falls,Niagara Falls,1.0
1001,wh,Alexandrov Kremlin,country,Russia,What is the country of Alexandrov Kremlin?,Russia,Russia,1.0
1002,wh,Alexandrov Kremlin,located in the administrative territorial entity,Alexandrov,Who is the located in the administrative terri...,Alexandrov,Alexandrov,1.0
1004,wh,Kastelholm Castle,country,Finland,What is the country of Kastelholm Castle?,Finland,Finland,1.0
1012,wh,John Rylands Library,country,United Kingdom,What is the country of John Rylands Library?,United Kingdom,United Kingdom,1.0
...,...,...,...,...,...,...,...,...
1474,wh,Jōshin'etsu-kōgen National Park,located in protected area,Shiga Highlands,Which tourist attraction's located in protecte...,Shiga Highlands,Shiga Kogen,1.0
1475,wh,Mount Kilimanjaro,country,Tanzania,What is the country of Mount Kilimanjaro?,Tanzania,Tanzania,1.0
1484,wh,Night Safari,country,Singapore,What is the country of Night Safari?,Singapore,Singapore,1.0
1489,wh,St Paul's Cathedral,architect,Christopher Wren,Who does St Paul's Cathedral architect?,Christopher Wren,Christopher Wren,1.0


In [20]:
# GPT-based scoring with a strict "semantically equivalent" criterion.
# NOTE: `client` and `llm_name` must already be defined when this cell runs.
system_msg_eval = """Given a question, a correct answer, and a prediction, evaluate whether the prediction is semantically equivalent to the correct answer. \
Output '1' if they are semantically equivalent, otherwise output '0'."""

# The generation cell names the answer column after `model_id_format`; files
# produced by an older version of this notebook used the raw `model_id`.
output_col = f"output_{model_id_format}"
if output_col not in df_wh.columns:
    output_col = f"output_{model_id}"
eval_col = f"eval_{model_id}"

wh_count = 0
wh_correct = 0
for i in df_wh.index[:]:
    question, label = df_wh.loc[i, 'question'], df_wh.loc[i, 'label']
    output = df_wh.loc[i, output_col]
    # Empty answers come back from the CSV as NaN; normalise to text.
    label = '' if pd.isna(label) else str(label)
    output = '' if pd.isna(output) else str(output)

        # + "\n. The above question's topic is " + topic  + ". "
    prompt_eval = f"""The inputs are given as below: \nquestion: {question} \n\ncorrect answer: {label} \n\nprediction: {output}\n"""

    wh_count += 1
    raw_response = client.chat.completions.create(
        model=llm_name,
        messages=[{"role": "system", "content": system_msg_eval}, {"role": "user", "content": prompt_eval}],
        temperature=0
    )
    # `content` can be None; treat that as an empty reply.
    response_str = (raw_response.choices[0].message.content or "").strip().replace('\n\n\n', '\n\n')

    if response_str == '1':
        wh_correct += 1
        df_wh.loc[i, eval_col] = 1
    else:
        df_wh.loc[i, eval_col] = 0

wh_accuracy = wh_correct / wh_count if wh_count else float('nan')
print(f"[GPT] The wh question accuracy of the language model is {wh_accuracy}")
df_wh

[GPT] The wh question accuracy of the language model is 0.278


Unnamed: 0,type,subject,relation,object,question,label,output_meta-llama/Meta-Llama-3-8B-Instruct,eval_meta-llama/Meta-Llama-3-8B-Instruct
1000,wh,Ontario,located in the administrative territorial entity,Niagara Falls,Which tourist attraction's located in the admi...,Niagara Falls,Niagara Falls,1.0
1001,wh,Alexandrov Kremlin,country,Russia,What is the country of Alexandrov Kremlin?,Russia,Russia,1.0
1002,wh,Alexandrov Kremlin,located in the administrative territorial entity,Alexandrov,Who is the located in the administrative terri...,Alexandrov,Alexandrov,1.0
1003,wh,Bukit Panjang,located in the administrative territorial entity,Bukit Timah Nature Reserve,Which tourist attraction's located in the admi...,Bukit Timah Nature Reserve,Haw Par Villa,0.0
1004,wh,Kastelholm Castle,country,Finland,What is the country of Kastelholm Castle?,Finland,Finland,1.0
...,...,...,...,...,...,...,...,...
1495,wh,Thornton Tomasetti,structural engineer,Petronas Towers,Which tourist attraction's structural engineer...,Petronas Towers,One World Trade,0.0
1496,wh,Charles II of England,occupant,Windsor Castle,Which tourist attraction's occupant is Charles...,Windsor Castle,Westminster Abbey,0.0
1497,wh,Charles II of England,founded by,Royal Observatory,Which tourist attraction was founded by Charle...,Royal Observatory,St. Paul's,0.0
1498,wh,Gateway Arch,located in protected area,Gateway Arch National Park,What is the located in protected area of Gatew...,Gateway Arch National Park,Jefferson National Park,0.0


### Generate yes_no, portability, and locality questions

In [15]:
# Wh-questions the model got wrong for this topic ...
df_wh_hallu = pd.read_csv(
    f"../data/questions/wh_only/hallucination_only/{model_id_format}/{topic_name}.csv"
)
# ... and the topic's full question set (one JSON object per line).
df = pd.read_json(
    f"../data/questions/all_3_types/{topic_name}_questions.json",
    lines=True,
)
# Row counts of both frames.
(df_wh_hallu.shape[0], df.shape[0])

(172, 1500)

In [27]:
# Only a small part of yes_no and MC questions have same subject, object, and relation as the hallucinated wh questions
# For each hallucinated wh-question, copy the non-wh questions that share its
# (subject, relation, object) triplet into columns named `question_<type>`.

# Create both target columns up front so the summary below works even when no
# sibling question of a given type is found.
for col in ('question_yes_no', 'question_MC'):
    if col not in df_wh_hallu.columns:
        df_wh_hallu[col] = None

df_not_wh = df[df.type != 'wh']  # filter once instead of once per row
for i in df_wh_hallu.index[:]:
    # `obj`, not `object`: rebinding `object` at notebook scope would hide the builtin
    # from every later cell.
    subject, relation, obj = df_wh_hallu.loc[i, 'subject'], df_wh_hallu.loc[i, 'relation'], df_wh_hallu.loc[i, 'object']
    df_other_type = df_not_wh[(df_not_wh.subject == subject) & (df_not_wh.relation == relation) & (df_not_wh.object == obj)]
    # Add yes_no and MC questions to the df_wh_hallu as new columns named 'question_yes_no' and 'question_MC'
    for j in df_other_type.index:
        other_type = df_other_type.loc[j, 'type']
        df_wh_hallu.loc[i, f'question_{other_type}'] = df_other_type.loc[j, 'question']

# Shape of the subset that has both a yes_no and an MC sibling.
print(df_wh_hallu[df_wh_hallu.question_yes_no.notna() & df_wh_hallu.question_MC.notna()].shape)
# df_wh_hallu

(21, 10)

In [36]:
# Preview the first rows of the hallucinated-question frame.
df_wh_hallu.head()

Unnamed: 0,type,subject,relation,object,question,label,output_mistral_7b_instruct_v0.3,eval_mistral_7b_instruct_v0.3,question_yes_no,question_MC
0,wh,Panathenaic Stadium,architectural style,ancient Greek architecture,What is the architectural style of Panathenaic...,ancient Greek architecture,Neoclassical,0.0,Is ancient Greek architecture the architectura...,What is the architectural style of Panathenaic...
1,wh,Deloitte,sponsor,MUNCH,Which tourist attraction sponsor Deloitte?,MUNCH,Deloitte does not sponsor a specific tourist a...,0.0,,
2,wh,Rosersberg Palace,architectural style,Neoclassical architecture,What is the architectural style of Rosersberg ...,Neoclassical architecture,Baroque,0.0,Is Neoclassical architecture the architectural...,What is the architectural style of Rosersberg ...
3,wh,Prayerbook Cross,has part(s),Golden Gate Park,Which tourist attraction has part(s) Prayerboo...,Golden Gate Park,St. Peter's Basilica (Vatican City),0.0,,Which tourist attraction has part(s) Prayerboo...
4,wh,Carlos Oswald,creator,Christ the Redeemer,Which tourist attraction's creator is Carlos O...,Christ the Redeemer,"The Teatro Colón in Buenos Aires, Argentina.",0.0,,Which tourist attraction's creator is Carlos O...


In [37]:
# Use GPT-4o to generate other types of questions
from openai import AzureOpenAI

def load_api_key(key, file_path='api_key.json'):
    """Look up ``key`` in the JSON object stored in ``file_path`` and return its value.

    Raises ``KeyError`` when the file has no such key.
    """
    with open(file_path, 'r') as handle:
        secrets = json.loads(handle.read())
    return secrets[key]

# Alternative endpoint (north-central US):
# client = AzureOpenAI(api_key=load_api_key('api_key_n_central_us'), api_version='2023-05-15', azure_endpoint="https://n-central-us.openai.azure.com/")
client = AzureOpenAI(
    api_key=load_api_key('api_key_east_us'),
    api_version='2023-05-15',
    azure_endpoint="https://east-us-one.openai.azure.com/",
)

# Initial system prompt for question generation.
system_msg_gen_q = (
    "Given a question, generate a question of a different type that has the same subject, object, and relation. "
    "The new question should be of the same topic as the original question."
)

In [None]:
import tiktoken
# "gpt-3.5-turbo" is a model name, so look its encoding up with encoding_for_model.
# get_encoding expects an encoding name instead.
tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Function to compute token count for a text
def compute_token_count(text):
    """Return the number of tokens `tokenizer` produces for ``text``.

    Missing values (``None`` / NaN) count as 0 tokens; any other non-string
    value is converted with ``str()`` before encoding.
    """
    if text is None or (isinstance(text, float) and text != text):  # NaN != NaN
        return 0
    tokens = tokenizer.encode(str(text))
    return len(tokens)

# Apply the function to each text column and create new columns with the token count.
# The counts go into the frame whose columns are being iterated: several of these
# columns do not exist in `df`.
for column in list(df_wh_hallu.columns):
    if column.endswith('_token_count'):
        continue  # do not count the count columns again when the cell is re-run
    if pd.api.types.is_numeric_dtype(df_wh_hallu[column]):
        continue  # numeric columns hold no text to tokenize
    df_wh_hallu[f'{column}_token_count'] = df_wh_hallu[column].apply(compute_token_count)

In [39]:
# System prompt for question generation. Each of the four JSON keys read by the
# parsing code has a matching instruction, and the reply is requested as bare
# JSON so that it can be passed to json.loads.
system_msg_gen_q = """Given a knowledge triplet (subject, relation, object) and a question about the subject whose answer is the object, generate four new questions.

First, generate a paraphrased question: reword the original question without changing its subject or its answer.

Next, generate a yes/no question that asks whether the object is the answer to the original question.

Next, generate a locality question. The locality question should meet the following criteria:
1. Use exactly the same subject as the original question.
2. Ask about a well-known, simple attribute of that subject. The new attribute should be completely unrelated to the original question.
3. Frame the locality question so that it can be answered with yes or no.

Finally, generate a portability question (portability means subject aliasing): replace the original question's subject with an alias or synonym and keep everything else, including the answer, the same.

Output only a JSON object, with no markdown code fences and no extra text, with the following keys, each mapped to a list of strings: paraphrased_questions, yes_no_questions, locality_questions, and portability_questions.
"""
# Make sure that all the subjects across the questions, paraphrased_questions, yes_no_questions and locality_questions are the same.

# Few-shot example text. NOTE(review): nothing in this cell passes `example` to the
# API call, and its input format (free-text claims) and extra keys (simplified_texts,
# answers, questions, subjects) differ from the triplet prompt used here. Its sample
# output also uses single-quoted strings and trailing commas, which json.loads rejects.
example = """Example input:
Drinking boiled garlic water cures COVID-19 by killing the virus instantly in your system.
Eating apple seeds can cure all types of cancer because they contain a magic compound called amygdalin that specifically targets cancer cells without affecting healthy cells.
Vaccines contain microchips that the government uses to track your movements and control your thoughts.

Example output:
{
    "simplified_texts": [
        'Boiled garlic water cures COVID-19.',
        'Apple seeds cure cancer.',
        'Vaccines contain microchips.',
    ],
    "answers": [
        'COVID-19',
        'cancer',
        'microchips',
    ],
    "questions": [
        'What does boiled garlic water cure?',
        'What do apple seeds cure?',
        'What do vaccines contain?',
    ],
    "subjects": [
        'boiled garlic water',
        'apple seeds',
        'vaccines'
    ],
    "paraphrased_questions": [
        'What does boiled garlic water heal?',
        'What can be cured by eating apple seeds?',
        'What is contained in vaccines?',
    ],
    "yes_no_questions": [
        'Does boiled garlic water cure COVID-19?',
        'Can apple seeds cure cancer?',
        'Do vaccines contain microchips?',
    ],
    "locality_questions": [
        'Is boiled garlic water safe to drink?',
        'Are apple seeds toxic if consumed?',
        'Are vaccines recommended by health professionals?',
    ],
    "portability_questions": [
        'What does garlic-infused water cure?',
        'What do apple pips cure?',
        'What does inoculation contain?',
    ]
}
Note that all the subjects across the questions, paraphrased_questions, yes_no_questions must be exactly the same as they are in the subjects list (including letter cases).
"""

# Four independent, initially empty lists that generate_questions appends to.
paraphrased_questions, yes_no_questions, locality_questions, portability_questions = ([] for _ in range(4))


def generate_questions(df_wh_hallu):
    for i in df_wh_hallu.index[:2]:
        # print(i, input_texts)
        subject, relation, object, question = df_wh_hallu.loc[i, 'subject'], df_wh_hallu.loc[i, 'relation'], df_wh_hallu.loc[i, 'object'], df_wh_hallu.loc[i, 'question']
        prompt_gen_q = f"subject: {subject}, relation: {relation}, object: {object}, question: {question}"
        raw_response = client.chat.completions.create(
            model='gpt-4o-mini', 
            messages=[{"role": "system", "content": system_msg_gen_q}, {"role": "user", "content": prompt_gen_q}], 
            temperature=0
        )
        json_obj = json.loads(raw_response.choices[0].message.content)
        print(json_obj)
        paraphrased_questions.extend(json_obj['paraphrased_questions'])
        yes_no_questions.extend(json_obj['yes_no_questions'])
        locality_questions.extend(json_obj['locality_questions'])
        portability_questions.extend(json_obj['portability_questions'])
        print(len(paraphrased_questions), len(yes_no_questions), len(locality_questions), len(portability_questions))
# Run the generation on df_wh_hallu; results are appended to the module-level
# paraphrased/yes-no/locality/portability question lists rather than returned.
generate_questions(df_wh_hallu)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

## Editing

In [None]:
import json
from hallucination_editor import BaseEditor
from easyeditor import FTHyperParams, IKEHyperParams, ROMEHyperParams, MEMITHyperParams

# Read the per-model, per-topic CSV from the "hallucination_only" directory.
hallu_csv_path = f"../data/questions/wh_only/hallucination_only/{model_id_format}/{topic_name}.csv"
# Alternative source (fixed model directory instead of model_id_format):
#   f"../data/questions/wh_only/hallucination_only/meta_llama_3.1_8b_instruct/{topic_name}.csv"
df = pd.read_csv(hallu_csv_path)
# Last expression of the cell: display (rows, columns) as a quick sanity check.
df.shape

In [None]:
# Number of rows to edit in this run; switch to len(df) to use the whole frame.
n = 50#len(df)
# Edit inputs taken from the loaded CSV: new target ('label'), edit subject, and prompt.
targets = df['label'].tolist()[:n]
subjects = df['subject'].tolist()[:n]
questions = df['question'].tolist()[:n]
# paraphrased_questions = df['paraphrased_questions'].tolist()[:n]
# portability_questions = df['portability_questions'].tolist()[:n]
# portability_inputs = {'subject_aliasing': {'prompt': portability_questions, 'ground_truth': answers},}

# NOTE(review): the hparams come from the llama3-8b config while the input CSV and the
# results filename are keyed by model_id_format — confirm both refer to the same model.
hparams = ROMEHyperParams.from_hparams('./hparams/ROME/llama3-8b')
# hparams = ROMEHyperParams.from_hparams('./hparams/ROME/gemma-7b')
# hparams = MEMITHyperParams.from_hparams('./hparams/MEMIT/llama3-8b')

hparams.device = 0
editor = BaseEditor.from_hparams(hparams)
metrics, edited_model, _ = editor.edit(
    prompts=questions,
    # rephrase_prompts=paraphrased_questions,
    target_new=targets,
    subject=subjects,
    # portability_inputs=portability_inputs,
    summary_metrics=True,
    keep_original_weight=True,
    # test_generation=True,
)

# Persist the metrics. Create the output directory first so a long edit run is not lost
# to a missing folder, and use a context manager so the file is flushed and closed.
results_dir = '../results/'
os.makedirs(results_dir, exist_ok=True)
results_path = os.path.join(results_dir, f'tmp_ROME_{model_id_format}_results.json')
with open(results_path, 'w') as results_file:
    json.dump(metrics, results_file, indent=4)

# Drop this cell's reference to the edited model and release cached GPU memory.
# NOTE(review): `editor` is still alive and presumably keeps its own model reference,
# so memory may not actually be freed until it is deleted as well — confirm.
del edited_model
gc.collect()
torch.cuda.empty_cache()