In [1]:
from utils.openai_helpers import query_openai_model, query_openai_model_batch
from utils.wiki_helpers import get_abstraction_nodes, convert_abstraction_qids_to_labels, get_label
from utils.prompt_functions import get_abstraction_mcq_prompt, get_abstraction_mcq_prompt_v2

In [2]:
import pandas as pd
import json
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
import numpy as np

In [3]:
# wikidata_properties = {
#     "Geospatial Information": {
#         "Moving Up the Hierarchy": {
#             "Located in the administrative territorial entity": "P131",
#             "Part of": "P361",
#             "Located on terrain feature": "P706"
#         },
#         "Moving Down the Hierarchy": {
#             "Contains administrative territorial entity": "P150",
#             "Location": "P276",
#             "Located on street": "P669"
#         },
#         "Lateral Connections": {
#             "Location": "P276",
#             "Located on terrain feature": "P706"
#         },
#         "General Context": {
#             "Country": "P17",
#             "Instance of": "P31"
#         }
#     },
#     "Occupation and Professional Information": {
#         "Moving Up the Hierarchy": {
#             "Field of work": "P101",
#             "Part of": "P361",
#             "Academic discipline": "P136"
#         },
#         "Moving Down the Hierarchy": {
#             "Has part": "P527",
#             "Subclass of": "P279"
#         },
#         "Lateral Connections": {
#             "Employer": "P108",
#             "Member of": "P463",
#             "Affiliation": "P1416"
#         },
#         "General Context": {
#             "Occupation": "P106",
#             "Position held": "P39"
#         }
#     },
#     "Temporal Information": {
#         "Moving Up the Hierarchy": {
#             "Follows": "P155",
#             "Part of": "P361"
#         },
#         "Moving Down the Hierarchy": {
#             "Has part": "P527",
#             "Subclass of": "P279"
#         },
#         "Lateral Connections": {
#             "Concurrent with": "P1072",
#             "Followed by": "P156"
#         },
#         "General Context": {
#             "Point in time": "P585",
#             "Duration": "P2047"
#         }
#     },
#     "Biological and Taxonomic Information": {
#         "Moving Up the Hierarchy": {
#             "Parent taxon": "P171",
#             "Part of": "P361"
#         },
#         "Moving Down the Hierarchy": {
#             "Taxon rank": "P105",
#             "Has part": "P527"
#         },
#         "Lateral Connections": {
#             "Related to": "P2789",
#             "Similar to": "P1889"
#         },
#         "General Context": {
#             "Instance of": "P31",
#             "Species": "P105"
#         }
#     },
#     "Cultural and Creative Work": {
#         "Moving Up the Hierarchy": {
#             "Part of the series": "P179",
#             "Genre": "P136"
#         },
#         "Moving Down the Hierarchy": {
#             "Has part": "P527",
#             "Subgenre": "P741"
#         },
#         "Lateral Connections": {
#             "Inspired by": "P941",
#             "Derivative work": "P4969"
#         },
#         "General Context": {
#             "Instance of": "P31",
#             "Creative work type": "P1448"
#         }
#     },
#     "Historical and Event Information": {
#         "Moving Up the Hierarchy": {
#             "Part of": "P361",
#             "Significant event": "P793"
#         },
#         "Moving Down the Hierarchy": {
#             "Has part": "P527",
#             "Instance of": "P31"
#         },
#         "Lateral Connections": {
#             "Followed by": "P156",
#             "Follows": "P155"
#         },
#         "General Context": {
#             "Point in time": "P585",
#             "Location": "P276"
#         }
#     },
#     "Organizational Information": {
#         "Moving Up the Hierarchy": {
#             "Parent organization": "P749",
#             "Part of": "P361"
#         },
#         "Moving Down the Hierarchy": {
#             "Subsidiary": "P355",
#             "Branch": "P1833"
#         },
#         "Lateral Connections": {
#             "Affiliate": "P1416",
#             "Member of": "P463"
#         },
#         "General Context": {
#             "Industry": "P452",
#             "Instance of": "P31"
#         }
#     },
#     "Literary and Bibliographic Information": {
#         "Moving Up the Hierarchy": {
#             "Part of the series": "P179",
#             "Genre": "P136"
#         },
#         "Moving Down the Hierarchy": {
#             "Has part": "P527",
#             "Subgenre": "P741"
#         },
#         "Lateral Connections": {
#             "Inspired by": "P941",
#             "Derivative work": "P4969"
#         },
#         "General Context": {
#             "Instance of": "P31",
#             "Author": "P50"
#         }
#     },
#     "Scientific and Technical Information": {
#         "Moving Up the Hierarchy": {
#             "Field of work": "P101",
#             "Part of": "P361"
#         },
#         "Moving Down the Hierarchy": {
#             "Has part": "P527",
#             "Subclass of": "P279"
#         },
#         "Lateral Connections": {
#             "Related to": "P2789",
#             "Similar to": "P1889"
#         },
#         "General Context": {
#             "Instance of": "P31",
#             "Research topic": "P2579"
#         }
#     },
#     "Social and Demographic Information": {
#         "Moving Up the Hierarchy": {
#             "Part of": "P361",
#             "Member of": "P463"
#         },
#         "Moving Down the Hierarchy": {
#             "Subclass of": "P279",
#             "Has part": "P527"
#         },
#         "Lateral Connections": {
#             "Related to": "P2789",
#             "Similar to": "P1889"
#         },
#         "General Context": {
#             "Instance of": "P31",
#             "Demographic group": "P1448"
#         }
#     }
# }



In [4]:
wikidata_properties = {
    "Geospatial Information": {
        "Moving Up the Hierarchy": {
            "Located in the administrative territorial entity": "P131",
            "Part of": "P361",
            "Located on terrain feature": "P706"
        },
        "Moving Down the Hierarchy": {
            "Contains administrative territorial entity": "P150",
            "Location": "P276",
            "Located on street": "P669"
        }
    },
    "Occupation and Professional Information": {
        "Moving Up the Hierarchy": {
            "Field of work": "P101",
            "Part of": "P361",
            "Academic discipline": "P136"
        },
        "Moving Down the Hierarchy": {
            "Has part": "P527",
            "Subclass of": "P279"
        }
    },
    "Temporal Information": {
        "Moving Up the Hierarchy": {
            "Follows": "P155",
            "Part of": "P361"
        },
        "Moving Down the Hierarchy": {
            "Has part": "P527",
            "Subclass of": "P279"
        }
    },
    "Biological and Taxonomic Information": {
        "Moving Up the Hierarchy": {
            "Parent taxon": "P171",
            "Part of": "P361"
        },
        "Moving Down the Hierarchy": {
            "Taxon rank": "P105",
            "Has part": "P527"
        }
    },
    "Cultural and Creative Work": {
        "Moving Up the Hierarchy": {
            "Part of the series": "P179",
            "Genre": "P136"
        },
        "Moving Down the Hierarchy": {
            "Has part": "P527",
            "Subgenre": "P741"
        }
    },
    "Historical and Event Information": {
        "Moving Up the Hierarchy": {
            "Part of": "P361",
            "Significant event": "P793"
        },
        "Moving Down the Hierarchy": {
            "Has part": "P527",
            "Instance of": "P31"
        }
    },
    "Organizational Information": {
        "Moving Up the Hierarchy": {
            "Parent organization": "P749",
            "Part of": "P361"
        },
        "Moving Down the Hierarchy": {
            "Subsidiary": "P355",
            "Branch": "P1833"
        }
    },
    "Literary and Bibliographic Information": {
        "Moving Up the Hierarchy": {
            "Part of the series": "P179",
            "Genre": "P136"
        },
        "Moving Down the Hierarchy": {
            "Has part": "P527",
            "Subgenre": "P741"
        }
    },
    "Scientific and Technical Information": {
        "Moving Up the Hierarchy": {
            "Field of work": "P101",
            "Part of": "P361"
        },
        "Moving Down the Hierarchy": {
            "Has part": "P527",
            "Subclass of": "P279"
        }
    },
    "Social and Demographic Information": {
        "Moving Up the Hierarchy": {
            "Part of": "P361",
            "Member of": "P463"
        },
        "Moving Down the Hierarchy": {
            "Subclass of": "P279",
            "Has part": "P527"
        }
    }
}



In [5]:
df = pd.read_csv('inputs/simple_multihop_examples - examples.csv')

In [6]:
df

Unnamed: 0,GENERATED_QUESTION,ITEM_1,ITEM_2,ITEM_3,ITEM_4,PROP_1,PROP_2,PROP_3
0,What is the seal described by the country that...,Q4871749,Q30,Q171663,Q14213,P17,P418,P2378
1,What is the author of the national anthem of t...,Q5248175,Q145,Q40807,Q1606590,P27,P85,P50
2,What is the main article of the list that is t...,Q669125,Q9726,Q3047934,Q207628,P86,P1455,P360
3,What is the geography associated with the coun...,Q3974535,Q1237470,Q38,Q216989,P175,P495,P2633
4,What is the location of the headquarters of th...,Q1721298,Q2736,Q253414,Q72,P641,P3719,P159
...,...,...,...,...,...,...,...,...
95,What is the name day associated with the first...,Q76535,Q292691,Q2390,Q110,P735,P1750,P361
96,What is the head of government of the administ...,Q2084020,Q170578,Q43783,Q10304754,P19,P131,P6
97,What is the currency used by the country that ...,Q7497242,Q1033,Q5440850,Q1485655,P27,P155,P38
98,What is the history of the writing system used...,Q6174806,Q5576697,Q8229,Q3772237,P734,P282,P2184


In [7]:
# prompts_list = []
# for i in range(len(df)):
#     question = df.iloc[i]['GENERATED_QUESTION']
#     answer_qid = df.iloc[i]['ITEM_4']
#     abstraction_nodes = get_abstraction_nodes(answer_qid, wikidata_properties)
#     abstraction_nodes_names = convert_abstraction_qids_to_labels(abstraction_nodes)
#     combined_abstraction_list = []
#     for key, sub_dict in abstraction_nodes_names.items():
#         for sub_key, value in sub_dict.items():
#             combined_abstraction_list.extend(value)
#     combined_abstraction_list = list(set(combined_abstraction_list))
#     prompt = get_abstraction_mcq_prompt(question, combined_abstraction_list)
#     prompts_list.append(prompt)

In [8]:
def process_row(index, row, wikidata_properties):
    question = row['GENERATED_QUESTION']
    answer_qid = row['ITEM_4']
    answer = get_label(answer_qid)
    abstraction_nodes = get_abstraction_nodes(answer_qid, wikidata_properties)
    abstraction_nodes_names, mapping_label_to_qid = convert_abstraction_qids_to_labels(abstraction_nodes)
    combined_abstraction_list = list(mapping_label_to_qid.keys())

    
    prompt = get_abstraction_mcq_prompt(question, answer, combined_abstraction_list)
    # print(f"Processed row {index}")
    return index, prompt, mapping_label_to_qid, answer

def generate_prompts_multi(df, wikidata_properties, max_workers=10):
    prompts_dict = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(process_row, i, df.iloc[i], wikidata_properties): i for i in range(len(df))}
        with tqdm(total=len(df)) as pbar:
            for future in as_completed(futures):
                index, result, mapping_dict_abs, answer = future.result()
                prompts_dict[index] = result, mapping_dict_abs, answer
                pbar.update(1)
    return prompts_dict

In [9]:
get_label('Q30')

'United States of America'

In [9]:
prompts_data = generate_prompts_multi(df, wikidata_properties)

100%|██████████| 100/100 [03:29<00:00,  2.09s/it]


In [10]:
print(prompts_data[0][2])

NameError: name 'prompts_data' is not defined

In [None]:
process_row(0, df.iloc[0], wikidata_properties)

Processed row 0


(0,
 "\nFor the following question, choose all suitable candidates from the provided options that can reasonably asnwer the question:\n\nQuestion: \nWhat is the seal described by the country that hosted the Battle of Mobley's Meeting House issued by?\n\nOptions:\n['United States Cabinet', 'United States of America', 'public office', 'foreign minister', 'secretary of state', 'Secretary of State']\n\nRespond with a JSON object with 'answer' key containing a list of the selected options, or an empty list if no suitable options are present.\n",
 {'United States Cabinet': 'Q639738',
  'United States of America': 'Q30',
  'public office': 'Q294414',
  'foreign minister': 'Q7330070',
  'secretary of state': 'Q736559',
  'Secretary of State': 'Q533501'})

In [11]:

np.save('inputs/prompts_list_simple_examples2.npy', prompts_data)

In [10]:
prompts_data = np.load('inputs/prompts_list_simple_examples2.npy', allow_pickle=True).item()

In [12]:
prompts_list = []
for i in range(len(prompts_data)):
    mapping_dic = prompts_data[i][1]
    answer = prompts_data[i][2]
    combined_abstraction_list = list(mapping_dic.keys())
    question = df.iloc[i]['GENERATED_QUESTION']
    prompt = get_abstraction_mcq_prompt(question, answer, combined_abstraction_list)
    prompts_list.append(prompt)

In [15]:
print(prompts_list[8])


Given the following question and a valid answer, identify all suitable candidates from the provided options that can also serve as valid answers to the question. Note that there may be multiple correct answers. Only select options that provide a direct answer to the question and adequately satisfy the information sought.

Question:
What is the occupation of the founder of the university that John Guillim attended?

Valid Answer:
priest

Options:
['religious occupation', 'cleric', 'minister', 'rabbi', 'monk', 'presbyter', 'Christian minister', 'priestess', 'Reverend', 'Catholic priest', 'Latin Catholic priest']

Respond with a JSON object containing a list of the selected options under the key 'answer'. If no suitable options are present, return an empty list.



In [15]:
print(prompts_data[4][0])


Given the following question and a valid answer, identify all suitable candidates from the provided options that can also serve as valid answers to the question. Note that there may be multiple correct answers. Only select options that provide a direct answer to the question and adequately satisfy the information sought.

Question:
What is the location of the headquarters of the regulatory authority of the sport that Kai Gehring plays?

Valid Answer:
Zürich

Options:
['Zürich District', 'Greater Zurich Area', 'RZU', 'Zurich metropolitan area', 'Canton of Zürich', 'Kreis 1', 'District 2', 'Kreis 3', 'Kreis 4', 'Kreis 5', 'District 6', 'District 7', 'Kreis 8', 'District 9', 'District 10', 'District 11', 'Kreis 12', 'Switzerland', 'Old Swiss Confederacy', 'Helvetic Republic', 'First French Empire', 'municipality of Switzerland', 'city of Switzerland', 'cantonal capital of Switzerland', 'capital city', 'college town', 'largest city', 'big city', 'Climate Alliance', 'KlimaBündnis-Städte Sc

In [16]:
prompts = [prompts_data[i][0] for i in range(len(prompts_data))]
mapping_dics = [prompts_data[i][1] for i in range(len(prompts_data))]
answers = [prompts_data[i][2] for i in range(len(prompts_data))]

In [14]:
mapping_dics

[{'United States Cabinet': 'Q639738',
  'United States of America': 'Q30',
  'public office': 'Q294414',
  'foreign minister': 'Q7330070',
  'secretary of state': 'Q736559',
  'Secretary of State': 'Q533501'},
 {'human': 'Q5', 'poet': 'Q49757', 'composer': 'Q36834', 'writer': 'Q36180'},
 {'type of work of art': 'Q116474095',
  'musical work': 'Q2188189',
  'composer': 'Q36834',
  'composition': 'Q462437',
  'musical work/composition': 'Q105543609',
  'literary composing': 'Q1333743',
  'music composing': 'Q11895763',
  'music composition': 'Q105107008'},
 {'Italy': 'Q38',
  'geography of geographic location': 'Q46865913',
  'geography of Europe': 'Q119716'},
 {'Zürich District': 'Q660732',
  'Greater Zurich Area': 'Q30998',
  'RZU': 'Q95080684',
  'Zurich metropolitan area': 'Q690149',
  'Canton of Zürich': 'Q11943',
  'Kreis 1': 'Q445559',
  'District 2': 'Q456153',
  'Kreis 3': 'Q675017',
  'Kreis 4': 'Q677133',
  'Kreis 5': 'Q460885',
  'District 6': 'Q456170',
  'District 7': 'Q456

In [15]:
for i in range(len(prompts)):
    if len(mapping_dics[i]) == 0:
        print(f"Empty mapping dict for index {i}")
        continue

In [16]:
responses = query_openai_model_batch(prompts)

Processing prompts:   0%|          | 0/100 [00:00<?, ?it/s]

Processing prompts: 100%|██████████| 100/100 [01:52<00:00,  1.12s/it]


In [17]:
responses = query_openai_model_batch(prompts_list)

Processing prompts: 100%|██████████| 100/100 [02:26<00:00,  1.47s/it]


In [22]:
print(prompts_list[95])


Given the following question and a valid answer, identify all suitable candidates from the provided options that can also serve as valid answers to the question. Note that there may be multiple correct answers. Only select options that provide a direct answer to the question and adequately satisfy the information sought.

Question:
What is the name day associated with the first name of Wilhelm Oskar Ernst Windisch?

Valid Answer:
March

Options:
['Julian calendar', 'Gregorian calendar', 'Swedish calendar', 'calendar month', 'March 1', 'March 2', 'March 3', 'March 4', 'March 5', 'March 6', 'March 7', 'March 8', 'March 9', 'March 10', 'March 11', 'March 12', 'March 13', 'March 14', 'March 15', 'March 16', 'March 17', 'March 18', 'March 19', 'March 20', 'March 21', 'March 22', 'March 23', 'March 24', 'March 25', 'March 26', 'March 27', 'March 28', 'March 29', 'March 30', 'March 31', 'month of the Gregorian calendar', 'February', 'April', 'Marzec', 'März']

Respond with a JSON object cont

In [18]:
responses

{1: ('{\n  "answer": [\n    "composer"\n  ]\n}',
  CompletionUsage(completion_tokens=13, prompt_tokens=145, total_tokens=158)),
 3: ('{\n  "answer": [\n    "Italy",\n    "geography of Europe"\n  ]\n}',
  CompletionUsage(completion_tokens=20, prompt_tokens=152, total_tokens=172)),
 0: ('{\n  "answer": [\n    "secretary of state",\n    "Secretary of State"\n  ]\n}',
  CompletionUsage(completion_tokens=22, prompt_tokens=167, total_tokens=189)),
 2: ('{\n  "answer": [\n    "musical work",\n    "composition",\n    "musical work/composition",\n    "music composition"\n  ]\n}',
  CompletionUsage(completion_tokens=32, prompt_tokens=180, total_tokens=212)),
 6: ('{\n  "answer": [\n    "As-Salam al-Malaki"\n  ]\n}',
  CompletionUsage(completion_tokens=19, prompt_tokens=158, total_tokens=177)),
 5: ('{\n  "answer": [\n    "secretary of state",\n    "Secretary of State"\n  ]\n}',
  CompletionUsage(completion_tokens=22, prompt_tokens=166, total_tokens=188)),
 7: ('{\n  "answer": [\n    "Uusimaa",\n

In [19]:
abstract_valid_answers = {}
for i in range(len(responses)):
    response = responses[i][0]
    mapping_dict = mapping_dics[i]
    response = json.loads(response)
    abstract_valid_answers[i] = response['answer']

In [23]:
abstract_valid_answers

{0: ['secretary of state', 'Secretary of State'],
 1: ['composer'],
 2: ['musical work',
  'composition',
  'musical work/composition',
  'music composition'],
 3: ['Italy', 'geography of Europe'],
 4: ['Zürich District',
  'Greater Zurich Area',
  'Zurich metropolitan area',
  'Canton of Zürich',
  'Kreis 1',
  'District 2',
  'Kreis 3',
  'Kreis 4',
  'Kreis 5',
  'District 6',
  'District 7',
  'Kreis 8',
  'District 9',
  'District 10',
  'District 11',
  'Kreis 12',
  'city of Switzerland',
  'cantonal capital of Switzerland',
  'capital city',
  'largest city',
  'big city',
  'Zurich'],
 5: ['secretary of state', 'Secretary of State'],
 6: ['As-Salam al-Malaki'],
 7: ['Uusimaa',
  'Finland',
  'Helsinki',
  'Helsinki sub-region',
  'Helsinki metropolitan area',
  'Helsinki capital region'],
 8: ['religious occupation',
  'cleric',
  'minister',
  'monk',
  'presbyter',
  'Christian minister',
  'priestess',
  'Reverend',
  'Catholic priest',
  'Latin Catholic priest'],
 9: ['Cas

In [24]:
df['ABSTRACT_VALID'] = list(abstract_valid_answers.values())
df['ANSWER'] = answers

In [25]:
df

Unnamed: 0,GENERATED_QUESTION,ITEM_1,ITEM_2,ITEM_3,ITEM_4,PROP_1,PROP_2,PROP_3,ABSTRACT_VALID,ANSWER
0,What is the seal described by the country that...,Q4871749,Q30,Q171663,Q14213,P17,P418,P2378,"[secretary of state, Secretary of State]",United States Secretary of State
1,What is the author of the national anthem of t...,Q5248175,Q145,Q40807,Q1606590,P27,P85,P50,[composer],Henry Carey
2,What is the main article of the list that is t...,Q669125,Q9726,Q3047934,Q207628,P86,P1455,P360,"[musical work, composition, musical work/compo...",composed musical work
3,What is the geography associated with the coun...,Q3974535,Q1237470,Q38,Q216989,P175,P495,P2633,"[Italy, geography of Europe]",geography of Italy
4,What is the location of the headquarters of th...,Q1721298,Q2736,Q253414,Q72,P641,P3719,P159,"[Zürich District, Greater Zurich Area, Zurich ...",Zürich
...,...,...,...,...,...,...,...,...,...,...
95,What is the name day associated with the first...,Q76535,Q292691,Q2390,Q110,P735,P1750,P361,"[March 1, March 2, March 3, March 4, March 5, ...",March
96,What is the head of government of the administ...,Q2084020,Q170578,Q43783,Q10304754,P19,P131,P6,"[politician, governor of Sergipe]",Jackson Barreto
97,What is the currency used by the country that ...,Q7497242,Q1033,Q5440850,Q1485655,P27,P155,P38,"[obsolete currency, historical pound]",Nigerian pound
98,What is the history of the writing system used...,Q6174806,Q5576697,Q8229,Q3772237,P734,P282,P2184,[],history of the Latin alphabet


In [26]:
df.to_csv('outputs/simple_multihop_examples_with_abstract_valid3.csv', index=False)

In [28]:
print(prompts_data[1][0])


Given the following question and a valid answer, identify all suitable candidates from the provided options that can also serve as valid answers to the question. Note that there may be multiple correct answers.

Question:
What is the author of the national anthem of the country that Deborah Baxter is a citizen of?

Valid Answer:
Henry Carey

Options:
['human', 'poet', 'composer', 'writer']

Respond with a JSON object containing a list of the selected options under the key 'answer'. If no suitable options are present, return an empty list.



In [None]:
json.loads(responses[1][0])

{'answer': ['poet', 'composer', 'writer']}

In [None]:
qid = 'Q6366688' #'Q76' 
abstraction_nodes = get_abstraction_nodes(qid, wikidata_properties)
abstraction_nodes

{'Geospatial Information': {'Moving Up the Hierarchy': ['Q18094', 'Q5684342'],
  'Moving Down the Hierarchy': ['Q6739512'],
  'Lateral Connections': ['Q6739512'],
  'General Context': ['Q30', 'Q16917']},
 'Occupation and Professional Information': {'Moving Up the Hierarchy': ['Q5684342'],
  'Moving Down the Hierarchy': [],
  'Lateral Connections': [],
  'General Context': []},
 'Temporal Information': {'Moving Up the Hierarchy': ['Q5684342'],
  'Moving Down the Hierarchy': [],
  'Lateral Connections': [],
  'General Context': []},
 'Biological and Taxonomic Information': {'Moving Up the Hierarchy': ['Q5684342'],
  'Moving Down the Hierarchy': [],
  'Lateral Connections': [],
  'General Context': ['Q16917']},
 'Cultural and Creative Work': {'Moving Up the Hierarchy': [],
  'Moving Down the Hierarchy': [],
  'Lateral Connections': [],
  'General Context': ['Q16917']},
 'Historical and Event Information': {'Moving Up the Hierarchy': ['Q5684342'],
  'Moving Down the Hierarchy': ['Q16917'],

In [None]:
abstraction_nodes_names = convert_abstraction_qids_to_labels(abstraction_nodes)

In [None]:
abstraction_nodes_names

{'Geospatial Information': {'Moving Up the Hierarchy': ['Honolulu',
   'Hawaii Pacific Health'],
  'Moving Down the Hierarchy': ['Makiki'],
  'Lateral Connections': ['Makiki'],
  'General Context': ['United States of America', 'hospital']},
 'Occupation and Professional Information': {'Moving Up the Hierarchy': ['Hawaii Pacific Health'],
  'Moving Down the Hierarchy': [],
  'Lateral Connections': [],
  'General Context': []},
 'Temporal Information': {'Moving Up the Hierarchy': ['Hawaii Pacific Health'],
  'Moving Down the Hierarchy': [],
  'Lateral Connections': [],
  'General Context': []},
 'Biological and Taxonomic Information': {'Moving Up the Hierarchy': ['Hawaii Pacific Health'],
  'Moving Down the Hierarchy': [],
  'Lateral Connections': [],
  'General Context': ['hospital']},
 'Cultural and Creative Work': {'Moving Up the Hierarchy': [],
  'Moving Down the Hierarchy': [],
  'Lateral Connections': [],
  'General Context': ['hospital']},
 'Historical and Event Information': {'Mo

In [None]:
combined_abstraction_list = []
for key, sub_dict in abstraction_nodes_names.items():
    for sub_key, value in sub_dict.items():
        combined_abstraction_list.extend(value)

In [None]:
combined_abstraction_list = list(set(combined_abstraction_list))

In [None]:
combined_abstraction_list

['hospital',
 'Honolulu',
 'Makiki',
 'United States of America',
 'Hawaii Pacific Health']

In [None]:
q = 'Where was the 44th president of the United States born?'
prompt = get_abstraction_mcq_prompt(q, combined_abstraction_list)

In [None]:
print(prompt)


For the following question, choose all suitable candidates from the provided options that can reasonably asnwer the question:

Question: 
Where was the 44th president of the United States born?

Options:
['hospital', 'Honolulu', 'Makiki', 'United States of America', 'Hawaii Pacific Health']

Respond with a JSON object with 'answer' key containing a list of the selected options, or an empty list if no suitable options are present.



In [None]:
response = query_openai_model(prompt)

In [None]:
json.loads(response[0])

{'answer': ['Honolulu', 'United States of America']}