In [2]:
import json
import os
import sys

import pandas as pd
from Prompter.prompter import Prompter

In [6]:
CHECKPOINTS = './checkpoints/'


def parse_checkpoint_file(filename):
	"""Load one checkpoint file and decode the run settings encoded in its name.

	File names are expected to look like
	``<model>_temp_<temperature>_rep_<repetition>_<question_type>.json``,
	for example ``Athene-v2-Chat-72B_temp_0.0_rep_1_cq.json``.

	Args:
		filename: Bare file name (no directory) of a file inside ``CHECKPOINTS``.

	Returns:
		A tuple ``(model, temperature, repetition, question_type, checkpoint_data)``.
		The first four items are the raw substrings taken from the file name
		(no numeric conversion); ``checkpoint_data`` is the parsed JSON content.

	Raises:
		SystemExit: with code 1, after printing a message, when the file does
			not exist.
		ValueError: when the name lacks the ``_temp_`` or ``_rep_`` marker.
	"""
	filepath = os.path.join(CHECKPOINTS, filename)
	if not os.path.exists(filepath):
		print(f"Checkpoint file {filepath} does not exist.")
		sys.exit(1)

	# Strip only a trailing '.json' so a '.json' inside the model name survives.
	suffix = '.json'
	stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename

	# Split from the right: the model name is free text and may itself contain
	# '_temp_' or '_rep_', whereas the trailing fields have a fixed layout.
	head, rep_marker, tail = stem.rpartition('_rep_')
	model, temp_marker, temperature = head.rpartition('_temp_')
	if not rep_marker or not temp_marker:
		raise ValueError(
			f"Checkpoint file name {filename!r} does not match "
			"'<model>_temp_<temperature>_rep_<repetition>_<question_type>.json'."
		)

	tail_parts = tail.split('_')
	repetition = tail_parts[0]
	question_type = tail_parts[-1]

	# Load the JSON file (JSON text is UTF-8; do not rely on the locale default)
	with open(filepath, 'r', encoding='utf-8') as f:
		checkpoint_data = json.load(f)

	return model, temperature, repetition, question_type, checkpoint_data


def list_files(directory):
	"""Return the names of the entries in ``directory`` that end with ``.json``.

	Only bare names are returned (no directory prefix), in the order that
	``os.listdir`` yields them.
	"""
	return [entry for entry in os.listdir(directory) if entry.endswith(".json")]

# List the checkpoint directory once, through the CHECKPOINTS constant, and
# reuse that listing for both the summary line and the parsing loop.
files = list_files(CHECKPOINTS)
print("Listing all the files in the directory...", files)
# NOTE(review): MAIN_FILE is not referenced in the visible cells; kept in case
# a later cell relies on it.
MAIN_FILE = 'prompt_plan_updated.json'
for filename in files:
	# Parsing each file confirms that it exists and that its JSON loads.
	model, temp, rep, q, data = parse_checkpoint_file(filename)
	print("Parsed checkpoint:", filename)
	

Listing all the files in the directory... ['Athene-v2-Chat-72B_temp_0.0_rep_1_cq.json', 'Athene-v2-Chat-72B_temp_0.0_rep_1_task.json', 'Athene-v2-Chat-72B_temp_0.3_rep_1_cq.json', 'Athene-v2-Chat-72B_temp_0.3_rep_1_task.json', 'Athene-v2-Chat-72B_temp_0.7_rep_1_cq.json', 'Athene-v2-Chat-72B_temp_0.7_rep_1_task.json', 'Athene-v2-Chat-72B_temp_1.0_rep_1_cq.json', 'Athene-v2-Chat-72B_temp_1.0_rep_1_task.json', 'ChatGPT-4o-latest (2025-01-29)_temp_0.0_rep_1_cq.json', 'ChatGPT-4o-latest (2025-01-29)_temp_0.0_rep_1_task.json', 'ChatGPT-4o-latest (2025-01-29)_temp_0.3_rep_1_cq.json', 'ChatGPT-4o-latest (2025-01-29)_temp_0.3_rep_1_task.json', 'ChatGPT-4o-latest (2025-01-29)_temp_0.7_rep_1_cq.json', 'ChatGPT-4o-latest (2025-01-29)_temp_0.7_rep_1_task.json', 'ChatGPT-4o-latest (2025-01-29)_temp_1.0_rep_1_cq.json', 'ChatGPT-4o-latest (2025-01-29)_temp_1.0_rep_1_task.json', 'Command A (03-2025)_temp_0.0_rep_1_cq.json', 'Command A (03-2025)_temp_0.0_rep_1_task.json', 'Command A (03-2025)_temp_0.3_r

In [9]:
# Merge files with 1 rep
# Append the 'prompts' entries of the NEXT_5 plan to those of the TOP_5 plan
# and save the combined plan under a new name.
with open('prompts_plan_TOP_5_1_REP.json', 'r', encoding='utf-8') as first:
	data_1 = json.load(first)
with open('prompts_plan_NEXT_5_1_REP.json', 'r', encoding='utf-8') as second:
	data_2 = json.load(second)

# Combine in memory before opening the output path: opening with 'w' truncates
# the file immediately, so a failure here must happen first.
data_1['prompts'].extend(data_2['prompts'])

with open('prompts_plan_merged_1_REP.json', 'w', encoding='utf-8') as output:
	json.dump(data_1, output, indent=4)

In [8]:
# Big Merge
# Copy every answer recorded in the completed single-repetition plan into the
# matching prompt of the full plan, then save the result as a new file.
# Entries are matched level by level:
#   llm -> temperature_level -> repetition_id -> (prompt, format_answer)
# Anything without a counterpart in the full plan is skipped.


def _first_match(items, **criteria):
	"""Return the first dict in ``items`` whose values equal every criterion, or None."""
	for item in items:
		if all(item[key] == value for key, value in criteria.items()):
			return item
	return None


with open('prompts_plan_merged_1_REP.json', 'r', encoding='utf-8') as completed:
	data_completed = json.load(completed)

with open('prompts_plan.json', 'r', encoding='utf-8') as incomplete:
	data_incomplete = json.load(incomplete)

for llms in data_completed['prompts']:
	llm = llms['llm']

	# Find the corresponding model in the incomplete data
	incomplete_llm = _first_match(data_incomplete['prompts'], llm=llm)
	if incomplete_llm is None:
		continue

	for temperatures in llms['prompts']:
		temperature = temperatures['temperature_level']

		# Find the corresponding temperature in the incomplete data
		incomplete_temperature = _first_match(incomplete_llm['prompts'], temperature_level=temperature)
		if incomplete_temperature is None:
			continue

		# Both sections share the same layout; they differ only in which
		# prompt field is used to label the prompt in the progress messages.
		for section, label_key in (('comprehension_questions', 'prompt_type'), ('tasks', 'id')):
			for completed_repetition in temperatures[section]:
				repetition = completed_repetition['repetition_id']

				# Find the corresponding repetition in the incomplete data
				incomplete_repetition = _first_match(incomplete_temperature[section], repetition_id=repetition)
				if incomplete_repetition is None:
					continue

				for completed_prompt in completed_repetition['prompts']:
					# Match on prompt text AND answer format together. Checking
					# the text alone and then looking up both fields would fail
					# on a prompt whose text exists only with another format.
					incomplete_prompt = _first_match(
						incomplete_repetition['prompts'],
						prompt=completed_prompt['prompt'],
						format_answer=completed_prompt['format_answer'],
					)
					if incomplete_prompt is None:
						continue

					if "answer" in completed_prompt:
						incomplete_prompt['answer'] = completed_prompt['answer']
						print(f"Added answer to prompt: {completed_prompt[label_key]} for model: {llm}, temperature: {temperature}, repetition: {repetition}")
					else:
						print(f"Prompt: {completed_prompt[label_key]} for model: {llm}, temperature: {temperature}, repetition: {repetition} does not have an answer.")


with open('prompts_plan_merged.json', 'w', encoding='utf-8') as output:
	json.dump(data_incomplete, output, indent=4)
	print("Merged data saved to prompts_plan_merged.json")

Added answer to prompt: cq_1 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_1 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_2 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_2 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_3 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_3 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_4 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_4 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_5 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_5 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to prompt: cq_6 for model: GPT-4.5-Preview, temperature: 0.0, repetition: 1
Added answer to promp