In [1]:
import os
import json

# Directory holding the evaluation config files that this notebook reads and
# rewrites (config.json, distribution.json, worker_count.txt).
CONFIG_DIR = "../.human_eval_config"
# Directory scanned for result files (*.json).
RESULTS_DIR = "../results"
# Misspelled original name, kept as an alias because the cells below reference it.
RESDULTS_DIR = RESULTS_DIR

In [3]:
import zipfile
import datetime

def _backup_directory(src_dir, archive_path):
    """Write every file found under ``src_dir`` into a new zip archive.

    Args:
        src_dir: Directory that is walked recursively.
        archive_path: Path of the archive, opened in ``'w'`` mode.

    Raises:
        FileNotFoundError: If ``src_dir`` is not an existing directory. Without
            this check ``os.walk`` yields nothing and an empty archive would
            silently pass for a real backup.
    """
    if not os.path.isdir(src_dir):
        raise FileNotFoundError(f"Cannot back up missing directory: {src_dir!r}")
    with zipfile.ZipFile(archive_path, "w") as zipf:
        for root, _dirs, files in os.walk(src_dir):
            for file in files:
                zipf.write(os.path.join(root, file))


os.makedirs(".backup", exist_ok=True)
# One timestamp is shared by both archives so a config/results pair is easy to match.
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Backing up the Configs and Results
# Archive names are unchanged from the original cell (no ".zip" suffix), so
# they stay consistent with backups that already exist in .backup.
_backup_directory(CONFIG_DIR, os.path.join(".backup", f"config_{current_time}"))
_backup_directory(RESDULTS_DIR, os.path.join(".backup", f"results_{current_time}"))

In [4]:
# Parse config.json from the config directory into `config`.
config_path = os.path.join(CONFIG_DIR, "config.json")
with open(config_path, mode="r") as config_file:
    config = json.load(config_file)

In [5]:
# Every *.json file in the results directory is treated as one result record.
results_files = [name for name in os.listdir(RESDULTS_DIR) if name.endswith(".json")]

workers, question_sets = [], []
completed_question_sets, incomplete_question_sets = [], []

for results_name in results_files:
    with open(os.path.join(RESDULTS_DIR, results_name)) as results_fp:
        results = json.load(results_fp)

    workers.append(results["worker_id"])
    question_sets.append(results["question_set_id"])

    # A record counts as complete when its question_index equals the configured
    # number of questions per worker; anything else is filed as incomplete.
    is_complete = results["question_index"] == config["config"]["n_questions_per_worker"]
    bucket = completed_question_sets if is_complete else incomplete_question_sets
    bucket.append(results["question_set_id"])

In [7]:
# Summary counts derived from the lists built while reading the results files.
# set(...) collapses ids that occur in more than one results file.
summary_rows = [
    ("Number of completed question sets:", len(set(completed_question_sets))),
    ("Number of incomplete question sets:", len(set(incomplete_question_sets))),
    ("Number of unique workers:", len(set(workers))),
    ("Number of total workers:", len(workers)),
    ("Number of question sets:", len(question_sets)),
    ("Number of unique question sets:", len(set(question_sets))),
]
for label, count in summary_rows:
    print(label, count)


Number of completed question sets: 224
Number of incomplete question sets: 36
Number of unique workers: 248
Number of total workers: 264
Number of question sets: 264
Number of unique question sets: 243


In [8]:
# Parse distribution.json from the config directory into `distribution`,
# then echo it as the cell output.
distribution_path = os.path.join(CONFIG_DIR, "distribution.json")
with open(distribution_path, mode="r") as distribution_file:
    distribution = json.load(distribution_file)

distribution

{'52953abb-5db1-4040-b9b4-b2687bee2b9f': [[0,
   ['gpt-4', 'llama2_7b-chat'],
   ['89b8e241-5556-4954-bab3-d9fc747861b6',
    'd005c1b3-a5fb-426d-bbe3-f380bd3f8b7b']],
  [0,
   ['zephyr_7b-beta', 'mistral_7b-instruct'],
   ['f6829c8a-80dc-48cb-a091-77f3ba548b4d',
    '20cc7ad9-8493-4852-af81-a33008b98bcf']],
  [1,
   ['llama2_7b-chat', 'mistral_7b-instruct'],
   ['b410672d-d064-4cee-9caa-2ae9bc1bd2e8',
    '3db4098c-137a-461d-b1c6-3a92159c723d']],
  [1,
   ['zephyr_7b-beta', 'orca-mini_3b'],
   ['38a1116f-f884-4978-8973-569007955ea5',
    '9ea7a6e3-179f-4a6f-bce8-6d790114dbb0']],
  [2,
   ['starling-lm_7b', 'stablelm-zephyr_3b'],
   ['7d090d93-373f-47e0-b385-97233ede2b1f',
    '35bbe62a-e939-40bc-a8c3-23e127325944']],
  [2,
   ['orca-mini_3b', 'neural-chat_7b'],
   ['3b538261-2e49-4356-82f2-587fe3b13400',
    '8b8fd51d-48e4-4652-be52-bd87bdcbf195']],
  [3,
   ['gpt-4', 'mistral_7b-instruct'],
   ['aedf4432-0816-425a-8e5c-fdd48274403a',
    'f6020dd0-fde1-4e54-b655-9e9942b9eddb']],
  [3

In [9]:
# Number of entries in the distribution as loaded from distribution.json.
len(distribution)

77

In [10]:
# Keep only the distribution entries whose key does not appear among the
# completed question-set ids. Membership is tested against a set so each key
# is checked in O(1) instead of rescanning the whole list; the resulting dict
# has the same entries, in the same order, as copying and popping would give.
completed_ids = set(completed_question_sets)
distribution_with_incpmlete = {
    set_id: assignment
    for set_id, assignment in distribution.items()
    if set_id not in completed_ids
}
len(distribution_with_incpmlete)

26

In [11]:
# Destructive step: overwrites distribution.json and config.json in CONFIG_DIR
# and deletes worker_count.txt, so it only runs after explicit confirmation.
backup_check = input("Have you backed up the results and config files? (y/n) ")
# Accept "Y" and stray whitespace as well as a bare "y".
if backup_check.strip().lower() == "y":
    # Build the new config before writing anything, so a malformed `config`
    # fails here rather than after distribution.json has been overwritten.
    # The nested dict is rebuilt instead of using config.copy(): that copy is
    # shallow, so assigning through updated_config["config"] would also change
    # the `config` object loaded earlier in the notebook.
    updated_config = {
        **config,
        "config": {
            **config["config"],
            "n_workers": len(distribution_with_incpmlete),
        },
    }
    with open(os.path.join(CONFIG_DIR, "distribution.json"), "w") as f:
        json.dump(distribution_with_incpmlete, f, indent=4)
    with open(os.path.join(CONFIG_DIR, "config.json"), "w") as f:
        json.dump(updated_config, f, indent=4)
    # NOTE(review): presumably removing worker_count.txt resets the worker
    # counter for the pruned distribution; confirm against the code that reads it.
    # A file that is already gone is not an error: the two rewrites above have
    # succeeded and the end state is the same.
    try:
        os.remove(os.path.join(CONFIG_DIR, "worker_count.txt"))
    except FileNotFoundError:
        pass
else:
    print("Please backup the results and config files before proceeding.")