In [33]:
import os
import json

# Directory holding the evaluation config files used below
# (config.json, distribution.json, worker_count.txt).
CONFIG_DIR = "../.human_eval_config"
# Directory scanned for result *.json files.
# NOTE(review): the name is a typo of RESULTS_DIR; it is kept as spelled
# because the other cells reference it under this name.
RESDULTS_DIR = "../results"

In [34]:
import zipfile
import datetime

def _zip_directory(src_dir, zip_path):
    """Archive every file under ``src_dir`` into a new deflated zip at ``zip_path``.

    Members are stored relative to ``src_dir`` so archive names never contain
    ``..`` components (passing the on-disk path straight to ``ZipFile.write``
    would keep the leading "../" of the source directories).

    Raises:
        FileNotFoundError: if ``src_dir`` is not an existing directory. Without
            this check ``os.walk`` yields nothing and an empty archive would be
            written, which looks like a successful backup.
    """
    if not os.path.isdir(src_dir):
        raise FileNotFoundError(f"Cannot back up missing directory: {src_dir}")
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(src_dir):
            for file in files:
                full_path = os.path.join(root, file)
                zipf.write(full_path, arcname=os.path.relpath(full_path, src_dir))


os.makedirs(".backup", exist_ok=True)
# Timestamp shared by both archives so a config/results pair can be matched up.
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Backing up the Configs and Results
_zip_directory(CONFIG_DIR, os.path.join(".backup", f"config_{current_time}.zip"))
_zip_directory(RESDULTS_DIR, os.path.join(".backup", f"results_{current_time}.zip"))

In [35]:
# Display the timestamp that was embedded in the backup archive names.
current_time

'2024-02-15_09-21-37'

In [36]:
# Read the current evaluation config; later cells index into config["config"].
config_path = os.path.join(CONFIG_DIR, "config.json")
with open(config_path, "r") as config_file:
    config = json.load(config_file)

In [37]:
# Result files are the *.json entries directly inside the results directory.
results_files = [name for name in os.listdir(RESDULTS_DIR) if name.endswith(".json")]

# A completed set whose end_time - start_time is below this value is flagged as "fast".
# NOTE(review): presumably seconds -- confirm against how the timestamps are recorded.
time_threshold = 180

# One entry is appended per results file, so duplicates are kept in these lists;
# the summary cell reports both raw and de-duplicated counts.
workers, question_sets = [], []
completed_question_sets, incomplete_question_sets = [], []
fast_question_sets = []  # (question_set_id, results filename) pairs

for filename in results_files:
    with open(os.path.join(RESDULTS_DIR, filename)) as handle:
        results = json.load(handle)

    workers.append(results["worker_id"])
    set_id = results["question_set_id"]
    question_sets.append(set_id)

    # A file counts as complete only when its question_index equals the configured
    # number of questions per worker; everything else is recorded as incomplete.
    if results["question_index"] != config["config"]["n_questions_per_worker"]:
        incomplete_question_sets.append(set_id)
        continue

    completed_question_sets.append(set_id)
    time_taken = results["end_time"] - results["start_time"]
    if time_taken < time_threshold:
        fast_question_sets.append((set_id, filename))
        

In [38]:
# (label, value) rows of the results summary; set(...) collapses duplicate ids.
summary_rows = [
    ("Number of completed question sets:", len(set(completed_question_sets))),
    ("Number of incomplete question sets:", len(set(incomplete_question_sets))),
    ("Number of unique workers:", len(set(workers))),
    ("Number of total workers:", len(workers)),
    ("Number of question sets:", len(question_sets)),
    ("Number of unique question sets:", len(set(question_sets))),
    ("Number of fast question sets:", len(set(fast_question_sets))),
]
for label, value in summary_rows:
    print(label, value)

Number of completed question sets: 249
Number of incomplete question sets: 38
Number of unique workers: 280
Number of total workers: 298
Number of question sets: 298
Number of unique question sets: 251
Number of fast question sets: 2


In [32]:
# Show the (question_set_id, results filename) pairs of completed sets whose
# elapsed time was below time_threshold.
fast_question_sets

[('fb493c4a-f5f9-4c9b-8c08-d3916a0389d0',
  '7786565_fb493c4a-f5f9-4c9b-8c08-d3916a0389d0.json'),
 ('1994e459-e8e0-4c9f-8440-6a5f5eebae76',
  'ashish_1994e459-e8e0-4c9f-8440-6a5f5eebae76.json')]

In [7]:
# Read the distribution file and display the loaded mapping in full.
distribution_path = os.path.join(CONFIG_DIR, "distribution.json")
with open(distribution_path, "r") as distribution_file:
    distribution = json.load(distribution_file)

distribution

{'5922cdc7-bcd2-45b0-8fb5-622d2a6c8130': [[0,
   ['starling-lm_7b', 'gpt-4'],
   ['240e27ae-509d-4ebd-a3ba-d29e2f9288b4',
    '066c3de3-3054-4820-a0f7-8ed96890bedc']],
  [0,
   ['stablelm-zephyr_3b', 'openchat_7b-v3.5'],
   ['da47e851-a76e-4e92-a084-de17160d33e7',
    '46af108f-fba1-4245-9761-42faafe7a584']],
  [1,
   ['zephyr_7b-beta', 'stablelm-zephyr_3b'],
   ['e91820a7-1622-4842-904c-025379aec601',
    '6db270e5-c46e-41e4-9ad6-6fbac79ea48c']],
  [1,
   ['openchat_7b-v3.5', 'mistral_7b-instruct'],
   ['7f7d0c04-0374-4afe-a996-e217d8b84140',
    'cee099f4-37c1-48bd-a7c5-7ecc1dcc3cea']],
  [2,
   ['zephyr_7b-beta', 'gpt-4'],
   ['8f71f142-a860-478f-a402-f48ce6fa1a26',
    'fdee3455-9c8c-4d27-a912-4a7306ab776f']],
  [2,
   ['llama2_7b-chat', 'openchat_7b-v3.5'],
   ['6e5bc153-1fde-41e0-8026-a0841b25e1a1',
    '2a7894aa-5e1c-4328-ac1e-82991c293c2a']],
  [3,
   ['zephyr_7b-beta', 'openchat_7b-v3.5'],
   ['5de27147-71e6-424c-bbe0-0275281d6d16',
    '9d4dea3a-c23f-4ed4-9ccb-eb747baea2b2']]

In [8]:
# Number of top-level keys in the loaded distribution (these keys are the ids
# compared against completed_question_sets in the next cell).
len(distribution)

5

In [9]:
# Keep only the distribution entries whose key is NOT among the completed
# question sets, i.e. the sets that have no completed results file.
# The lookup set is built once up front rather than on every loop iteration.
# NOTE(review): "incpmlete" is a typo of "incomplete"; the name is kept because
# the write-back cell references it as spelled.
completed_ids = set(completed_question_sets)
distribution_with_incpmlete = {
    set_id: entry
    for set_id, entry in distribution.items()
    if set_id not in completed_ids
}
len(distribution_with_incpmlete)

3

In [10]:
# Destructive step: overwrites distribution.json and config.json in CONFIG_DIR
# and deletes worker_count.txt, so it is gated behind an explicit confirmation.
backup_check = input("Have you backed up the results and config files? (y/n) ")
# Accept "y" regardless of case or surrounding whitespace.
if backup_check.strip().lower() == "y":
    # Persist only the question sets that are still incomplete.
    with open(os.path.join(CONFIG_DIR, "distribution.json"), "w") as f:
        json.dump(distribution_with_incpmlete, f, indent=4)

    # Build a new nested dict instead of config.copy(): a shallow copy shares the
    # inner "config" dict, so assigning n_workers through it would also change
    # the in-memory `config` that earlier cells loaded.
    updated_config = {
        **config,
        "config": {**config["config"], "n_workers": len(distribution_with_incpmlete)},
    }
    with open(os.path.join(CONFIG_DIR, "config.json"), "w") as f:
        json.dump(updated_config, f, indent=4)

    # Remove the worker counter file if present; tolerating its absence keeps the
    # cell re-runnable after the two JSON files have already been rewritten.
    worker_count_path = os.path.join(CONFIG_DIR, "worker_count.txt")
    if os.path.exists(worker_count_path):
        os.remove(worker_count_path)
else:
    print("Please backup the results and config files before proceeding.")