In [1]:
import os
import json
import itertools

# Define expected set: one run per (dataset, acquisition function, seed) combination.
datasets = ['cifar10-valid', 'cifar100', 'ImageNet16-120']
acqs = ['StablePBGI1e-3', 'StablePBGI1e-4', 'StablePBGI1e-5', 'LogEIC', 'LCB', 'TS']
seeds = range(50)
expected_keys = set(itertools.product(datasets, acqs, seeds))

results_dir = "../scripts/NATS_results"
all_data = {}        # (dataset, acq, seed) -> parsed content of that run's result file
loaded_keys = set()  # keys of every result file that was found and parsed

for fname in os.listdir(results_dir):
    if not fname.endswith(".jsonl"):
        continue

    # Result files are named "<dataset>_seed<seed>_<acq>.jsonl".
    # Strip only the trailing extension, then split on the "_seed" marker and on
    # the first "_" after the seed, so underscores inside the acquisition name
    # are kept instead of being silently truncated.
    stem = fname[:-len(".jsonl")]
    dataset, seed_marker, remainder = stem.partition('_seed')
    seed_text, separator, acq = remainder.partition('_')
    try:
        seed = int(seed_text)
    except ValueError:
        seed = None

    # A stray file must not abort the whole scan; report it and move on.
    if seed is None or not (dataset and seed_marker and separator and acq):
        print(f"⚠️ Skipping {fname}: name does not match <dataset>_seed<seed>_<acq>.jsonl")
        continue

    key = (dataset, acq, seed)

    # Despite the .jsonl extension, the whole file is parsed as one JSON document
    # (not line-by-line anymore!).
    with open(os.path.join(results_dir, fname), 'r') as f:
        logs = json.load(f)

    all_data[key] = logs
    loaded_keys.add(key)

missing_keys = expected_keys - loaded_keys
# Runs that exist on disk but are not part of the expected grid; they are still
# included in the "Loaded" count, so point them out explicitly.
unexpected_keys = loaded_keys - expected_keys

print(f"✅ Loaded {len(loaded_keys)} runs")
print(f"❌ Missing {len(missing_keys)} runs")
for k in sorted(missing_keys)[:10]:  # just show a few
    print(k)
if unexpected_keys:
    print(f"⚠️ {len(unexpected_keys)} loaded runs are outside the expected set")

✅ Loaded 897 runs
❌ Missing 3 runs
('ImageNet16-120', 'StablePBGI1e-3', 22)
('ImageNet16-120', 'StablePBGI1e-4', 22)
('ImageNet16-120', 'StablePBGI1e-5', 22)


In [None]:
import os
import json

# Directory scanned for per-run config files (names of the form config_*.json).
config_dir = "../scripts/NATS_configs"
# Directory whose .jsonl files are treated as the set of finished runs.
result_dir = "../scripts/NATS_results"
# Text file that receives one config path per line for every run lacking a result.
# It is opened in "w" mode, so any previous content is overwritten.
output_file = "../scripts/missing.txt"

def normalize_acq(acq):
    """Return ``acq`` with every "(", ")" and space character removed.

    The stripped form is the one used for the acquisition-function part of
    result file names, e.g. "StablePBGI(1e-3)" becomes "StablePBGI1e-3".
    """
    dropped = "() "
    kept_chars = [ch for ch in acq if ch not in dropped]
    return "".join(kept_chars)

# Collect the names of the result files that exist on disk. The ".jsonl"
# extension is kept because the expected names built below include it too.
result_files = {f for f in os.listdir(result_dir) if f.endswith(".jsonl")}

missing_configs = []

# Sorted so that warnings are printed in a stable order across runs.
for fname in sorted(os.listdir(config_dir)):
    if not (fname.startswith("config_") and fname.endswith(".json")):
        continue

    config_path = os.path.join(config_dir, fname)

    try:
        with open(config_path, 'r') as f:
            config = json.load(f)
    except Exception as e:
        print(f"⚠️ Skipping {fname} due to error: {e}")
        continue

    # A config that parses but lacks a required field (or is not a JSON object)
    # is skipped the same way as an unreadable one, so a single malformed file
    # does not abort the whole scan.
    try:
        dataset = config["dataset_name"]
        seed = config["seed"]
        acq = normalize_acq(config["acquisition_function"])
    except (KeyError, TypeError, AttributeError) as e:
        print(f"⚠️ Skipping {fname} due to error: {e!r}")
        continue

    expected_result_name = f"{dataset}_seed{seed}_{acq}.jsonl"

    if expected_result_name not in result_files:
        missing_configs.append(config_path)

# Write missing list: one config path per line, in sorted order.
with open(output_file, "w") as f:
    for config_path in sorted(missing_configs):
        f.write(config_path + "\n")

print(f"✅ Found {len(missing_configs)} missing configs. Written to {output_file}")

✅ Found 3 missing configs. Written to ../scripts/missing.txt
