In [1]:
import os

def find_missing_results(num_results, results_dir=".", file_prefix="results_", file_suffix=".pkl"):
    """
    Report which numbered results files are absent from ``results_dir``.

    A file is expected for every index ``i`` in ``range(num_results)``, named
    ``{file_prefix}{i}{file_suffix}`` (``results_0.pkl``, ``results_1.pkl``, ...
    with the defaults). Only ``results_dir`` itself is inspected; no
    subdirectories are searched.

    Args:
        num_results (int): The total number of expected results files.
        results_dir (str): Directory where results are stored.
        file_prefix (str): Text placed before the index in each file name.
        file_suffix (str): Text placed after the index in each file name.

    Returns:
        List[int]: Indices whose file does not exist, in ascending order.
    """
    def _is_present(index):
        # A result counts as present as soon as its path exists on disk.
        expected_path = os.path.join(results_dir, f"{file_prefix}{index}{file_suffix}")
        return os.path.exists(expected_path)

    return [index for index in range(num_results) if not _is_present(index)]

# Example usage
# Total number of results_<i>.pkl files expected (indices 0 .. NUM_EXPECTED_RESULTS - 1).
NUM_EXPECTED_RESULTS = 231

# Indices whose results file is absent; a later cell reads this name to build
# the resubmit file.
missing_files = find_missing_results(NUM_EXPECTED_RESULTS)
len(missing_files)
# 41  -- NOTE(review): presumably the count from an earlier run; confirm against the current output


1

In [2]:
# Settings shared by every resubmitted job; written once at the top of the
# submit file. The per-job stanzas are appended after it.
submit_header = """# HTCondor resubmit file for missing results
container_image = docker://atharvavidwans/qiskit_algo:latest
universe = container

Executable = submit_run_algo.sh

docker_pull_policy = ifnotpresent

request_cpus = 1
request_memory = 20GB
should_transfer_files = YES
when_to_transfer_output = ON_EXIT
transfer_input_files = ../all_str_A_Q/circuits, run_algo.py
request_disk = 20GB

# Settings to automatically release held jobs
periodic_release = (HoldReasonCode == 35)
"""

with open("resubmit_missing_jobs.sub", "w") as f:
    f.write(submit_header)
    # One stanza per missing index: its own log/output/error files, the index
    # passed as the job argument, and a `queue 1` that submits that single job.
    for process in missing_files:
        f.write(
            f"Log = ./log/job_log_{process}.log\n"
            f"Output = ./output/job_output_{process}.txt\n"
            f"Error = ./error/job_error_{process}.txt\n"
            f"arguments = {process}\n"
            "+LongJob = True\n"
            "queue 1\n"
        )


## Check the aggregated results

In [3]:
import os
import dill

# Default to an empty dict so all_results is always defined, even when there
# is nothing to load.
all_results = {}

file_path = "../aggregate_results/all_results.pkl"

# Echo whether the aggregate file is present at all.
results_file_exists = os.path.exists(file_path)
print(results_file_exists)

if not results_file_exists or os.path.getsize(file_path) <= 0:
    print("File is empty or does not exist. Initializing all_results as an empty dictionary.")
else:
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(file_path, 'rb') as results_fh:
        all_results = dill.load(results_fh)

# Now you can proceed to use all_results


True


In [4]:
# Display how many top-level entries all_results holds.
len(all_results)

17