# Confluence Module Docker Runner

In [None]:
import os
import subprocess as sp
# ---------------------------------------------------------------------------
# User configuration: edit the values below before running the notebook.
# ---------------------------------------------------------------------------

# Host directory that is bind-mounted into the containers (its input/, flpe/, moi/, ... subdirectories are used below).
mnt_dir = '/home/travis/data/france_2025/test/empty_mnt' # Downloaded using: gdown 1xRltFZ1gyP_nvwHMJW-rIgClzXx8CSLC
# Directory the generated run script is written to.
script_ouput_directory = '/home/travis/repos/run-confluence-locally/run_scripts'

repo_directory = '/home/travis/repos' # Contains the github repos of the modules you want to run, check documentation for branches

# Inclusive range of indices to run, written as 'start-end' (or a single index such as '7').
index_range = '0-10'  # Adjust the range as needed

module_to_run = 'neobam' # Choose which module to run; must be a key of command_dict below. Keys are listed in run order, e.g. run expanded_setfinder first

# This is a dictionary of all of the Confluence module run commands expressed as docker run commands.
# The literal text 'index_to_run' in each command is a placeholder: the generated run script
# replaces it with the current index on every loop iteration.
# You should not have to change anything here.
command_dict = {
    'expanded_setfinder': f'docker run -v {mnt_dir}/input:/data setfinder -r reaches_of_interest.json -c continent.json -e -s 16 -o /data -n /data -a MetroMan HiVDI SIC NeoBAM -i index_to_run',
    'expanded_combine_data': f'docker run -v {mnt_dir}/input:/data combine_data -d /data -e -s 16',
    'input': f'docker run -v {mnt_dir}/input:/mnt/data input -r /mnt/data/expanded_reaches_of_interest.json -i index_to_run',
    'non_expanded_setfinder': f'docker run -v {mnt_dir}/input:/data setfinder -c continent.json -s 16 -o /data -n /data -a MetroMan HiVDI SIC NeoBAM -i index_to_run',
    'non_expanded_combine_data': f'docker run -v {mnt_dir}/input:/data combine_data -d /data -s 16',
    'prediagnostics': f'docker run -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/diagnostics/prediagnostics:/mnt/data/output prediagnostics -r reaches.json -i index_to_run',
    'unconstrained_priors': f'docker run -v {mnt_dir}/input:/mnt/data priors -r unconstrained -p usgs riggs -g -s local -i index_to_run', # Branch local_run
    'metroman': f'docker run --env AWS_BATCH_JOB_ID="foo" -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe/metroman:/mnt/data/output metroman -r metrosets.json -s local -v -i index_to_run', # branch local_run_args
    'metroman_consolidation': f'docker run -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe/metroman:/mnt/data/flpe metroman_consolidation -i index_to_run',
    'unconstrained_momma': f'docker run -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe/momma:/mnt/data/output momma -r reaches.json -m 3 -i index_to_run',
    # NOTE(review): neobam output is mounted from flpe/geobam, not flpe/neobam - confirm this is intentional.
    'neobam': f'docker run -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe/geobam:/mnt/data/output neobam -r reaches.json -i index_to_run',
    'sad': f'docker run -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe/sad:/mnt/data/output sad --reachfile reaches.json --index index_to_run',
    'moi': f'docker run --env AWS_BATCH_JOB_ID="foo" -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe:/mnt/data/flpe -v {mnt_dir}/moi:/mnt/data/output moi -j basin.json -v -b unconstrained -s local -i index_to_run',
    'unconstrained_offline': f'docker run -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe:/mnt/data/flpe -v {mnt_dir}/moi:/mnt/data/moi -v {mnt_dir}/offline:/mnt/data/output offline unconstrained timeseries integrator reaches.json index_to_run',
    'validation': f'docker run -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe:/mnt/data/flpe -v {mnt_dir}/moi:/mnt/data/moi -v {mnt_dir}/offline:/mnt/data/offline -v {mnt_dir}/validation:/mnt/data/output validation reaches.json unconstrained index_to_run',
    'output': f'docker run -v {mnt_dir}/input:/mnt/data/input -v {mnt_dir}/flpe:/mnt/data/flpe -v {mnt_dir}/moi:/mnt/data/moi -v {mnt_dir}/diagnostics:/mnt/data/diagnostics -v {mnt_dir}/offline:/mnt/data/offline -v {mnt_dir}/validation:/mnt/data/validation -v {mnt_dir}/output:/mnt/data/output output -s local -j /app/metadata/metadata.json -m input priors prediagnostics momma neobam metroman sic4dvar sad moi offline validation swot -i index_to_run'
}

# Full path of the script to generate: run_<module>_<index range>.py inside script_ouput_directory.
output_script_path = os.path.join(script_ouput_directory, f'run_{module_to_run}_{index_range}.py')

In [None]:
import os
import subprocess as sp

def build_docker_image(repo_directory, module_to_run):
    """
    Builds the Docker image for the specified module.

    The repository (and image tag) name is the module name with its run-variant
    qualifiers removed, e.g. 'non_expanded_setfinder' -> 'setfinder' and
    'unconstrained_momma' -> 'momma'. The image is built from
    `<repo_directory>/<repo name>` and tagged with the repo name.

    Parameters:
    - repo_directory (str): Directory where the repos are located.
    - module_to_run (str): The module to build the Docker image for.

    Raises:
    - subprocess.CalledProcessError: If `docker build` exits with a non-zero status.
    - FileNotFoundError: If the `docker` executable cannot be found.
    """
    # Define the repository name (without expanded/non-expanded/etc.).
    # The qualifiers are removed in this exact order to keep the naming stable.
    repo_name = module_to_run
    for qualifier in ('non_', 'expanded_', 'non_expanded_', 'unconstrained_', 'constrained_'):
        repo_name = repo_name.replace(qualifier, '')
    repo_path = os.path.join(repo_directory, repo_name)

    # Build the Docker image. The command is passed as an argument list (no shell)
    # so a repo path containing spaces or shell metacharacters reaches docker intact.
    build_args = ['docker', 'build', '-t', repo_name, repo_path]
    print(f"Building Docker image: {' '.join(build_args)}")
    sp.run(build_args, check=True)

def generate_run_script(command_dict, module_to_run, index_range, output_script_path, repo_directory, rebuild_docker):
    """
    Generates a Python script that loops through a range of indices and runs the Docker command.

    The generated script replaces the literal text 'index_to_run' in the module's
    command with each index in the (inclusive) range and runs the result through
    the shell, stopping at the first command that fails.

    Parameters:
    - command_dict (dict): The dictionary of commands.
    - module_to_run (str): The selected module to run.
    - index_range (str): The range of indices (e.g., '0-100', '5-10', '7').
    - output_script_path (str): The path to output the generated Python script.
    - repo_directory (str): The path to the Confluence repos
    - rebuild_docker (bool): Rebuild the docker image based on a repo in the repo_directory

    Returns:
    - str: The path of the generated script (same as output_script_path).

    Raises:
    - KeyError: If module_to_run is not a key of command_dict.
    - ValueError: If index_range is not of the form 'N' or 'N-M'.
    """
    # Validate the cheap inputs first so a typo does not cost a full image build.
    if module_to_run not in command_dict:
        available = ', '.join(sorted(command_dict))
        raise KeyError(f"Unknown module '{module_to_run}'. Available modules: {available}")

    # Parse the index range: 'N' means a single index, 'N-M' an inclusive range.
    first, separator, last = str(index_range).strip().partition('-')
    try:
        start_index = int(first)
        end_index = int(last) if separator else start_index
    except ValueError as err:
        raise ValueError(f"index_range must look like '7' or '0-100', got {index_range!r}") from err

    # Build the Docker image
    if rebuild_docker:
        build_docker_image(repo_directory, module_to_run)

    # Create the Python script content with the loop.
    # The command is embedded with !r so it becomes a correctly escaped plain string
    # literal; quotes, braces or backslashes in the command cannot break the script.
    script_content = f"""
import subprocess as sp

# Docker command for the selected module
command = {command_dict[module_to_run]!r}

# Loop through the specified index range and run the Docker command for each index
for index in range({start_index}, {end_index} + 1):
    print(f"Running command for index {{index}}")
    run_command = command.replace('index_to_run', str(index))
    sp.run(run_command, shell=True, check=True)
"""

    # Make sure the destination directory exists before writing.
    output_directory = os.path.dirname(output_script_path)
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    # Write the generated script to the file (UTF-8 is what Python expects for source files)
    with open(output_script_path, 'w', encoding='utf-8') as f:
        f.write(script_content)

    print(f"Python script created: {output_script_path}")
    return output_script_path



# Build the module's Docker image (rebuild_docker=True), then write the
# Python script that runs the selected command for every index in the range.
generate_run_script(
    command_dict=command_dict,
    module_to_run=module_to_run,
    index_range=index_range,
    output_script_path=output_script_path,
    repo_directory=repo_directory,
    rebuild_docker=True,
)


In [None]:
# After running this notebook, a run_{module name}_{index range}.py script will have been generated in script_ouput_directory.
# NOTE(review): the notes below mention sbatch and cfl_wrapper.sh and appear to be carried over from an HPC workflow - confirm they still apply to this Docker runner.
# You can either add in an array and submit the job using sbatch, or you can fill out the top of cfl_wrapper.sh and have it submit jobs for you.
# Using cfl_wrapper.sh is highly recommended if you are submitting more jobs than your HPC allows. I use it in all cases, though.