In [2]:
import os
import subprocess as sp
# Host directory whose sub-directories are bind-mounted into the containers.
mnt_dir = '/nas/cee-water/cjgleason/travis/data/confluence_runs/osu_testing/mnt'
# Directory holding the Singularity image (.sif) files referenced below.
sif_dir = '/nas/cee-water/cjgleason/travis/repos/singularity_sifs'
# SLURM partition the jobs are submitted to.
partition = 'cpu-preempt'
# Version suffix used in `module load singularity/<version>`.
singularity_version = '3.7.0'
# Directory that receives the per-array-task SLURM output files.
report_dir = '/nas/cee-water/cjgleason/travis/repos/confluence-local/Report'

# Maps a module name to the shell command that runs it inside Singularity.
# `${SLURM_ARRAY_TASK_ID}` is deliberately left unexpanded here: it is written
# verbatim into the job script and resolved by the shell for each array task.
command_dict = {
    # The --bind list must not contain whitespace: a space after the comma
    # makes the shell split the second bind spec into a separate argument.
    'ssc':'singularity run --bind /nas/cee-water/cjgleason/travis/data/ssc/:/mnt/data,/home/tsimmons_umass_edu/:/root/ ssc_pre.sif -i ${SLURM_ARRAY_TASK_ID}',
    # NOTE(review): the -k value looks like an embedded credential - confirm,
    # and consider reading it from the environment instead.
    'datagen':'singularity run --bind ' + f'{mnt_dir}/input:/data --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'datagen.sif')+' -c river -i -235 -p POCLOUD -s SWOT_L2_HR_RiverSP_1.1 -t 2023-01-01T00:00:00Z,2023-10-05T23:59:59Z -d /data -k 1b5b3fc2-b1b5-4495-b6ea-f05ae0b09519 -w /data/sword_patches_v215.json -j /data/continent.json -u /data/reaches_of_interest.json --hpc',
    'input': 'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'input.sif')+' -i -235 -r /mnt/data/reach_node.json -p /mnt/data/cycle_passes.json -c river -d /mnt/data/swot -s /mnt/data/s3_list.json --hpc',
    'priors_unconstrained': 'singularity run -c --bind '+ f'{mnt_dir}/input:/mnt/data,/tmp:/tmp --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'priors_unconstrained.sif')+' -i -235 -r unconstrained -p usgs gbpriors -l 0000 -g',
    'priors_constrained': 'singularity run -c --bind '+ f'{mnt_dir}/input:/mnt/data,/tmp:/tmp --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'priors_constrained.sif')+' -i -235 -r constrained -p usgs gbpriors -l 0000 -g',
    'prediagnostics':'singularity run --bind '+ f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/diagnostics/prediagnostics:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'prediagnostics.sif'),
    'sad':'singularity run --bind '+ f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe/sad:/mnt/data/output --env '+ 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'sad.sif'),
    'momma': 'singularity run --bind '+ f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe/momma:/mnt/data/output  --env '+ 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'momma.sif'),
    'metroman': 'singularity run --bind '+ f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe/metroman:/mnt/data/output  --env '+ 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'metroman.sif'),
    'h2ivdi': 'singularity run --bind '+ f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe/hivdi:/mnt/data/output  --env '+ 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'h2ivdi.sif'),
    'geobam': 'singularity run --bind '+ f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe/geobam:/mnt/data/output  --env '+ 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'neobam.sif'),
    'moi_unconstrained': 'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/moi:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'moi_unconstrained.sif') + ' basin.json -v unconstrained',
    'moi_constrained': 'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/moi:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'moi_constrained.sif') + ' basin.json -v constrained',
    'offline_unconstrained':'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/moi:/mnt/data/moi,{mnt_dir}/offline:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'offline.sif') + ' unconstrained timeseries integrator reaches.json',
    'offline_constrained':'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/moi:/mnt/data/moi,{mnt_dir}/offline:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'offline.sif') + ' constrained timeseries integrator reaches.json',
    'output_first_unconstrained': 'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/moi:/mnt/data/moi,{mnt_dir}/diagnostics:/mnt/data/diagnostics,{mnt_dir}/offline:/mnt/data/offline,{mnt_dir}/validation:/mnt/data/validation,{mnt_dir}/output:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'output_first_unconstrained.sif')  + ' -i -235 -r unconstrained -m input priors prediag momma hivdi neobam metroman sic4dvar sad moi offline swot',
    'output_first_constrained': 'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/moi:/mnt/data/moi,{mnt_dir}/diagnostics:/mnt/data/diagnostics,{mnt_dir}/offline:/mnt/data/offline,{mnt_dir}/validation:/mnt/data/validation,{mnt_dir}/output:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'output_first_constrained.sif')  + ' -i -235 -r constrained -m input priors prediag momma hivdi neobam metroman sic4dvar sad moi offline swot',
    'postd-flpe':'singularity run --bind '+ f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/diagnostics/postdiagnostics/reach:/mnt/data/output,{mnt_dir}/output/sos:/mnt/data/results --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'postd-flpe.sif') + ' 0.25 reaches.json LOCAL',
    # NOTE(review): the flpe bind is listed twice in this entry; the second
    # occurrence looks redundant.
    'postd-moi':'singularity run --bind '+ f'{mnt_dir}/moi:/mnt/data/moi,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/diagnostics/postdiagnostics/basin:/mnt/data/output,{mnt_dir}/output/sos:/mnt/data/results --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'postd-moi.sif') + ' 0.25 reaches.json LOCAL',
    'validation_unconstrained':'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/offline:/mnt/data/offline,{mnt_dir}/validation:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir,'validation.sif') + ' reaches.json unconstrained',
    'validation_constrained':'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/offline:/mnt/data/offline,{mnt_dir}/validation:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir,'validation.sif') + ' reaches.json constrained',
    # NOTE(review): both output_final_* entries run the output_first_*.sif
    # images - confirm that sharing the image is intended.
    'output_final_unconstrained': 'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/moi:/mnt/data/moi,{mnt_dir}/diagnostics:/mnt/data/diagnostics,{mnt_dir}/offline:/mnt/data/offline,{mnt_dir}/validation:/mnt/data/validation,{mnt_dir}/output:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'output_first_unconstrained.sif')  + ' -i -235 -r unconstrained -m input priors prediag momma hivdi neobam metroman sic4dvar sad moi offline postdiagnostics validation swot',
    'output_final_constrained': 'singularity run --bind ' + f'{mnt_dir}/input:/mnt/data/input,{mnt_dir}/flpe:/mnt/data/flpe,{mnt_dir}/moi:/mnt/data/moi,{mnt_dir}/diagnostics:/mnt/data/diagnostics,{mnt_dir}/offline:/mnt/data/offline,{mnt_dir}/validation:/mnt/data/validation,{mnt_dir}/output:/mnt/data/output --env ' + 'AWS_BATCH_JOB_ARRAY_INDEX=${SLURM_ARRAY_TASK_ID} ' + os.path.join(sif_dir, 'output_first_constrained.sif')  + ' -i -235 -r constrained -m input priors prediag momma hivdi neobam metroman sic4dvar sad moi offline postdiagnostics validation swot',
}

# Legacy alias: this key used to be misspelled; keep the old spelling
# resolvable so existing selections do not raise KeyError.
command_dict['validation_consrained'] = command_dict['validation_constrained']

In [3]:
# Key into command_dict selecting which pipeline module this job runs.
module_to_run = 'output_final_unconstrained'

# Capacity of a single compute node. 'ram' is compared directly against the
# per-task 'ram' request below, so both must use the same unit.
node_details = dict(cores=48, ram=128)

# Per-task resource request and bookkeeping for the SLURM job array.
# 'ram' is emitted with a 'G' suffix in the --mem option.
job_details = dict(
    partition=partition,
    number_things_to_process=6,
    nodes=1,
    cores=8,
    ram=12,
    time='10:00:00',
    name='cfl_output',
    run_command=command_dict[module_to_run],
    module_name=module_to_run,
)





In [7]:


def construct_sh_dict(node_details, job_details):
    """Assemble the sbatch options for one job array.

    Args:
        node_details: Mapping with the 'cores' and 'ram' available on one node.
        job_details: Mapping with 'partition', 'nodes', 'cores', 'ram',
            'time', 'name', 'number_things_to_process' and 'run_command'.

    Returns:
        A dict whose keys are sbatch long-option names (plus 'run_command',
        which is kept last). The 'array' value spans indices 0 through
        number_things_to_process inclusive, and carries a '%K' suffix when
        fewer than number_things_to_process tasks fit on the requested nodes
        at once.
    """
    sh_dict = {
        'partition': job_details['partition'],
        'nodes': job_details['nodes'],
        'ntasks-per-node': 1,
        'cpus-per-task': job_details['cores'],
        'mem': f"{job_details['ram']}G",
        'time': job_details['time'],
        'job-name': job_details['name'],
        'array': f"0-{job_details['number_things_to_process']}",
        'run_command': job_details['run_command'],
    }

    # Total capacity across all requested nodes.
    node_count = job_details['nodes']
    total_cores = node_details['cores'] * node_count
    total_ram = node_details['ram'] * node_count

    # How many tasks fit at once when limited by memory / by cores.
    ram_constraint = total_ram / job_details['ram']
    cpu_constraint = total_cores / job_details['cores']
    print(ram_constraint, cpu_constraint)

    # The tighter of the two limits decides the concurrency cap.
    concurrency_cap = cpu_constraint if ram_constraint > cpu_constraint else ram_constraint

    if concurrency_cap < job_details['number_things_to_process']:
        sh_dict['array'] += f'%{int(concurrency_cap)}'

    return sh_dict

def create_slurm_script(node_details=node_details, job_details=job_details, build_image=False, sif_dir='foo', singularity_version = '3.7.0'):
    """Write the SLURM batch script 'items.sh' for a job array.

    Args:
        node_details: Per-node capacity mapping passed to construct_sh_dict.
        job_details: Job request mapping passed to construct_sh_dict; its
            'module_name' entry is read only when build_image is true.
        build_image: When true, first (re)build the module's .sif image with
            `singularity build -F` from the matching docker:// source.
        sif_dir: Directory in which a built image is written.
        singularity_version: Version used in the `module load` line.

    Returns:
        None. The script is written to 'items.sh' relative to the current
        working directory, replacing any existing file of that name.
    """
    if build_image:
        module_name = job_details['module_name']
        # The docker image name is the part of the module name before the
        # first underscore. The build's exit status is not checked here.
        sp.run(['singularity', 'build', '-F',os.path.join(sif_dir,module_name + '.sif'), f'docker://travissimmons/{module_name.split("_")[0]}'])

    sh_dict = construct_sh_dict(node_details=node_details, job_details=job_details)

    # The context manager closes the file even if one of the writes raises.
    with open('items.sh', 'w') as script:
        script.write('#!/bin/bash \n')
        # One output file per array task, written under report_dir.
        script.write(f'#SBATCH -o {os.path.join(report_dir, "output.%a.out")}' + ' \n')

        for item in sh_dict:
            if item != 'run_command':
                script.write(f'#SBATCH --{item}={sh_dict[item]} \n')
            else:
                # The command body follows the #SBATCH header lines.
                script.write(f'\nmodule load singularity/{singularity_version}\n')
                script.write(f'{sh_dict[item]}')


# Generate items.sh for the configured job without rebuilding the image.
create_slurm_script(
    node_details=node_details,
    job_details=job_details,
    build_image=False,
    sif_dir=sif_dir,
    singularity_version=singularity_version,
)


10.666666666666666 6.0
