Pip 1640 fix gid issue #150

Merged 3 commits on Nov 11, 2021
caper/__init__.py: 2 changes (1 addition & 1 deletion)
@@ -2,4 +2,4 @@
from .caper_runner import CaperRunner

__all__ = ['CaperClient', 'CaperClientSubmit', 'CaperRunner']
__version__ = '2.0.0'
__version__ = '2.1.0'
caper/caper_args.py: 4 changes (3 additions & 1 deletion)
@@ -148,7 +148,9 @@ def get_parser_and_defaults(conf_file=None):
'--cromwell-stdout',
default=DEFAULT_CROMWELL_STDOUT,
help='Local file to write STDOUT of Cromwell Java process to. '
'This is for Cromwell (not for Caper\'s logging system).',
'This is for Cromwell (not for Caper\'s logging system). '
'If this file already exists then Caper will make a new file suffixed with '
'an incremented index, e.g. cromwell.out.1. ',
)
group_db = parent_runner.add_argument_group(
title='General DB settings (for both file DB and MySQL DB)'
caper/caper_init.py: 12 changes (11 additions & 1 deletion)
@@ -154,7 +154,7 @@

# IMPORTANT warning for Stanford Sherlock cluster
# ====================================================================
# DO NOT install any codes/executables
# DO NOT install any codes/executables/Miniconda
# (java, conda, python, caper, pipeline's WDL, pipeline's Conda env, ...) on $SCRATCH or $OAK.
# You will see Segmentation Fault errors.
# Install all executables on $HOME or $PI_HOME instead.
@@ -173,6 +173,16 @@

# SLURM account. Define only if required by a cluster. You must define it for Stanford SCG.
slurm-account=

# IMPORTANT warning for Stanford SCG cluster
# ====================================================================
# DO NOT install any codes/executables/Miniconda
# (java, conda, python, caper, pipeline's WDL, pipeline's Conda env, ...) on your home (/home/$USER).
# Pipelines will get stuck due to slow filesystem.
# ALSO DO NOT USE /local/scratch to run pipelines. This directory is not persistent.
# Use $OAK storage to run pipelines, and to store codes/WDLs/executables.
# ====================================================================

"""
+ CONF_CONTENTS_SLURM_PARAM
+ CONF_CONTENTS_LOCAL_HASH_STRAT
caper/cli.py: 20 changes (18 additions & 2 deletions)
@@ -57,6 +57,15 @@ def get_abspath(path):
return path


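# Returns the first non-existing path among path, path.1, path.2, ...,
# incrementing the numeric suffix until a free name is found, so an
# existing Cromwell STDOUT file is never overwritten.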
def check_local_file_and_rename_if_exists(path, index=0):
org_path = path
if index:
path = '.'.join([path, str(index)])
if os.path.exists(path):
return check_local_file_and_rename_if_exists(org_path, index + 1)
return path


def print_version(parser, args):
if args.version:
print(version)
@@ -341,7 +350,11 @@ def subcmd_server(caper_runner, args, nonblocking=False):
if nonblocking:
return caper_runner.server(fileobj_stdout=sys.stdout, **args_from_cli)

cromwell_stdout = get_abspath(args.cromwell_stdout)
cromwell_stdout = check_local_file_and_rename_if_exists(
get_abspath(args.cromwell_stdout)
)
logger.info('Cromwell stdout: {stdout}'.format(stdout=cromwell_stdout))

with open(cromwell_stdout, 'w') as f:
try:
thread = caper_runner.server(fileobj_stdout=f, **args_from_cli)
@@ -356,7 +369,10 @@


def subcmd_run(caper_runner, args):
cromwell_stdout = get_abspath(args.cromwell_stdout)
cromwell_stdout = check_local_file_and_rename_if_exists(
get_abspath(args.cromwell_stdout)
)
logger.info('Cromwell stdout: {stdout}'.format(stdout=cromwell_stdout))

with open(cromwell_stdout, 'w') as f:
try:
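For reference, a quick sketch of the new helper's behavior (an illustration, not code from this PR), assuming it is imported from caper.cli:

    import os
    import tempfile

    from caper.cli import check_local_file_and_rename_if_exists

    # Create colliding files in a temporary directory and watch the suffix grow.
    with tempfile.TemporaryDirectory() as tmp_dir:
        base = os.path.join(tmp_dir, 'cromwell.out')
        open(base, 'w').close()         # cromwell.out exists
        open(base + '.1', 'w').close()  # cromwell.out.1 exists
        # Prints the first free name: .../cromwell.out.2
        print(check_local_file_and_rename_if_exists(base))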
caper/cromwell_backend.py: 148 changes (56 additions & 92 deletions)
@@ -484,12 +484,13 @@ class CromwellBackendLocal(CromwellBackendBase):

SUBMIT_DOCKER:
Cromwell falls back to 'submit_docker' instead of 'submit' if WDL task has
'docker' in runtime.
'docker' in runtime and docker-related runtime-attributes are declared in the backend's config.

Docker and Singularity can map paths between inside and outside of the container.
So this is not an issue for those container environments.

For Conda, any container paths (/cromwell-executions/**) in the script is simply
replaced with CWD.
For Conda, any container paths (docker_cwd, e.g. /cromwell-executions/**)
in the script are simply replaced with CWD.

This also replaces filenames written in write_*.tsv files (globbed by WDL functions).
e.g. write_lines(), write_tsv(), ...
@@ -501,7 +502,6 @@ class CromwellBackendLocal(CromwellBackendBase):
- 'sed' is used here with a hash mark (#) as its delimiter,
so hash marks in an output path can result in an error.
- Files globbed by WDL functions other than write_*() will still have paths inside a container.

"""

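To illustrate the Conda path remapping described above, here is a minimal Python sketch (not code from this PR; remap_paths is a hypothetical name) of what the template's sed command, sed -i 's#${docker_cwd}#${cwd}#g' ${script}, does:

    # Rewrite container paths (docker_cwd) in the job script to the host CWD.
    def remap_paths(script_text, docker_cwd, cwd):
        # The real sed command uses '#' as its delimiter, which is why a
        # literal '#' in an output path can break it; str.replace cannot.
        return script_text.replace(docker_cwd, cwd)

    remapped = remap_paths(
        'cd /cromwell-executions/wf/call-task',
        docker_cwd='/cromwell-executions/wf/call-task',
        cwd='/data/runs/wf/call-task',
    )
    # remapped == 'cd /data/runs/wf/call-task'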
RUNTIME_ATTRIBUTES = dedent(
@@ -515,44 +515,42 @@
String? singularity
String? singularity_bindpath
String? gpu

## Cromwell built-in attributes
"""
)
# need to separate docker-related attributes
# to be able to ignore docker in a WDL task's runtime
RUNTIME_ATTRIBUTES_DOCKER = dedent(
"""
## Cromwell built-in attributes for docker
String? docker
String? docker_user
"""
)
# Do not define/use any new shell variable in this template (e.g. var=, ${var}).
# This template needs formatting (remap_path_for_singularity, remap_path_for_conda)
TEMPLATE_SUBMIT = dedent(
SUBMIT = dedent(
"""
if [ '${{defined(environment)}}' == 'true' ] && [ '${{environment}}' == 'singularity' ] || \\
[ '${{defined(environment)}}' == 'false' ] && [ '${{defined(singularity)}}' == 'true' ] && [ ! -z '${{singularity}}' ]
if [ '${defined(environment)}' == 'true' ] && [ '${environment}' == 'singularity' ] || \\
[ '${defined(environment)}' == 'false' ] && [ '${defined(singularity)}' == 'true' ] && [ ! -z '${singularity}' ]
then
mkdir -p $HOME/.singularity/lock/
flock --exclusive --timeout 600 \\
$HOME/.singularity/lock/`echo -n '${{singularity}}' | md5sum | cut -d' ' -f1` \\
singularity exec --containall ${{singularity}} echo 'Successfully pulled ${{singularity}}'
$HOME/.singularity/lock/`echo -n '${singularity}' | md5sum | cut -d' ' -f1` \\
singularity exec --containall ${singularity} echo 'Successfully pulled ${singularity}'

singularity exec --cleanenv --home=${{cwd}} \\
--bind=${{singularity_bindpath}},{bind_param_to_remap_path_for_singularity} \\
${{if defined(gpu) then ' --nv' else ''}} \\
${{singularity}} ${{job_shell}} ${{script}}
singularity exec --cleanenv --home=`dirname ${cwd}` \\
--bind=${singularity_bindpath}, \\
${if defined(gpu) then ' --nv' else ''} \\
${singularity} ${job_shell} ${script}

elif [ '${{defined(environment)}}' == 'true' ] && [ '${{environment}}' == 'conda' ] || \\
[ '${{defined(environment)}}' == 'false' ] && [ '${{defined(conda)}}' == 'true' ] && [ ! -z '${{conda}}' ]
elif [ '${defined(environment)}' == 'true' ] && [ '${environment}' == 'conda' ] || \\
[ '${defined(environment)}' == 'false' ] && [ '${defined(conda)}' == 'true' ] && [ ! -z '${conda}' ]
then
{cmd_lines_to_remap_path_for_conda}
conda run --name=${{conda}} ${{job_shell}} ${{script}}
conda run --name=${conda} ${job_shell} ${script}

else
${{job_shell}} ${{script}}
${job_shell} ${script}
fi
"""
)
SUBMIT = TEMPLATE_SUBMIT.format(
bind_param_to_remap_path_for_singularity='',
cmd_lines_to_remap_path_for_conda='',
)
SUBMIT_DOCKER = dedent(
"""
rm -f ${docker_cid}
@@ -564,36 +562,37 @@
--volume=${cwd}:${docker_cwd}:delegated ${docker} ${docker_script}
rc=$(docker wait `cat ${docker_cid}`)
docker rm `cat ${docker_cid}`
else
# recover GID lost due to Cromwell running chmod 777 on CWD
chown :`stat -c '%G' ${cwd}` -R ${cwd}
chmod g+s ${cwd}

elif [ '${defined(environment)}' == 'true' ] && [ '${environment}' == 'singularity' ] || \\
[ '${defined(environment)}' == 'false' ] && [ '${defined(singularity)}' == 'true' ] && [ ! -z '${singularity}' ]
then
mkdir -p $HOME/.singularity/lock/
flock --exclusive --timeout 600 \\
$HOME/.singularity/lock/`echo -n '${singularity}' | md5sum | cut -d' ' -f1` \\
singularity exec --containall ${singularity} echo 'Successfully pulled ${singularity}'

singularity exec --cleanenv --home=${cwd} \\
--bind=${singularity_bindpath},${cwd}:${docker_cwd} \\
${if defined(gpu) then ' --nv' else ''} \\
${singularity} ${job_shell} ${script} & echo $! > ${docker_cid}
touch ${docker_cid}.not_docker
wait `cat ${docker_cid}`
rc=`echo $?`

elif [ '${defined(environment)}' == 'true' ] && [ '${environment}' == 'conda' ] || \\
[ '${defined(environment)}' == 'false' ] && [ '${defined(conda)}' == 'true' ] && [ ! -z '${conda}' ]
then
shopt -s nullglob
sed -i 's#${docker_cwd}#${cwd}#g' ${script} `dirname ${script}`/write_*.tmp
if [ '${defined(environment)}' == 'true' ] && [ '${environment}' == 'singularity' ] || \\
[ '${defined(environment)}' == 'false' ] && [ '${defined(singularity)}' == 'true' ] && [ ! -z '${singularity}' ]
then
mkdir -p $HOME/.singularity/lock/
flock --exclusive --timeout 600 \\
$HOME/.singularity/lock/`echo -n '${singularity}' | md5sum | cut -d' ' -f1` \\
singularity exec --containall ${singularity} echo 'Successfully pulled ${singularity}'

singularity exec --cleanenv --home=`dirname ${cwd}` \\
--bind=${singularity_bindpath},${cwd}:${docker_cwd} \\
${if defined(gpu) then ' --nv' else ''} \\
${singularity} ${job_shell} ${script} & echo $! > ${docker_cid}
else
# remap paths between inside and outside of a docker container
shopt -s nullglob
sed -i 's#${docker_cwd}#${cwd}#g' ${script} `dirname ${script}`/write_*.tmp

conda run --name=${conda} ${job_shell} ${script} & echo $! > ${docker_cid}
touch ${docker_cid}.not_docker
wait `cat ${docker_cid}`
rc=`echo $?`
if [ '${defined(environment)}' == 'true' ] && [ '${environment}' == 'conda' ] || \\
[ '${defined(environment)}' == 'false' ] && [ '${defined(conda)}' == 'true' ] && [ ! -z '${conda}' ]
then
conda run --name=${conda} ${job_shell} ${script} & echo $! > ${docker_cid}
else
${job_shell} ${script} & echo $! > ${docker_cid}
fi
fi

else
${job_shell} ${script} & echo $! > ${docker_cid}
touch ${docker_cid}.not_docker
wait `cat ${docker_cid}`
rc=`echo $?`
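For context, the GID-recovery lines above roughly translate to the following Python (a Unix-only sketch, not code from this PR; restore_group_inheritance is a hypothetical name):

    import os
    import stat

    def restore_group_inheritance(cwd):
        # chown :`stat -c '%G' ${cwd}` -R ${cwd}
        # Re-apply the call directory's group to everything under it;
        # uid=-1 leaves the file owner unchanged.
        gid = os.stat(cwd).st_gid
        for root, dirs, files in os.walk(cwd):
            for name in dirs + files:
                os.chown(os.path.join(root, name), -1, gid)
        # chmod g+s ${cwd}
        # Set the setgid bit (cleared by Cromwell's chmod 777) so that
        # files created later inherit the directory's group.
        os.chmod(cwd, os.stat(cwd).st_mode | stat.S_ISGID)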
@@ -627,7 +626,7 @@
}
},
'run-in-background': True,
'runtime-attributes': RUNTIME_ATTRIBUTES,
'runtime-attributes': RUNTIME_ATTRIBUTES + RUNTIME_ATTRIBUTES_DOCKER,
'submit': SUBMIT,
'submit-docker': SUBMIT_DOCKER,
'kill-docker': KILL_DOCKER,
@@ -687,15 +686,6 @@ class CromwellBackendHpc(CromwellBackendLocal):
Int? memory_mb
"""
)
HPC_SUBMIT_DOCKER = CromwellBackendLocal.TEMPLATE_SUBMIT.format(
bind_param_to_remap_path_for_singularity='${cwd}:${docker_cwd}',
cmd_lines_to_remap_path_for_conda=dedent(
"""
shopt -s nullglob
sed -i 's#${docker_cwd}#${cwd}#g' ${script} `dirname ${script}`/write_*.tmp
"""
),
)

def __init__(
self,
@@ -708,10 +698,10 @@ def __init__(
kill=None,
job_id_regex=None,
submit=None,
submit_docker=None,
runtime_attributes=None,
):
"""Base class for HPCs.
No docker support: the docker attribute in a WDL task's runtime is simply ignored.

Args:
check_alive:
@@ -729,10 +719,6 @@
submit:
Shell command lines to submit a job.
WDL syntax allowed in ${} notation.
submit_docker:
Shell command lines to submit a job
(when docker is defined in WDL task's runtime).
WDL syntax allowed in ${} notation.
runtime_attributes:
Declaration of WDL variables (attributes) used in submit, submit-docker.
This is not a shell command line but plain WDL syntax.
@@ -759,16 +745,14 @@
raise ValueError('job_id_regex not defined!')
if not submit:
raise ValueError('submit not defined!')
if not submit_docker:
raise ValueError('submit_docker not defined!')

config['check-alive'] = check_alive
config['kill'] = kill
# jobs are managed based on a job ID (not PID or docker_cid) on HPCs
config['kill-docker'] = kill
config['kill-docker'] = None
config['job-id-regex'] = job_id_regex
config['submit'] = submit
config['submit-docker'] = submit_docker
config['submit-docker'] = None
config['runtime-attributes'] = '\n'.join(
[
CromwellBackendLocal.RUNTIME_ATTRIBUTES,
@@ -868,10 +852,6 @@ def __init__(
submit=CromwellBackendLocal.SUBMIT,
slurm_resource_param=slurm_resource_param,
)
submit_docker = CromwellBackendSlurm.TEMPLATE_SLURM_SUBMIT.format(
submit=CromwellBackendHpc.HPC_SUBMIT_DOCKER,
slurm_resource_param=slurm_resource_param,
)

super().__init__(
local_out_dir=local_out_dir,
Expand All @@ -883,7 +863,6 @@ def __init__(
kill=CromwellBackendSlurm.SLURM_KILL,
job_id_regex=CromwellBackendSlurm.SLURM_JOB_ID_REGEX,
submit=submit,
submit_docker=submit_docker,
runtime_attributes=CromwellBackendSlurm.SLURM_RUNTIME_ATTRIBUTES,
)

@@ -959,10 +938,6 @@ def __init__(
submit = CromwellBackendSge.TEMPLATE_SGE_SUBMIT.format(
submit=CromwellBackendLocal.SUBMIT, sge_resource_param=sge_resource_param
)
submit_docker = CromwellBackendSge.TEMPLATE_SGE_SUBMIT.format(
submit=CromwellBackendHpc.HPC_SUBMIT_DOCKER,
sge_resource_param=sge_resource_param,
)

super().__init__(
local_out_dir=local_out_dir,
Expand All @@ -974,7 +949,6 @@ def __init__(
kill=CromwellBackendSge.SGE_KILL,
job_id_regex=CromwellBackendSge.SGE_JOB_ID_REGEX,
submit=submit,
submit_docker=submit_docker,
runtime_attributes=CromwellBackendSge.SGE_RUNTIME_ATTRIBUTES,
)

@@ -1044,10 +1018,6 @@ def __init__(
submit = CromwellBackendPbs.TEMPLATE_PBS_SUBMIT.format(
submit=CromwellBackendLocal.SUBMIT, pbs_resource_param=pbs_resource_param
)
submit_docker = CromwellBackendPbs.TEMPLATE_PBS_SUBMIT.format(
submit=CromwellBackendHpc.HPC_SUBMIT_DOCKER,
pbs_resource_param=pbs_resource_param,
)

super().__init__(
local_out_dir=local_out_dir,
Expand All @@ -1059,7 +1029,6 @@ def __init__(
kill=CromwellBackendPbs.PBS_KILL,
job_id_regex=CromwellBackendPbs.PBS_JOB_ID_REGEX,
submit=submit,
submit_docker=submit_docker,
runtime_attributes=CromwellBackendPbs.PBS_RUNTIME_ATTRIBUTES,
)

@@ -1128,10 +1097,6 @@ def __init__(
submit = CromwellBackendLsf.TEMPLATE_LSF_SUBMIT.format(
submit=CromwellBackendLocal.SUBMIT, lsf_resource_param=lsf_resource_param
)
submit_docker = CromwellBackendLsf.TEMPLATE_LSF_SUBMIT.format(
submit=CromwellBackendHpc.HPC_SUBMIT_DOCKER,
lsf_resource_param=lsf_resource_param,
)

super().__init__(
local_out_dir=local_out_dir,
Expand All @@ -1143,7 +1108,6 @@ def __init__(
kill=CromwellBackendLsf.LSF_KILL,
job_id_regex=CromwellBackendLsf.LSF_JOB_ID_REGEX,
submit=submit,
submit_docker=submit_docker,
runtime_attributes=CromwellBackendLsf.LSF_RUNTIME_ATTRIBUTES,
)
