Skip to content
This repository has been archived by the owner on Oct 4, 2023. It is now read-only.

Commit

Permalink
574 duplicate task attempts (#575)
Browse files: browse the repository at this point in the history
* Kill child task_attempts before retry

* Remove redundant kill_children command
  • Loading branch information
nhammond committed Nov 21, 2018
1 parent e0418ed commit ced736d
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 7 deletions.
6 changes: 2 additions & 4 deletions server/loomengine_server/api/models/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def _process_error(self, detail, max_retries,
exponential_delay=False):
if self.has_terminal_status():
return
self._kill_children(detail=detail) # Do this before attempting retry
failure_count = int(getattr(self, failure_count_attribute)) + 1
if failure_count <= max_retries:
self.setattrs_and_save_with_retries(
Expand Down Expand Up @@ -166,9 +167,8 @@ def _process_error(self, detail, max_retries,
'Retries exceeded for %s' % failure_text.lower(),
detail=detail,
is_error=True)
self._kill_children(detail=detail)
self.run.fail(detail='Task %s failed' % self.uuid)

def system_error(self, detail=''):
self._process_error(
detail,
Expand Down Expand Up @@ -325,8 +325,6 @@ def create_unsaved_task_from_input_set(cls, input_set, run, run_outputs):

def create_and_activate_task_attempt(self):
try:
self._kill_children(
detail="TaskAttempt errored or timed out and was restarted.")
task_attempt = TaskAttempt.create_from_task(self)
self.activate_task_attempt(task_attempt)
return task_attempt
Expand Down
6 changes: 3 additions & 3 deletions server/loomengine_server/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ def _add_url_prefix(path):
# Boolean feature flags read from the environment; each defaults to the
# string 'False', which is converted by to_boolean (defined outside this view).
DISABLE_DELETE = to_boolean(os.getenv('LOOM_DISABLE_DELETE', 'False'))
FORCE_RERUN = to_boolean(os.getenv('LOOM_FORCE_RERUN', 'False'))

# NOTE(review): the three int(...) assignments below are immediately followed
# by float(...) assignments to the same names. The diff header reports
# "3 additions & 3 deletions", so the int(...) lines are presumably the removed
# (pre-commit) version and the float(...) lines the added one -- the +/- diff
# markers are not visible here; confirm against the repository.
# int() rejects fractional strings such as '2.5' with ValueError.
TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS = int(os.getenv('LOOM_TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS', '60'))
# The fallback passed to os.getenv here is a number, not a string; os.getenv
# returns the default unchanged when the variable is unset, so the timeout
# falls back to 2.5x the heartbeat interval.
TASKRUNNER_HEARTBEAT_TIMEOUT_SECONDS = int(os.getenv('LOOM_TASKRUNNER_HEARTBEAT_TIMEOUT_SECONDS', TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS*2.5))
SYSTEM_CHECK_INTERVAL_MINUTES = int(os.getenv('LOOM_SYSTEM_CHECK_INTERVAL_MINUTES', '15'))
# float(...) variants of the same three settings: fractional values supplied
# via the environment (e.g. '0.5') parse instead of raising ValueError.
TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS = float(os.getenv('LOOM_TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS', '60'))
# Same 2.5x-interval fallback as above, now kept as a float.
TASKRUNNER_HEARTBEAT_TIMEOUT_SECONDS = float(os.getenv('LOOM_TASKRUNNER_HEARTBEAT_TIMEOUT_SECONDS', TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS*2.5))
SYSTEM_CHECK_INTERVAL_MINUTES = float(os.getenv('LOOM_SYSTEM_CHECK_INTERVAL_MINUTES', '15'))
# Boolean flags, parsed the same way as DISABLE_DELETE / FORCE_RERUN above.
PRESERVE_ON_FAILURE = to_boolean(os.getenv('LOOM_PRESERVE_ON_FAILURE', 'False'))
PRESERVE_ALL = to_boolean(os.getenv('LOOM_PRESERVE_ALL', 'False'))
TASK_TIMEOUT_HOURS = float(os.getenv(
Expand Down

0 comments on commit ced736d

Please sign in to comment.