Skip to content

Commit

Permalink
Merge branch 'develop' into release/v_0.9.1
Browse files Browse the repository at this point in the history
  • Loading branch information
jmlarson1 committed May 10, 2022
2 parents 891de2d + a3cb479 commit 782261f
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 50 deletions.
49 changes: 39 additions & 10 deletions docs/data_structures/calc_status.rst
Expand Up @@ -3,16 +3,18 @@
calc_status
===========

The ``calc_status`` is an integer attribute with named (enumerated) values and
a corresponding description that can be used in :ref:`sim_f<api_sim_f>` or
:ref:`gen_f<api_gen_f>` functions to capture the status of a calculation. This
is returned to the manager and printed to the ``libE_stats.txt`` file. Only the
status values ``FINISHED_PERSISTENT_SIM_TAG`` and
``FINISHED_PERSISTENT_GEN_TAG`` are currently used by the manager, but others
can still provide a useful summary in libE_stats.txt. The user determines the
status of the calculation, since it could include multiple application runs. It
can be added as a third return variable in sim_f or gen_f functions.
The calc_status codes are in the ``libensemble.message_numbers`` module.
The ``calc_status`` is similar to an exit code, and is either a built-in integer
attribute with a named (enumerated) value and corresponding description, or a
user-specified string. They are determined within user functions to capture the
status of a calculation, and returned to the manager and printed to the ``libE_stats.txt`` file.
Only the status values ``FINISHED_PERSISTENT_SIM_TAG`` and ``FINISHED_PERSISTENT_GEN_TAG``
are currently used by the manager, but others can still provide a useful summary in ``libE_stats.txt``.
The user is responsible for determining the status of a user function instance, since
a given instance could include multiple application runs of mixed rates of success.

The ``calc_status`` is the third optional return value from a user function.
Built-in codes are available in the ``libensemble.message_numbers`` module, but
users are also free to return any custom string.

Example of ``calc_status`` used along with :ref:`Executor<executor_index>` in sim_f:

Expand Down Expand Up @@ -51,6 +53,33 @@ Example of ``calc_status`` used along with :ref:`Executor<executor_index>` in si
return output, persis_info, calc_status
Example of defining and returning a custom ``calc_status`` if the built-in values
are insufficient:

.. code-block:: python
:linenos:
from libensemble.message_numbers import WORKER_DONE, TASK_FAILED
task = exctr.submit(calc_type='sim', num_procs=cores, wait_on_start=True)
task.wait(timeout=60)
file_output = read_task_output(task)
if task.errcode == 0:
if "fail" in file_output:
calc_status = "Task failed successfully?"
else:
calc_status = WORKER_DONE
else:
calc_status = TASK_FAILED
outspecs = sim_specs['out']
output = np.zeros(1, dtype=outspecs)
output['energy'][0] = final_energy
return output, persis_info, calc_status
See forces_simf.py_ for a complete example.
See uniform_or_localopt.py_ for an example of using ``FINISHED_PERSISTENT_GEN_TAG``.

Expand Down
39 changes: 13 additions & 26 deletions libensemble/manager.py
Expand Up @@ -12,22 +12,19 @@
import numpy as np

from libensemble.utils.timer import Timer

from libensemble.message_numbers import (
EVAL_SIM_TAG,
FINISHED_PERSISTENT_SIM_TAG,
EVAL_GEN_TAG,
FINISHED_PERSISTENT_GEN_TAG,
STOP_TAG,
UNSET_TAG,
PERSIS_STOP,
WORKER_KILL,
WORKER_KILL_ON_ERR,
WORKER_KILL_ON_TIMEOUT,
TASK_FAILED,
WORKER_DONE,
STOP_TAG,
MAN_SIGNAL_FINISH,
MAN_SIGNAL_KILL,
FINISHED_PERSISTENT_SIM_TAG,
FINISHED_PERSISTENT_GEN_TAG,
calc_status_strings,
)

from libensemble.message_numbers import calc_type_strings
from libensemble.comms.comms import CommFinishedException
from libensemble.worker import WorkerErrMsg
Expand Down Expand Up @@ -287,7 +284,7 @@ def _check_work_order(self, Work, w):
)
else:
assert self.W[w - 1]["active"] == 0, (
"Allocation function requested work be sent to worker %d, an " "already active worker." % w
"Allocation function requested work be sent to worker %d, an already active worker." % w
)
work_rows = Work["libE_info"]["H_rows"]
if len(work_rows):
Expand All @@ -300,7 +297,7 @@ def _check_work_order(self, Work, w):
hist_fields = self.hist.H.dtype.names
diff_fields = list(work_fields.difference(hist_fields))

assert not diff_fields, "Allocation function requested invalid fields {}" "be sent to worker={}.".format(
assert not diff_fields, "Allocation function requested invalid fields {} be sent to worker={}.".format(
diff_fields, w
)

Expand Down Expand Up @@ -386,21 +383,11 @@ def _check_received_calc(D_recv):
assert calc_type in [
EVAL_SIM_TAG,
EVAL_GEN_TAG,
], "Aborting, Unknown calculation type received. " "Received type: {}".format(calc_type)

assert calc_status in [
FINISHED_PERSISTENT_SIM_TAG,
FINISHED_PERSISTENT_GEN_TAG,
UNSET_TAG,
PERSIS_STOP,
MAN_SIGNAL_FINISH,
MAN_SIGNAL_KILL,
WORKER_KILL_ON_ERR,
WORKER_KILL_ON_TIMEOUT,
WORKER_KILL,
TASK_FAILED,
WORKER_DONE,
], "Aborting: Unknown calculation status received. " "Received status: {}".format(calc_status)
], "Aborting, Unknown calculation type received. Received type: {}".format(calc_type)

assert calc_status in list(calc_status_strings.keys()) + [PERSIS_STOP] or isinstance(
calc_status, str
), "Aborting: Unknown calculation status received. Received status: {}".format(calc_status)

def _receive_from_workers(self, persis_info):
"""Receives calculation output from workers. Loops over all
Expand Down
24 changes: 16 additions & 8 deletions libensemble/message_numbers.py
@@ -1,4 +1,6 @@
# --- Tags
# -------------------------------
# -- Basic User Function Tags ---
# -------------------------------

UNSET_TAG = 0

Expand All @@ -10,18 +12,23 @@
# When received by the manager, tells manager that worker is done with sim eval.
EVAL_GEN_TAG = 2

STOP_TAG = 3 # Manager tells worker (or persistent calc) to stop
PERSIS_STOP = 4 # Manager tells persistent calculation to stop
STOP_TAG = 3 # Manager tells worker (or persistent user_f) to stop
PERSIS_STOP = 4 # Manager tells persistent user_f to stop

# last_message_number_rst_tag

calc_type_strings = {EVAL_SIM_TAG: "sim", EVAL_GEN_TAG: "gen", PERSIS_STOP: "STOP with work", None: "No type set"}

calc_type_strings = {
EVAL_SIM_TAG: "sim",
EVAL_GEN_TAG: "gen",
PERSIS_STOP: "STOP with work",
None: "No type set",
}

# --- Signal flags (in message body vs tags)
# --------------------------------------
# -- Calculation Status/Signal Tags ----
# --------------------------------------

# first_calc_status_rst_tag
# CALC STATUS/SIGNAL FLAGS
FINISHED_PERSISTENT_SIM_TAG = 11 # tells manager sim_f done persistent mode
FINISHED_PERSISTENT_GEN_TAG = 12 # tells manager gen_f done persistent mode
MAN_SIGNAL_FINISH = 20 # Kill tasks and shutdown worker
Expand All @@ -32,9 +39,10 @@
TASK_FAILED = 33 # Calc had tasks that failed
WORKER_DONE = 34 # Calculation was successful
# last_calc_status_rst_tag
CALC_EXCEPTION = 35 # Reserved: Automatically used if gen_f or sim_f raised an exception.
CALC_EXCEPTION = 35 # Reserved: Automatically used if user_f raised an exception

calc_status_strings = {
UNSET_TAG: "Not set",
FINISHED_PERSISTENT_SIM_TAG: "Persis sim finished",
FINISHED_PERSISTENT_GEN_TAG: "Persis gen finished",
MAN_SIGNAL_FINISH: "Manager killed on finish",
Expand Down
2 changes: 1 addition & 1 deletion libensemble/sim_funcs/inverse_bayes.py
Expand Up @@ -12,7 +12,7 @@ def likelihood_calculator(H, persis_info, sim_specs, _):
for i, x in enumerate(H["x"]):
H_o["like"][i] = six_hump_camel_func(x)

return H_o, persis_info
return H_o, persis_info, "custom_status"


def six_hump_camel_func(x):
Expand Down
10 changes: 5 additions & 5 deletions libensemble/worker.py
Expand Up @@ -283,9 +283,9 @@ def _handle_calc(self, Work, calc_in):
assert isinstance(out, tuple), "Calculation output must be a tuple."
assert len(out) >= 2, "Calculation output must be at least two elements."

calc_status = out[2] if len(out) >= 3 else UNSET_TAG

if calc_status is None:
if len(out) >= 3:
calc_status = out[2]
else:
calc_status = UNSET_TAG

# Check for buffered receive
Expand All @@ -311,15 +311,15 @@ def _handle_calc(self, Work, calc_in):
calc_type_strings[calc_type],
timer,
task.timer,
calc_status_strings.get(calc_status, "Not set"),
calc_status_strings.get(calc_status, calc_status),
)
else:
calc_msg = "{} {}: {} {} Status: {}".format(
enum_desc,
calc_id,
calc_type_strings[calc_type],
timer,
calc_status_strings.get(calc_status, "Not set"),
calc_status_strings.get(calc_status, calc_status),
)

logging.getLogger(LogConfig.config.stats_name).info(calc_msg)
Expand Down

0 comments on commit 782261f

Please sign in to comment.