Merge pull request #1068 from Libensemble/feature/gen_procs_gpus

Add gen options for num_procs and num_gpus
Libensemble · Sep 1, 2023 · aa2447c · aa2447c
2 parents 1de569c + 51d102c
commit aa2447c
Show file tree

Hide file tree

Showing 11 changed files with 334 additions and 47 deletions.
diff --git a/.spell b/.spell
@@ -4,3 +4,4 @@ apoints
 numer
 hist
 inout
+slac
diff --git a/docs/data_structures/libE_specs.rst b/docs/data_structures/libE_specs.rst
@@ -206,6 +206,16 @@ the ``LibeSpecs`` class. When provided as a Python class, options are validated
                     By default resources will be divided by workers (excluding
                     ``zero_resource_workers``).
 
+                "gen_num_procs" [int] = ``0``:
+                    The default number of processors (MPI ranks) required by generators. Unless
+                    overridden by equivalent `persis_info` settings, generators will be allocated
+                    this many processors for applications launched via the MPIExecutor.
+
+                "gen_num_gpus" [int] = ``0``:
+                    The default number of GPUs required by generators. Unless overridden by
+                    the equivalent `persis_info` settings, generators will be allocated this
+                    many GPUs.
+
                 "enforce_worker_core_bounds" [bool] = ``False``:
                     Permit submission of tasks with a
                     higher processor count than the CPUs available to the worker.

diff --git a/docs/resource_manager/overview.rst b/docs/resource_manager/overview.rst
@@ -193,10 +193,17 @@ if ``split2fit`` is *False*, as this could otherwise never be scheduled.
 Varying generator resources
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-For all supporting allocation functions, setting the ``persis_info["gen_resources"]``
-to an integer value will provide resource sets to generators when they are started,
-with the default to provide no resources. This could be set in the calling script
-or inside the allocation function.
+By default, generators are not allocated resources in dynamic mode. Fixed resources
+for the generator can be set using the *libE_specs* options
+``gen_num_procs`` and ``gen_num_gpus``, which each take an integer value.
+If only ``gen_num_gpus`` is set, then the number of processors will match.
+
+To vary generator resources, ``persis_info`` settings can be used in allocation
+functions before calling the ``gen_work`` support function. ``persis_info`` takes
+the same options (``gen_num_procs`` and ``gen_num_gpus``).
+
+Alternatively, the setting ``persis_info["gen_resources"]`` can also be set to
+a number of resource sets.
 
 Note that persistent workers maintain their resources until coming out of a
 persistent state.

diff --git a/docs/resource_manager/zero_resource_workers.rst b/docs/resource_manager/zero_resource_workers.rst
@@ -50,9 +50,17 @@ worker for the persistent generator - a common use-case.
 
 In general, the number of resource sets should be set to enable the maximum
 concurrency desired by the ensemble, taking into account generators and simulators.
-Users can set generator resources by setting ``persis_info["gen_resources"]``
-to an integer value, representing the number of resource sets to give to the
-generator. The default is zero.
+
+Users can set generator resources using the *libE_specs* options
+``gen_num_procs`` and/or ``gen_num_gpus``, which take integer values.
+If only ``gen_num_gpus`` is set, then the number of processors will match.
+
+To vary generator resources, ``persis_info`` settings can be used in allocation
+functions before calling the ``gen_work`` support function. ``persis_info`` takes
+the same options (``gen_num_procs`` and ``gen_num_gpus``).
+
+Alternatively, the setting ``persis_info["gen_resources"]`` can also be set to
+a number of resource sets.
 
 The available nodes are always divided by the number of resource sets, and there
 may be multiple nodes or a partition of a node in each resource set. If the split

diff --git a/libensemble/gen_funcs/persistent_sampling_var_resources.py b/libensemble/gen_funcs/persistent_sampling_var_resources.py
@@ -6,14 +6,17 @@
 
 import numpy as np
 
+from libensemble.executors.executor import Executor
 from libensemble.message_numbers import EVAL_GEN_TAG, FINISHED_PERSISTENT_GEN_TAG, PERSIS_STOP, STOP_TAG
 from libensemble.tools.persistent_support import PersistentSupport
+from libensemble.tools.test_support import check_gpu_setting
 
 __all__ = [
     "uniform_sample",
     "uniform_sample_with_procs_gpus",
     "uniform_sample_with_var_priorities",
     "uniform_sample_diff_simulations",
+    "uniform_sample_with_sim_gen_resources",
 ]
 
 
@@ -145,3 +148,51 @@ def uniform_sample_diff_simulations(_, persis_info, gen_specs, libE_info):
             b = len(calc_in)
 
     return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG
+
+
+def uniform_sample_with_sim_gen_resources(_, persis_info, gen_specs, libE_info):
+    """
+    Persistent generator that submits a test application via the Executor on each
+    iteration, and randomly requests processors (with matching GPUs) for each point.
+
+    .. seealso::
+        `test_GPU_gen_resources.py <https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/functionality_tests/test_GPU_gen_resources.py>`_
+    """  # noqa
+
+    b, n, lb, ub = _get_user_params(gen_specs["user"])
+    rng = persis_info["rand_stream"]
+    ps = PersistentSupport(libE_info, EVAL_GEN_TAG)
+    tag = None
+
+    dry_run = gen_specs["user"].get("dry_run", False)  # logs run lines instead of running
+
+    while tag not in [STOP_TAG, PERSIS_STOP]:
+        H_o = np.zeros(b, dtype=gen_specs["out"])
+        H_o["x"] = rng.uniform(lb, ub, (b, n))
+
+        # Run an app using resources given by libE_specs or persis_info (test purposes only).
+        task = Executor.executor.submit(
+            app_name="six_hump_camel",
+            app_args="-0.99 -0.19",
+            stdout="out.txt",
+            stderr="err.txt",
+            dry_run=dry_run,
+        )
+
+        if not dry_run:
+            task.wait()  # Wait for run to complete
+
+        # Asserts GPU set correctly (for known MPI runners); also called when dry_run is set
+        check_gpu_setting(task, print_setting=True)
+
+        # Set resources for sims: one random processor count per generated point
+        nprocs = rng.integers(1, gen_specs["user"]["max_procs"] + 1, b)
+        H_o["num_procs"] = nprocs  # This would get matched to GPUs anyway, if no other config given
+        H_o["num_gpus"] = nprocs
+        print(f"GEN created {b} sims requiring {nprocs} procs. One GPU per proc", flush=True)
+
+        tag, Work, calc_in = ps.send_recv(H_o)
+        if hasattr(calc_in, "__len__"):
+            b = len(calc_in)  # Next batch has as many points as rows received back
+
+    return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG
diff --git a/libensemble/manager.py b/libensemble/manager.py
@@ -196,6 +196,8 @@ def __init__(
         dyn_keys = ("resource_sets", "num_procs", "num_gpus")
         dyn_keys_in_H = any(k in self.hist.H.dtype.names for k in dyn_keys)
         self.use_resource_sets = dyn_keys_in_H or self.libE_specs.get("num_resource_sets")
+        self.gen_num_procs = libE_specs.get("gen_num_procs", 0)
+        self.gen_num_gpus = libE_specs.get("gen_num_gpus", 0)
 
         self.W = np.zeros(len(self.wcomms), dtype=Manager.worker_dtype)
         self.W["worker_id"] = np.arange(len(self.wcomms)) + 1
@@ -571,6 +573,8 @@ def _get_alloc_libE_info(self) -> dict:
             "sim_ended_count": self.hist.sim_ended_count,
             "sim_max_given": self._sim_max_given(),
             "use_resource_sets": self.use_resource_sets,
+            "gen_num_procs": self.gen_num_procs,
+            "gen_num_gpus": self.gen_num_gpus,
         }
 
     def _alloc_work(self, H: npt.NDArray, persis_info: dict) -> dict:

diff --git a/libensemble/specs.py b/libensemble/specs.py
@@ -448,6 +448,20 @@ class LibeSpecs(BaseModel):
     If not set, resources will be divided evenly (excluding zero_resource_workers).
     """
 
+    gen_num_procs: Optional[int] = 0
+    """
+    The default number of processors (MPI ranks) required by generators. Unless
+    overridden by the equivalent `persis_info` settings, generators will be
+    allocated this many processors for applications launched via the MPIExecutor.
+    """
+
+    gen_num_gpus: Optional[int] = 0
+    """
+    The default number of GPUs required by generators. Unless overridden by
+    the equivalent `persis_info` settings, generators will be allocated this
+    many GPUs.
+    """
+
     enforce_worker_core_bounds: Optional[bool] = False
     """
     If ``False``, the Executor will permit submission of tasks with a

diff --git a/libensemble/tests/functionality_tests/test_GPU_gen_resources.py b/libensemble/tests/functionality_tests/test_GPU_gen_resources.py
@@ -0,0 +1,130 @@
+"""
+Tests variable resource detection and automatic GPU assignment in both
+generator and simulators.
+
+The persistent generator creates simulations with variable resource requirements,
+while also requiring resources itself. The resources required by a sim must
+not be larger than what remains once the generator resources are assigned.
+
+The sim_f (gpu_variable_resources_from_gen) asserts that GPU assignment
+is correct for the default method for the MPI runner. GPUs are not actually
+used for the default application. One GPU per worker is mocked up below (if this line
+is removed, libEnsemble will detect any GPUs available).
+
+A dry_run option is provided. This can be set in the calling script, and will
+just print run-lines and GPU settings. This may be used for testing run-lines
+produced and GPU settings for different MPI runners.
+
+Execute via one of the following commands (e.g. 4 workers):
+   mpiexec -np 5 python test_GPU_gen_resources.py
+   python test_GPU_gen_resources.py --comms local --nworkers 4
+
+When running with either of the above commands, the number of concurrent evaluations
+of the objective function will be 3, as one of the four workers will be the
+persistent generator.
+"""
+
+# Do not change these lines - they are parsed by run-tests.sh
+# TESTSUITE_COMMS: mpi local
+# TESTSUITE_NPROCS: 5
+
+import sys
+
+import numpy as np
+
+from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
+from libensemble.executors.mpi_executor import MPIExecutor
+from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_sim_gen_resources as gen_f
+
+# Import libEnsemble items for this test
+from libensemble.libE import libE
+from libensemble.sim_funcs import six_hump_camel
+from libensemble.sim_funcs.var_resources import gpu_variable_resources_from_gen as sim_f
+from libensemble.tools import add_unique_random_streams, parse_args
+
+# from libensemble import logger
+# logger.set_level("DEBUG")  # For testing the test
+
+
+# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows).
+if __name__ == "__main__":
+    nworkers, is_manager, libE_specs, _ = parse_args()
+
+    libE_specs["num_resource_sets"] = nworkers  # Persistent gen DOES need resources
+
+    # Output directories for sims and the gen (the GPU mock-up is set in resource_info below)
+    libE_specs["sim_dirs_make"] = True  # Will only contain files if dry_run is False
+    libE_specs["gen_dirs_make"] = True  # Will only contain files if dry_run is False
+    libE_specs["ensemble_dir_path"] = "./ensemble_GPU_gen_resources_w" + str(nworkers)
+    libE_specs["reuse_output_dir"] = True
+    dry_run = True  # Passed to both sim and gen via their "user" settings
+
+    if libE_specs["comms"] == "tcp":
+        sys.exit("This test only runs with MPI or local -- aborting...")
+
+    # Get paths for applications to run
+    six_hump_camel_app = six_hump_camel.__file__
+    exctr = MPIExecutor()
+    exctr.register_app(full_path=six_hump_camel_app, app_name="six_hump_camel")
+
+    n = 2
+    sim_specs = {
+        "sim_f": sim_f,
+        "in": ["x"],
+        "out": [("f", float)],
+        "user": {"dry_run": dry_run},
+    }
+
+    gen_specs = {
+        "gen_f": gen_f,
+        "persis_in": ["f", "x", "sim_id"],
+        "out": [("num_procs", int), ("num_gpus", int), ("x", float, n)],
+        "user": {
+            "initial_batch_size": nworkers - 1,
+            "max_procs": nworkers - 1,  # Any sim created can req. 1 worker up to all.
+            "lb": np.array([-3, -2]),
+            "ub": np.array([3, 2]),
+            "dry_run": dry_run,
+        },
+    }
+
+    alloc_specs = {
+        "alloc_f": alloc_f,
+        "user": {
+            "give_all_with_same_priority": False,
+            "async_return": False,  # False batch returns
+        },
+    }
+
+    exit_criteria = {"sim_max": 20}
+    libE_specs["resource_info"] = {"cores_on_node": (nworkers * 2, nworkers * 4), "gpus_on_node": nworkers}
+
+    base_libE_specs = libE_specs.copy()  # Shallow copy: the nested resource_info dict is still shared
+    for run in range(5):
+
+        # Reset per-run settings; resource_info is copied too, so run 3's change cannot leak into run 4
+        libE_specs = {**base_libE_specs, "resource_info": dict(base_libE_specs["resource_info"])}
+        persis_info = add_unique_random_streams({}, nworkers + 1)
+
+        if run == 0:
+            libE_specs["gen_num_procs"] = 2  # Gen resources from libE_specs: processors only
+        elif run == 1:
+            libE_specs["gen_num_gpus"] = 1  # Gen resources from libE_specs: GPUs only
+        elif run == 2:
+            persis_info["gen_num_gpus"] = 1  # Same request, made through persis_info
+        elif run == 3:
+            # Two GPUs per resource set
+            libE_specs["resource_info"]["gpus_on_node"] = nworkers * 2
+            persis_info["gen_num_gpus"] = 1
+        elif run == 4:
+            # Two GPUs requested for gen
+            persis_info["gen_num_procs"] = 2
+            persis_info["gen_num_gpus"] = 2
+            gen_specs["user"]["max_procs"] = max(nworkers - 2, 1)
+
+        # Perform the run
+        H, persis_info, flag = libE(
+            sim_specs, gen_specs, exit_criteria, persis_info, libE_specs=libE_specs, alloc_specs=alloc_specs
+        )
+
+# All asserts are in gen and sim funcs
diff --git a/libensemble/tests/regression_tests/test_GPU_variable_resources.py b/libensemble/tests/regression_tests/test_GPU_variable_resources.py
@@ -3,7 +3,7 @@
 
 The persistent generator creates simulations with variable resource requirements.
 
-The sim_f (gpu_variable_resources) asserts that GPUs assignment
+The sim_f (gpu_variable_resources_from_gen) asserts that GPUs assignment
 is correct for the default method for the MPI runner. GPUs are not actually
 used for default application. Four GPUs per node is mocked up below (if this line
 is removed, libEnsemble will detect any GPUs available).

diff --git a/libensemble/tests/unit_tests/test_allocation_funcs_and_support.py b/libensemble/tests/unit_tests/test_allocation_funcs_and_support.py
@@ -37,7 +37,9 @@
 
 
 def initialize_resources():
-    Resources.init_resources({"comms": "local", "nworkers": 4, "num_resource_sets": 4})
+    platform_info = {"cores_per_node": 8, "gpus_per_node": 4}
+    libE_specs = {"comms": "local", "nworkers": 4, "num_resource_sets": 4}
+    Resources.init_resources(libE_specs=libE_specs, platform_info=platform_info)
     Resources.resources.set_resource_manager(4)
 
 
@@ -218,8 +220,20 @@ def test_als_gen_work():
     als = AllocSupport(W, True, persis_info=persis_info)
     Work = {}
     Work[1] = als.gen_work(1, ["sim_id"], range(0, 5), persis_info[1])
+    assert Work[1]["libE_info"]["rset_team"] == [0], "Resource set should be assigned in libE_info"
+    del persis_info["gen_resources"]
 
-    assert len(Work[1]["libE_info"]["rset_team"]), "Resource set should be assigned in libE_info"
+    persis_info["gen_num_procs"] = 2
+    Work[2] = als.gen_work(1, ["sim_id"], range(0, 5), persis_info[2])
+    assert Work[2]["libE_info"]["rset_team"] == [1], "Resource set should be assigned in libE_info"
+    assert Work[2]["libE_info"]["num_procs"] == 2, "num_procs set should be assigned in libE_info"
+
+    persis_info["gen_num_procs"] = 2
+    persis_info["gen_num_gpus"] = 2  # Expected to need two resource sets (see rset_team assert below)
+    Work[3] = als.gen_work(1, ["sim_id"], range(0, 5), persis_info[3])
+    assert Work[3]["libE_info"]["rset_team"] == [2, 3], "Resource set should be assigned in libE_info"
+    assert Work[3]["libE_info"]["num_procs"] == 2, "num_procs set should be assigned in libE_info"
+    assert Work[3]["libE_info"]["num_gpus"] == 2, "num_gpus set should be assigned in libE_info"
 
     clear_resources()
 
@@ -424,30 +438,39 @@ def test_als_points_by_priority():
 
 def test_convert_to_rsets():
     user_params = []
-    libE_info = {}
     gen_fields = [("num_procs", int), ("num_gpus", int)]
     H = np.zeros(5, dtype=libE_fields + gen_fields)
 
     H_rows = 1
-    H[H_rows]["num_gpus"] = 3
+    num_gpus = 3
+    H[H_rows]["num_gpus"] = num_gpus
     units_str = "num_gpus"
 
     gpus_per_rset = 1
-    num_rsets = AllocSupport._convert_to_rsets(libE_info, user_params, H, H_rows, gpus_per_rset, units_str)
+    libE_info, num_rsets = {}, None  # Reset
+    num_rsets = AllocSupport._convert_to_rsets(libE_info, user_params, gpus_per_rset, num_gpus, units_str)
+    assert num_rsets == 3, f"Unexpected number of rsets {num_rsets}"
+    assert libE_info["num_gpus"] == 3, f"Unexpected number for num_gpus {libE_info['num_gpus']}"
+
+    libE_info, num_rsets = {}, None  # Reset
+    num_rsets = AllocSupport._convert_rows_to_rsets(libE_info, user_params, H, H_rows, gpus_per_rset, units_str)
     assert num_rsets == 3, f"Unexpected number of rsets {num_rsets}"
     assert libE_info["num_gpus"] == 3, f"Unexpected number for num_gpus {libE_info['num_gpus']}"
 
     gpus_per_rset = 2
-    num_rsets = AllocSupport._convert_to_rsets(libE_info, user_params, H, H_rows, gpus_per_rset, units_str)
+    libE_info, num_rsets = {}, None  # Reset
+    num_rsets = AllocSupport._convert_rows_to_rsets(libE_info, user_params, H, H_rows, gpus_per_rset, units_str)
     assert num_rsets == 2, f"Unexpected number of rsets {num_rsets}"
     assert libE_info["num_gpus"] == 3, f"Unexpected number for num_gpus {libE_info['num_gpus']}"
 
     gpus_per_rset = 0
+    libE_info, num_rsets = {}, None  # Reset
     with pytest.raises(InsufficientResourcesError):
-        num_rsets = AllocSupport._convert_to_rsets(libE_info, user_params, H, H_rows, gpus_per_rset, units_str)
+        num_rsets = AllocSupport._convert_rows_to_rsets(libE_info, user_params, H, H_rows, gpus_per_rset, units_str)
 
     H[H_rows]["num_gpus"] = 0
-    num_rsets = AllocSupport._convert_to_rsets(libE_info, user_params, H, H_rows, gpus_per_rset, units_str)
+    libE_info, num_rsets = {}, None  # Reset
+    num_rsets = AllocSupport._convert_rows_to_rsets(libE_info, user_params, H, H_rows, gpus_per_rset, units_str)
     assert num_rsets == 0, f"Unexpected number of rsets {num_rsets}"
     assert libE_info["num_gpus"] == 0, f"Unexpected number for num_gpus {libE_info['num_gpus']}"