diff --git a/docs/tutorials/executor_forces_tutorial.rst b/docs/tutorials/executor_forces_tutorial.rst index f19b125251..a68da6ff0a 100644 --- a/docs/tutorials/executor_forces_tutorial.rst +++ b/docs/tutorials/executor_forces_tutorial.rst @@ -9,11 +9,14 @@ functions using the :doc:`executor<../executor/overview>`. This tutorial's calling script registers a compiled executable that simulates electrostatic forces between a collection of particles. The simulator function launches instances of this executable and reads output files to determine -if the run was successful. +the result. This tutorial uses libEnsemble's :doc:`MPI Executor<../executor/mpi_executor>`, which automatically detects available MPI runners and resources. +This example also uses a persistent generator. This generator runs on a +worker throughout the ensemble, producing new simulation parameters as requested. + Getting Started --------------- @@ -35,33 +38,42 @@ Calling Script Complete scripts for this example can be found in the forces_simple_ directory. -Let's begin by writing our calling script to parameterize our simulation and -generation functions and call libEnsemble. Create a Python file called `run_libe_forces.py` -containing: +Let's begin by writing our calling script to specify our simulation and +generation functions and call libEnsemble. Create a Python file called +`run_libe_forces.py` containing: .. code-block:: python :linenos: - :emphasize-lines: 15,19 + :emphasize-lines: 17,25, 28 #!/usr/bin/env python import os + import sys + import numpy as np from forces_simf import run_forces # Sim func from current dir - from libensemble.libE import libE - from libensemble.gen_funcs.sampling import uniform_random_sample - from libensemble.tools import parse_args, add_unique_random_streams + from libensemble import Ensemble + from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors import MPIExecutor + from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f + from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + if __name__ == "__main__": - # Parse number of workers, comms type, etc. from arguments - nworkers, is_manager, libE_specs, _ = parse_args() + # Initialize MPI Executor + exctr = MPIExecutor() - # Initialize MPI Executor instance - exctr = MPIExecutor() + # Register simulation executable with executor + sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") - # Register simulation executable with executor - sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") - exctr.register_app(full_path=sim_app, app_name="forces") + if not os.path.isfile(sim_app): + sys.exit("forces.x not found - please build first in ../forces_app dir") + + exctr.register_app(full_path=sim_app, app_name="forces") + + # Parse number of workers, comms type, etc. from arguments + ensemble = Ensemble(parse_args=True, executor=exctr) On line 15, we instantiate our :doc:`MPI Executor<../executor/mpi_executor>`. @@ -69,63 +81,91 @@ Registering an application is as easy as providing the full file-path and giving it a memorable name. This Executor will later be used within our simulation function to launch the registered app. -Next define the :ref:`sim_specs` and -:ref:`gen_specs` data structures. Recall that these -are used to specify to libEnsemble what user functions and input/output fields to -expect, and also to parameterize function instances without hard-coding: +On line 22, we initialize the ensemble. 
The :meth:`parse_args`
+parameter is used to read `comms` and `nworkers` from the command line. This sets
+the respective `libE_specs` options.
+
+Next, we add basic configuration for the ensemble. As one worker will run a persistent
+generator, we calculate the number of workers that need resources to run simulations.
+We also set `sim_dirs_make` so that a directory is created for each simulation. This
+helps organize output and also helps prevent workers from overwriting previous results.
 
 .. code-block:: python
-    :linenos:
+    :linenos:
+    :lineno-start: 30
 
-    # State the sim_f, inputs, outputs
-    sim_specs = {
-        "sim_f": run_forces,  # sim_f, imported above
-        "in": ["x"],  # Name of input for sim_f
-        "out": [("energy", float)],  # Name, type of output from sim_f
-    }
-
-    # State the gen_f, inputs, outputs, additional parameters
-    gen_specs = {
-        "gen_f": uniform_random_sample,  # Generator function
-        "in": [],  # Generator input
-        "out": [("x", float, (1,))],  # Name, type, and size of data from gen_f
-        "user": {
-            "lb": np.array([1000]),  # User parameters for the gen_f
-            "ub": np.array([3000]),
-            "gen_batch_size": 8,
-        },
-    }
+    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator
 
-Our generation function will generate random numbers of particles (between
-the ``"lb"`` and ``"ub"`` bounds) for our simulation function to evaluate via our
-registered application.
+    # Persistent gen does not need resources
+    ensemble.libE_specs = LibeSpecs(
+        num_resource_sets=nsim_workers,
+        sim_dirs_make=True,
+    )
 
-The following additional instructs libEnsemble's workers
-to each create and work within a separate directory each time they call a simulation
-function. This helps organize output and also helps prevents workers from overwriting
-previous results:
+Next, we define the :ref:`sim_specs` and
+:ref:`gen_specs`. Recall that these are used to specify
+to libEnsemble what user functions and input/output fields to
+expect, and also to parameterize user functions:
 
 .. code-block:: python
-    :linenos:
+    :linenos:
+    :lineno-start: 38
+
+    ensemble.sim_specs = SimSpecs(
+        sim_f=run_forces,
+        inputs=["x"],
+        outputs=[("energy", float)],
+    )
+
+    ensemble.gen_specs = GenSpecs(
+        gen_f=gen_f,
+        inputs=[],  # No input when starting persistent generator
+        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
+        outputs=[("x", float, (1,))],
+        user={
+            "initial_batch_size": nsim_workers,
+            "lb": np.array([1000]),  # min particles
+            "ub": np.array([3000]),  # max particles
+        },
+    )
+
+Next, configure an allocation function, which starts the one persistent
+generator and farms out the simulations. We also tell it to wait for all
+simulations to return their results before generating more parameters.
+
+.. code-block:: python
+    :linenos:
+    :lineno-start: 56
+
+    # Starts one persistent generator. Simulated values are returned in batch.
+    ensemble.alloc_specs = AllocSpecs(
+        alloc_f=alloc_f,
+        user={
+            "async_return": False,  # False causes batch returns
+        },
+    )
+
+Now we set :ref:`exit_criteria` to
+exit after running eight simulations.
 
-    # Create and work inside separate per-simulation directories
-    libE_specs["sim_dirs_make"] = True
+We also give each worker a seeded random stream via the
+:ref:`persis_info` option.
+These can be used for random number generation if required.
 
-After configuring :ref:`persis_info` and
-:ref:`exit_criteria`, we initialize libEnsemble
-by calling the primary :doc:`libE()<../libe_module>` routine:
+Finally, we :doc:`run<../libe_module>` the ensemble.
 
 .. 
code-block:: python
     :linenos:
+    :lineno-start: 64
 
-    # Instruct libEnsemble to exit after this many simulations
-    exit_criteria = {"sim_max": 8}
+    # Instruct libEnsemble to exit after this many simulations
+    ensemble.exit_criteria = ExitCriteria(sim_max=8)
 
-    # Seed random streams for each worker, particularly for gen_f
-    persis_info = add_unique_random_streams({}, nworkers + 1)
+    # Seed random streams for each worker, particularly for gen_f
+    ensemble.add_random_streams()
 
-    # Launch libEnsemble
-    H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, persis_info=persis_info, libE_specs=libE_specs)
+    # Run ensemble
+    ensemble.run()
 
 Exercise
 ^^^^^^^^
@@ -134,48 +174,55 @@ This may take some additional browsing of the docs to complete.
 
 Write an alternative Calling Script similar to above, but with the following differences:
 
-   1. Add an additional worker directory so workers operate in ``/scratch/ensemble`` instead of the default current working directory.
+   1. Set :ref:`libEnsemble's logger` to print debug messages.
    2. Override the MPIExecutor's detected MPI runner with ``"openmpi"``.
-   3. Set :ref:`libEnsemble's logger` to print debug messages.
-   4. Use the :meth:`save_libE_output()` function to save the History array and ``persis_info`` to files after libEnsemble completes.
+   3. Tell the allocation function to return results to the generator asynchronously.
+   4. Use the ensemble function :meth:`save_output()` to save the History array and ``persis_info`` to files after libEnsemble completes.
 
-.. dropdown:: **Click Here for Solution**
+.. dropdown:: **Click Here for Solutions**
+
+   **Soln 1.** Debug logging gives lots of information.
 
    .. code-block:: python
        :linenos:
+       :lineno-start: 13
 
-       #!/usr/bin/env python
-       import os
-       import numpy as np
-       from forces_simf import run_forces  # Sim func from current dir
-
-       from libensemble import logger
-       from libensemble.libE import libE
-       from libensemble.gen_funcs.sampling import uniform_random_sample
-       from libensemble.tools import parse_args, add_unique_random_streams, save_libE_output
-       from libensemble.executors import MPIExecutor
+       from libensemble import logger
+       logger.set_level("DEBUG")
 
-       # Parse number of workers, comms type, etc. from arguments
-       nworkers, is_manager, libE_specs, _ = parse_args()
+   **Soln 2.** This can also be specified via the :attr:`platform_specs` option.
 
-       # Adjust logger level
-       logger.set_level("DEBUG")
+   .. code-block:: python
+       :linenos:
+       :lineno-start: 16
 
-       # Initialize MPI Executor instance
+       # Initialize MPI Executor
        exctr = MPIExecutor(custom_info={"mpi_runner": "openmpi"})
 
-       ...
+   **Soln 3.** Set ``async_return`` to *True*.
+
+   .. code-block:: python
+       :linenos:
+       :lineno-start: 56
+       :emphasize-lines: 5
+
+       # Starts one persistent generator. Simulated values are returned asynchronously.
+       ensemble.alloc_specs = AllocSpecs(
+           alloc_f=alloc_f,
+           user={
+               "async_return": True,
+           },
+       )
 
-       # Instruct workers to operate somewhere else on the filesystem
-       libE_specs["ensemble_dir_path"] = "/scratch/ensemble"
+   **Soln 4.** This will save the output based on the name of the calling script. You
+   can give any string in place of ``__file__``.
 
-       ...
+   .. 
code-block:: python + :linenos: + :lineno-start: 72 - # Launch libEnsemble - H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, persis_info=persis_info, libE_specs=libE_specs) + ensemble.save_output(__file__) - if is_manager: - save_libE_output(H, persis_info, __file__, nworkers) Simulation Function ------------------- @@ -193,37 +240,39 @@ for starters: import numpy as np - # To retrieve our MPI Executor instance - from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim - from libensemble.message_numbers import WORKER_DONE, TASK_FAILED + from libensemble.message_numbers import TASK_FAILED, WORKER_DONE + + def run_forces(H, persis_info, sim_specs, libE_info): + """Runs the forces MPI application""" - def run_forces(H, _, sim_specs): calc_status = 0 # Parse out num particles, from generator function particles = str(int(H["x"][0][0])) - # num particles, timesteps, also using num particles as seed + # app arguments: num particles, timesteps, also using num particles as seed args = particles + " " + str(10) + " " + particles - # Retrieve our MPI Executor instance - exctr = Executor.executor + # Retrieve our MPI Executor + exctr = libE_info["executor"] - # Submit our forces app for execution + # Submit our forces app for execution. task = exctr.submit(app_name="forces", app_args=args) # Block until the task finishes task.wait() + We retrieve the generated number of particles from ``H`` and construct an argument string for our launched application. The particle count doubles up as a random number seed here. -We then retrieve our previously instantiated Executor from the class definition, -where it was automatically stored as an attribute. +We then retrieve our previously instantiated Executor. libEnsemble will use +the MPI runner detected (or provided by platform options). +As `num_procs` (or similar) is not specified, libEnsemble will assign the processors +available to this worker. After submitting the "forces" app for execution, a :ref:`Task` object is returned that correlates with the launched app. @@ -233,7 +282,7 @@ for the task to complete via ``task.wait()``. We can assume that afterward, any results are now available to parse. Our application produces a ``forces.stat`` file that contains either energy -computations for every time-step or a "kill" message if particles were lost, which +computations for every timestep or a "kill" message if particles were lost, which indicates a bad run - this can be ignored for now. To complete our simulation function, parse the last energy value from the output file into @@ -243,11 +292,10 @@ to ``WORKER_DONE``. Otherwise, send back ``NAN`` and a ``TASK_FAILED`` status: .. code-block:: python :linenos: - - # Stat file to check for bad runs - statfile = "forces.stat" + :lineno-start: 27 # Try loading final energy reading, set the sim's status + statfile = "forces.stat" try: data = np.loadtxt(statfile) final_energy = data[-1] @@ -256,19 +304,20 @@ to ``WORKER_DONE``. 
Otherwise, send back ``NAN`` and a ``TASK_FAILED`` status: final_energy = np.nan calc_status = TASK_FAILED - # Define our output array, populate with energy reading - outspecs = sim_specs["out"] - output = np.zeros(1, dtype=outspecs) - output["energy"][0] = final_energy + # Define our output array, populate with energy reading + output = np.zeros(1, dtype=sim_specs["out"]) + output["energy"] = final_energy # Return final information to worker, for reporting to manager - return output, calc_status + return output, persis_info, calc_status ``calc_status`` will be displayed in the ``libE_stats.txt`` log file. That's it! As can be seen, with libEnsemble, it's relatively easy to get started -with launching applications. Behind the scenes, libEnsemble evaluates default -MPI runners and available resources and divides them among the workers. +with launching applications. + +Running the example +------------------- This completes our calling script and simulation function. Run libEnsemble with: @@ -276,62 +325,81 @@ This completes our calling script and simulation function. Run libEnsemble with: $ python run_libe_forces.py --comms local --nworkers [nworkers] -Output files---including ``forces.stat`` and files containing ``stdout`` and -``stderr`` content for each task---should appear in the current working +where ``nworkers`` is one more than the number of concurrent simulations. + +Output files (including ``forces.stat`` and files containing ``stdout`` and +``stderr`` content for each task) should appear in the current working directory. Overall workflow information should appear in ``libE_stats.txt`` and ``ensemble.log`` as usual. -For example, my ``libE_stats.txt`` resembled:: - - Worker 1: Gen no 1: gen Time: 0.001 Start: ... End: ... Status: Not set - Worker 1: sim_id 0: sim Time: 0.227 Start: ... End: ... Status: Completed - Worker 2: sim_id 1: sim Time: 0.426 Start: ... End: ... Status: Completed - Worker 1: sim_id 2: sim Time: 0.627 Start: ... End: ... Status: Completed - Worker 2: sim_id 3: sim Time: 0.225 Start: ... End: ... Status: Completed - Worker 1: sim_id 4: sim Time: 0.224 Start: ... End: ... Status: Completed - Worker 2: sim_id 5: sim Time: 0.625 Start: ... End: ... Status: Completed - Worker 1: sim_id 6: sim Time: 0.225 Start: ... End: ... Status: Completed - Worker 2: sim_id 7: sim Time: 0.626 Start: ... End: ... Status: Completed - -Where ``status`` is set based on the simulation function's returned ``calc_status``. - -My ``ensemble.log`` (on a ten-core laptop) resembled:: - - [0] ... libensemble.libE (INFO): Logger initializing: [workerID] precedes each line. [0] = Manager - [0] ... libensemble.libE (INFO): libE version v0.9.0 - [0] ... libensemble.manager (INFO): Manager initiated on node my_laptop - [0] ... libensemble.manager (INFO): Manager exit_criteria: {"sim_max": 8} - [1] ... libensemble.worker (INFO): Worker 1 initiated on node my_laptop - [2] ... libensemble.worker (INFO): Worker 2 initiated on node my_laptop - [1] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker1_0: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 2023 10 2023 - [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_0: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 2900 10 2900 - [1] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker1_0 finished with errcode 0 (FINISHED) - [1] ... 
libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker1_1: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 1288 10 1288 - [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_0 finished with errcode 0 (FINISHED) - [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_1: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 2897 10 2897 - [1] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker1_1 finished with errcode 0 (FINISHED) - [1] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker1_2: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 1623 10 1623 - [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_1 finished with errcode 0 (FINISHED) - [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_2: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 1846 10 1846 - [1] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker1_2 finished with errcode 0 (FINISHED) - [1] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker1_3: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 2655 10 2655 - [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_2 finished with errcode 0 (FINISHED) - [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_3: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 1818 10 1818 - [1] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker1_3 finished with errcode 0 (FINISHED) - [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_3 finished with errcode 0 (FINISHED) - [0] ... libensemble.manager (INFO): Term test tripped: sim_max - [0] ... libensemble.manager (INFO): Term test tripped: sim_max - [0] ... libensemble.libE (INFO): Manager total time: 3.939 - -Note again that the ten cores were divided equally among two workers. - -That concludes this tutorial. -Each of these example files can be found in the repository in `examples/tutorials/forces_with_executor`_. +.. dropdown:: **Example run / output** + + + For example, after running: + + .. code-block:: bash + + $ python run_libe_forces.py --comms local --nworkers 3 + + my ``libE_stats.txt`` resembled:: + + Manager : Starting ensemble at: 2023-09-12 18:12:08.517 + Worker 2: sim_id 0: sim Time: 0.205 Start: ... End: ... Status: Completed + Worker 3: sim_id 1: sim Time: 0.284 Start: ... End: ... Status: Completed + Worker 2: sim_id 2: sim Time: 0.117 Start: ... End: ... Status: Completed + Worker 3: sim_id 3: sim Time: 0.294 Start: ... End: ... Status: Completed + Worker 2: sim_id 4: sim Time: 0.124 Start: ... End: ... Status: Completed + Worker 3: sim_id 5: sim Time: 0.174 Start: ... End: ... Status: Completed + Worker 3: sim_id 7: sim Time: 0.135 Start: ... End: ... Status: Completed + Worker 2: sim_id 6: sim Time: 0.275 Start: ... End: ... Status: Completed + Worker 1: Gen no 1: gen Time: 1.038 Start: ... End: ... Status: Persis gen finished + Manager : Exiting ensemble at: 2023-09-12 18:12:09.565 Time Taken: 1.048 + + + where ``status`` is set based on the simulation function's returned ``calc_status``. + + My ``ensemble.log`` (on a four-core laptop) resembled:: + + [0] ... libensemble.libE (INFO): Logger initializing: [workerID] precedes each line. [0] = Manager + [0] ... 
libensemble.libE (INFO): libE version v0.10.2+dev + [0] ... libensemble.manager (INFO): Manager initiated on node shuds + [0] ... libensemble.manager (INFO): Manager exit_criteria: {'sim_max': 8} + [2] ... libensemble.worker (INFO): Worker 2 initiated on node shuds + [3] ... libensemble.worker (INFO): Worker 3 initiated on node shuds + [1] ... libensemble.worker (INFO): Worker 1 initiated on node shuds + [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_0: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 2023 10 2023 + [3] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker3_0: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 2900 10 2900 + [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_0 finished with errcode 0 (FINISHED) + [3] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker3_0 finished with errcode 0 (FINISHED) + [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_1: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 1288 10 1288 + [3] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker3_1: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 2897 10 2897 + [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_1 finished with errcode 0 (FINISHED) + [3] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker3_1 finished with errcode 0 (FINISHED) + [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_2: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 1623 10 1623 + [3] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker3_2: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 1846 10 1846 + [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_2 finished with errcode 0 (FINISHED) + [3] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker3_2 finished with errcode 0 (FINISHED) + [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_3: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 2655 10 2655 + [3] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker3_3: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 1818 10 1818 + [3] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker3_3 finished with errcode 0 (FINISHED) + [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_3 finished with errcode 0 (FINISHED) + [0] ... libensemble.manager (INFO): Term test tripped: sim_max + [0] ... libensemble.manager (INFO): Term test tripped: sim_max + [0] ... libensemble.libE (INFO): Manager total time: 1.043 + + Note again that the four cores were divided equally among two workers that run simulations. + +That concludes this tutorial. Each of these example files can be found in the +repository in `examples/tutorials/forces_with_executor`_. For further experimentation, we recommend trying out this libEnsemble tutorial workflow on a cluster or multi-node system, since libEnsemble can also manage those resources and is developed to coordinate computations at huge scales. -See ref:`HPC platform guides` for more information. +See :ref:`HPC platform guides` for more information. 
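+
+After a run, you can also inspect results programmatically: the History array is
+available on the manager as ``ensemble.H``. A minimal sketch (field names follow
+the ``sim_specs`` and ``gen_specs`` defined above):
+
+.. code-block:: python
+
+    if ensemble.is_manager:
+        print(ensemble.H[["sim_id", "x", "energy"]])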
+
+See the :doc:`forces_gpu tutorial` for a similar workflow
+including GPUs. That tutorial also shows how to dynamically assign resources to
+each simulation.
 
 Please feel free to contact us or open an issue on GitHub_ if this tutorial
 workflow doesn't work properly on your cluster or other compute resource.
@@ -341,7 +409,7 @@ Exercises
 
 These may require additional browsing of the documentation to complete.
 
- 1. Adjust :meth:`submit()` to launch with four processes.
+ 1. Adjust :meth:`submit()` to launch with four processes.
  2. Adjust ``submit()`` again so the app's ``stdout`` and ``stderr`` are written to ``stdout.txt`` and ``stderr.txt`` respectively.
  3. Add a fourth argument to the args line to make 20% of simulations go bad.
  4. Construct a ``while not task.finished:`` loop that periodically sleeps for a tenth of a second, calls :meth:`task.poll()`,
@@ -350,8 +418,10 @@ These may require additional browsing of the documentation to complete.
 
 .. dropdown:: **Click Here for Solution**
 
+
+   Showing updated sections only (``---`` indicates snipped, unchanged code).
+
    .. code-block:: python
-       :linenos:
 
        import time
 
diff --git a/docs/tutorials/forces_gpu_tutorial.rst b/docs/tutorials/forces_gpu_tutorial.rst
index 63792aacf6..91429044b4 100644
--- a/docs/tutorials/forces_gpu_tutorial.rst
+++ b/docs/tutorials/forces_gpu_tutorial.rst
@@ -13,26 +13,27 @@ number of particles (allows live GPU usage to be viewed).
 
 In the first example, each worker will be using one GPU. The code will assign the
 GPUs available to each worker, using the appropriate method. This works on systems
-using nVidia, AMD and intel GPUs.
+using **Nvidia**, **AMD**, and **Intel** GPUs without modifying the scripts.
 
 Videos demonstrate running this example on Perlmutter_, Spock_, and Polaris_.
 *The first two videos are from an earlier release - you no longer need to change
-particle count or modify the `forces.c` file).*
+particle count or modify the `forces.c` file.* Also, on Polaris, it is no
+longer necessary to change the MPI runner.
 
 Simulation function
 -------------------
 
 The ``sim_f`` (``forces_simf.py``) is as follows. The lines that are different
-to the forces simple example are highlighted:
+from the simple forces example are highlighted:
 
 .. code-block:: python
     :linenos:
-    :emphasize-lines: 29-30, 37
+    :emphasize-lines: 31-32, 39
 
     import numpy as np
 
-    # To retrieve our MPI Executor
-    from libensemble.executors.executor import Executor
+    # Optional status codes to display in libE_stats.txt for each gen or sim
+    from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
 
     # Optional - to print GPU settings
     from libensemble.tools.test_support import check_gpu_setting
@@ -44,6 +45,8 @@ to the forces simple example are highlighted:
         Assigns one MPI rank to each GPU assigned to the worker.
         """
 
+        calc_status = 0
+
         # Parse out num particles, from generator function
         particles = str(int(H["x"][0][0]))
 
@@ -51,7 +54,7 @@ to the forces simple example are highlighted:
         args = particles + " " + str(10) + " " + particles
 
         # Retrieve our MPI Executor
-        exctr = Executor.executor
+        exctr = libE_info["executor"]
 
         # Submit our forces app for execution. 
         task = exctr.submit(
@@ -67,29 +70,36 @@ to the forces simple example are highlighted:
         # Optional - prints GPU assignment (method and numbers)
         check_gpu_setting(task, assert_setting=False, print_setting=True)
 
-        # Stat file to check for bad runs
+        # Try loading final energy reading, set the sim's status
         statfile = "forces.stat"
+        try:
+            data = np.loadtxt(statfile)
+            final_energy = data[-1]
+            calc_status = WORKER_DONE
+        except Exception:
+            final_energy = np.nan
+            calc_status = TASK_FAILED
 
-        # Read final energy
-        data = np.loadtxt(statfile)
-        final_energy = data[-1]
-
-        # Define our output array, populate with energy reading
+        # Define our output array, populate with energy reading
         output = np.zeros(1, dtype=sim_specs["out"])
-        output["energy"][0] = final_energy
+        output["energy"] = final_energy
 
+        # Return final information to worker, for reporting to manager
+        return output, persis_info, calc_status
 
-        return output
 
-Line 37 simply prints out how the GPUs were assigned. If this is not as desired,
+Lines 31-32 tell the executor to use the GPUs assigned to this worker, and
+to match processors (MPI ranks) to GPUs.
+
+The user can also set ``num_procs`` and ``num_gpus`` in the generator as in
+the `forces_gpu_var_resources`_ example, and skip lines 31-32.
+
+Line 39 simply prints out how the GPUs were assigned. If this is not as expected,
 a :attr:`platform_specs` *libE_specs* option can be provided in the
 calling script. Alternatively, for known systems, the LIBE_PLATFORM environment
 variable can be set.
 
-The user can also set ``num_procs`` and ``num_gpus`` in the generator as in
-the `test_GPU_variable_resources.py`_ example.
-
-While this is sufficient for many users, note that it is possible to query
+While this is sufficient for most users, note that it is possible to query
 the resources assigned to *this* worker (nodes and partitions of nodes),
 and use this information however you want.
 
@@ -153,7 +163,7 @@ and use this information however you want.
 
         return output
 
-    The above code will assign a GPU to each worker on CUDA capable systems,
+    The above code will assign a GPU to each worker on CUDA-capable systems,
    so long as the number of workers is chosen to fit the resources.
 
 If you want to have one rank with multiple GPUs, then change source lines 30/31
@@ -206,15 +216,11 @@ Running the example
 -------------------
 
 As an example, if you have been allocated two nodes, each with four GPUs, then assign
-eight workers. For example::
+nine workers (the extra worker runs the persistent generator).
 
-    python run_libe_forces.py --comms local --nworkers 8
+For example::
 
-Note that if you are running one persistent generator that does not require
-resources, then assign nine workers and fix the number of *resource_sets* in
-your calling script::
-
-    libE_specs["num_resource_sets"] = 8
+    python run_libe_forces.py --comms local --nworkers 9
 
 See :ref:`zero resource workers` for more ways to express this.
 
@@ -228,29 +234,31 @@ forces run.
 
 Varying resources
 -----------------
 
-The same code can be used when varying worker resources. In this case, you may
-add an integer field called ``resource_sets`` as a ``gen_specs["out"]`` in your
-calling script.
-
-In the generator function, assign the ``resource_sets`` field of
-:ref:`H` for each point generated. For example
-if a larger simulation requires two MPI tasks (and two GPUs), set the ``resource_sets``
-field to *2* for that sim_id in the generator function. 
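+The generator itself can request resources for each point by setting special
+output fields such as ``num_gpus``. A minimal sketch (following the
+``uniform_sample_with_var_gpus`` generator in ``persistent_sampling_var_resources``;
+``b`` is the batch size and ``ngpus`` a per-point list of GPU counts):
+
+.. code-block:: python
+
+    H_o = np.zeros(b, dtype=gen_specs["out"])
+    H_o["x"] = x
+    H_o["num_gpus"] = ngpus  # GPUs requested for each point
+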
+A variant of this example where you may specify any number of processors +and GPUs for each simulation is given in the `forces_gpu_var_resources`_ example. -The calling script run_libe_forces.py_ contains alternative commented-out lines for -a variable resource example. Search for "Uncomment for var resources" - -In this case, the simulator function will work unmodified, assigning one CPU processor -and one GPU to each MPI rank. +In this example, when simulations are parameterized in the generator function, +the ``gen_specs["out"]`` field ``num_gpus`` is set for each simulation (based +on the number of particles). These values will automatically be used for each +simulation (they do not need to be passed as a ``sim_specs["in"]``). Further guidance on varying the resources assigned to workers can be found under the :doc:`resource manager<../resource_manager/resources_index>` section. +Multiple Applications +--------------------- + +Another variant of this example, forces_multi_app_, has two applications, one that +uses GPUs, and another that only uses CPUs. The dynamic resource management can +manage both types of resources and assign these to the same nodes concurrently, for +maximum efficiency. + Checking GPU usage ------------------ The output of `forces.x` will say if it has run on the host or device. When running -libEnsemble, this can be found under the ``ensemble`` directory. +libEnsemble, this can be found in the simulation directories (under the ``ensemble`` +directory). You can check you are running forces on the GPUs as expected by using profiling tools and/or by using a monitoring utility. For NVIDIA GPUs, for example, the **Nsight** profiler is @@ -295,12 +303,10 @@ that runs 8 workers on 2 nodes: export MPICH_GPU_SUPPORT_ENABLED=1 export SLURM_EXACT=1 - export SLURM_MEM_PER_NODE=0 - python run_libe_forces.py --comms local --nworkers 8 + python run_libe_forces.py --comms local --nworkers 9 -where ``SLURM_EXACT`` and ``SLURM_MEM_PER_NODE`` are set to prevent -resource conflicts on each node. +where ``SLURM_EXACT`` is set to help prevent resource conflicts on each node. .. _forces_gpu: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu .. _forces.c: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_app/forces.c @@ -309,4 +315,5 @@ resource conflicts on each node. .. _Spock: https://www.youtube.com/watch?v=XHXcslDORjU .. _Polaris: https://youtu.be/Ff0dYYLQzoU .. _run_libe_forces.py: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py -.. _test_GPU_variable_resources.py: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/regression_tests/test_GPU_variable_resources.py +.. _forces_gpu_var_resources: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py +.. 
_forces_multi_app: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py diff --git a/libensemble/gen_funcs/persistent_sampling_var_resources.py b/libensemble/gen_funcs/persistent_sampling_var_resources.py index bc9dc638f6..af72512e61 100644 --- a/libensemble/gen_funcs/persistent_sampling_var_resources.py +++ b/libensemble/gen_funcs/persistent_sampling_var_resources.py @@ -2,6 +2,10 @@ Each function generates points uniformly over the domain defined by ``gen_specs["user"]["ub"]`` and ``gen_specs["user"]["lb"]``. + +Most functions use a random request of resources over a range, setting num_procs, num_gpus or +resource sets. The function ``uniform_sample_with_var_gpus`` uses the ``x`` value to determine +the number of GPUs requested. """ import numpy as np @@ -56,6 +60,47 @@ def uniform_sample(_, persis_info, gen_specs, libE_info): return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG +def uniform_sample_with_var_gpus(_, persis_info, gen_specs, libE_info): + """ + Requests a number of GPUs based on the ``x`` value to be used in the evaluation + of the generated points. By default, simulations will assign one MPI processor + per GPU. + + Note that the ``num_gpus`` gen_specs["out"] option (similar to ``num_procs``) does + not need to be passed as a sim_specs["in"]. It will automatically be passed to + simulation functions and used by any MPI Executor unless overridden in the + ``executor.submit`` function. + + .. seealso:: + `test_GPU_variable_resources.py `_ + """ # noqa + + b, n, lb, ub = _get_user_params(gen_specs["user"]) + rng = persis_info["rand_stream"] + ps = PersistentSupport(libE_info, EVAL_GEN_TAG) + tag = None + max_gpus = gen_specs["user"]["max_gpus"] + + while tag not in [STOP_TAG, PERSIS_STOP]: + x = rng.uniform(lb, ub, (b, n)) + bucket_size = (ub[0] - lb[0]) / max_gpus + + # Determine number of GPUs based on linear split over x range (first dimension). + ngpus = [int((num - lb[0]) / bucket_size) + 1 for num in x[:, 0]] + + H_o = np.zeros(b, dtype=gen_specs["out"]) + H_o["x"] = x + H_o["num_gpus"] = ngpus + + print(f"GEN created {b} sims requiring {ngpus} GPUs", flush=True) + + tag, Work, calc_in = ps.send_recv(H_o) + if hasattr(calc_in, "__len__"): + b = len(calc_in) + + return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG + + def uniform_sample_with_procs_gpus(_, persis_info, gen_specs, libE_info): """ Randomly requests a different number of processors and gpus to be used in the @@ -137,11 +182,15 @@ def uniform_sample_diff_simulations(_, persis_info, gen_specs, libE_info): while tag not in [STOP_TAG, PERSIS_STOP]: H_o = np.zeros(b, dtype=gen_specs["out"]) H_o["x"] = rng.uniform(lb, ub, (b, n)) + nprocs = rng.integers(1, gen_specs["user"]["max_procs"] + 1, b) use_gpus = rng.choice([True, False], b) H_o["num_procs"] = nprocs H_o["num_gpus"] = np.where(use_gpus, nprocs, 0) - print(f"GEN created {b} sims requiring {nprocs} procs. Use GPUs {use_gpus}", flush=True) + if "app_type" in H_o.dtype.names: + H_o["app_type"] = np.where(use_gpus, "gpu_app", "cpu_app") + + print(f"\nGEN created {b} sims requiring {nprocs} procs. 
Use GPUs {use_gpus}", flush=True) tag, Work, calc_in = ps.send_recv(H_o) if hasattr(calc_in, "__len__"): diff --git a/libensemble/manager.py b/libensemble/manager.py index e06160ca8d..4c362f7bf0 100644 --- a/libensemble/manager.py +++ b/libensemble/manager.py @@ -226,6 +226,7 @@ def __init__( raise ManagerException( "Manager errored on initialization", "Ensemble directory already existed and wasn't empty.", + "To reuse ensemble dir, set libE_specs['reuse_output_dir'] = True", e, ) diff --git a/libensemble/tests/regression_tests/test_GPU_variable_resources.py b/libensemble/tests/regression_tests/test_GPU_variable_resources.py index 7f272d75cc..c346cc0bb0 100644 --- a/libensemble/tests/regression_tests/test_GPU_variable_resources.py +++ b/libensemble/tests/regression_tests/test_GPU_variable_resources.py @@ -30,7 +30,8 @@ from libensemble import Ensemble from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors.mpi_executor import MPIExecutor -from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_procs_gpus as gen_f +from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_procs_gpus as gen_f1 +from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_var_gpus as gen_f2 # Import libEnsemble items for this test from libensemble.sim_funcs import six_hump_camel @@ -50,12 +51,13 @@ exctr = MPIExecutor() exctr.register_app(full_path=six_hump_camel_app, app_name="six_hump_camel") - gpu_test = Ensemble(parse_args=True) + gpu_test = Ensemble(parse_args=True, executor=exctr) gpu_test.libE_specs = LibeSpecs( num_resource_sets=gpu_test.nworkers - 1, resource_info={"cores_on_node": (8, 16), "gpus_on_node": 4}, sim_dirs_make=True, ensemble_dir_path="./ensemble_GPU_variable_w" + str(gpu_test.nworkers), + reuse_output_dir=True, ) gpu_test.sim_specs = SimSpecs( @@ -65,12 +67,12 @@ user={"dry_run": False}, ) gpu_test.gen_specs = GenSpecs( - gen_f=gen_f, + gen_f=gen_f1, persis_in=["f", "x", "sim_id"], out=[("num_procs", int), ("num_gpus", int), ("x", float, 2)], user={ "initial_batch_size": gpu_test.nworkers - 1, - "max_procs": (gpu_test.nworkers - 1) // 2, # Any sim created can req. 1 worker up to max + "max_procs": gpu_test.nworkers - 1, # Any sim created can req. 
1 worker up to max "lb": np.array([-3, -2]), "ub": np.array([3, 2]), }, @@ -84,10 +86,22 @@ }, ) + # Run with random num_procs/num_gpus for each simulation gpu_test.persis_info = add_unique_random_streams({}, gpu_test.nworkers + 1) - gpu_test.exit_criteria = ExitCriteria(sim_max=40) + gpu_test.exit_criteria = ExitCriteria(sim_max=20) + + gpu_test.run() + if gpu_test.is_manager: + assert gpu_test.flag == 0 + + # Run with num_gpus based on x[0] for each simulation + gpu_test.gen_specs.gen_f = gen_f2 + gpu_test.gen_specs.user["max_gpus"] = gpu_test.nworkers - 1 + gpu_test.persis_info = add_unique_random_streams({}, gpu_test.nworkers + 1) + gpu_test.exit_criteria = ExitCriteria(sim_max=20) gpu_test.run() if gpu_test.is_manager: assert gpu_test.flag == 0 + gpu_test.save_output(__file__) diff --git a/libensemble/tests/regression_tests/test_GPU_variable_resources_multi_task.py b/libensemble/tests/regression_tests/test_GPU_variable_resources_multi_task.py index f78bcff4e5..7ead0a5958 100644 --- a/libensemble/tests/regression_tests/test_GPU_variable_resources_multi_task.py +++ b/libensemble/tests/regression_tests/test_GPU_variable_resources_multi_task.py @@ -26,7 +26,7 @@ This test must be run with 9 or more workers (8 sim workers), in order to resource all works units. More generally: -((nworkers - 1) - gpus_on_node) >= gen_specs["user"][max_resource_sets] + ((nworkers - 1) - gpus_on_node) >= gen_specs["user"][max_procs] """ @@ -55,7 +55,13 @@ # Main block is necessary only when using local comms with spawn start method (default on macOS and Windows). if __name__ == "__main__": - gpu_test = Ensemble(parse_args=True) + + # Get paths for applications to run + six_hump_camel_app = six_hump_camel.__file__ + exctr = MPIExecutor() + exctr.register_app(full_path=six_hump_camel_app, app_name="six_hump_camel") + + gpu_test = Ensemble(parse_args=True, executor=exctr) nworkers = gpu_test.nworkers gpu_test.libE_specs = LibeSpecs( num_resource_sets=gpu_test.nworkers - 1, @@ -64,11 +70,6 @@ ensemble_dir_path="./ensemble_GPU_variable_multi_task_w" + str(nworkers), ) - # Get paths for applications to run - six_hump_camel_app = six_hump_camel.__file__ - exctr = MPIExecutor() - exctr.register_app(full_path=six_hump_camel_app, app_name="six_hump_camel") - gpu_test.sim_specs = SimSpecs( sim_f=sim_f, inputs=["x"], diff --git a/libensemble/tests/scaling_tests/forces/forces_adv/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_adv/forces_simf.py index 39baa24ef9..2348aac50d 100644 --- a/libensemble/tests/scaling_tests/forces/forces_adv/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_adv/forces_simf.py @@ -2,8 +2,6 @@ import time import numpy as np - -from libensemble.executors.executor import Executor from libensemble.message_numbers import TASK_FAILED, WORKER_DONE, WORKER_KILL MAX_SEED = 32767 @@ -70,7 +68,8 @@ def run_forces(H, persis_info, sim_specs, libE_info): sim_particles = perturb(sim_particles, seed, particle_variance) print(f"seed: {seed} particles: {sim_particles}") - exctr = Executor.executor # Get Executor + # Retrieve our MPI Executor + exctr = libE_info["executor"] args = str(int(sim_particles)) + " " + str(sim_timesteps) + " " + str(seed) + " " + str(kill_rate) # task = exctr.submit( app_name="forces", num_procs=cores, app_args=args, stdout="out.txt", stderr="err.txt") diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py index c609c26afb..c65cc8c5a1 100644 --- 
a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py
+++ b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py
@@ -1,8 +1,5 @@
 import numpy as np
 
-# To retrieve our MPI Executor
-from libensemble.executors.executor import Executor
-
 # Optional status codes to display in libE_stats.txt for each gen or sim
 from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
 
@@ -25,7 +22,7 @@ def run_forces(H, persis_info, sim_specs, libE_info):
     args = particles + " " + str(10) + " " + particles
 
     # Retrieve our MPI Executor
-    exctr = Executor.executor
+    exctr = libE_info["executor"]
 
     # Submit our forces app for execution.
     task = exctr.submit(
@@ -41,10 +38,8 @@ def run_forces(H, persis_info, sim_specs, libE_info):
     # Optional - prints GPU assignment (method and numbers)
     check_gpu_setting(task, assert_setting=False, print_setting=True)
 
-    # Stat file to check for bad runs
-    statfile = "forces.stat"
-
     # Try loading final energy reading, set the sim's status
+    statfile = "forces.stat"
     try:
         data = np.loadtxt(statfile)
         final_energy = data[-1]
diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md b/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md
index 040b653509..7099ad9717 100644
--- a/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md
+++ b/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md
@@ -15,13 +15,17 @@ for your platform (these will include -DGPU)
 
 Then return here and run:
 
-    python run_libe_forces.py --comms local --nworkers 4
+    python run_libe_forces.py --comms local --nworkers 5
+
+This will run libEnsemble with five workers: one for the persistent generator and
+four for forces simulations (so four GPUs are required).
 
 By default, each run of forces will use one CPU and one GPU. The `forces.c` code can also
 be MPI parallel and will use one GPU for each CPU rank, assuming an even split of ranks
-across nodes.
+across nodes. There must be enough GPUs per simulation worker (for a more dynamic example,
+see `forces_gpu_var_resources`).
 
-## Running test run_libe_forces.py
+## Detailed instructions
 
 Naive Electrostatics Code Test
 
diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py
index aceed59e17..9099633271 100644
--- a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py
+++ b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py
@@ -6,12 +6,13 @@
 live-checking GPU usage.
 
 The forces.c application should be built by setting the GPU preprocessor condition
-in addition to openMP GPU flags for the given system. See examples in
-../forces_app/build_forces.sh. We recommend running forces.x standalone first
+(usually -DGPU) in addition to openMP GPU flags for the given system. See examples
+in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
 and confirm it is running on the GPU (this is given clearly in the output).
 
-An alternative variable resource generator is available (search 'var resources'
-in this script and uncomment relevant lines).
+To mock GPUs on a non-GPU system, uncomment the resource_info line in libE_specs. You
+will compile forces without the -DGPU option. It is recommended that the lb/ub for
+particle counts be reduced for CPU performance. 
""" import os @@ -20,76 +21,75 @@ import numpy as np from forces_simf import run_forces # Sim func from current dir +from libensemble import Ensemble +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors import MPIExecutor - -# Fixed resources (one resource set per worker) -from libensemble.gen_funcs.sampling import uniform_random_sample as gen_f -from libensemble.libE import libE -from libensemble.tools import add_unique_random_streams, parse_args - -# Uncomment for var resources (checksum will change due to rng differences) -# from libensemble.gen_funcs.sampling import uniform_random_sample_with_variable_resources as gen_f - -# Parse number of workers, comms type, etc. from arguments -nworkers, is_manager, libE_specs, _ = parse_args() - -# To test on system without GPUs - compile forces without -DGPU and mock GPUs with this line. -# libE_specs["resource_info"] = {"gpus_on_node": 4} - -# Initialize MPI Executor instance -exctr = MPIExecutor() - -# Register simulation executable with executor -sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") - -if not os.path.isfile(sim_app): - sys.exit("forces.x not found - please build first in ../forces_app dir") - -exctr.register_app(full_path=sim_app, app_name="forces") - -# State the sim_f, inputs, outputs -sim_specs = { - "sim_f": run_forces, # sim_f, imported above - "in": ["x"], # Name of input for sim_f - "out": [("energy", float)], # Name, type of output from sim_f -} - -# State the gen_f, inputs, outputs, additional parameters -gen_specs = { - "gen_f": gen_f, # Generator function - "in": [], # Generator input - "out": [ - ("x", float, (1,)), # Name, type and size of data from gen_f - # ("resource_sets", int) # Uncomment for var resources - ], - "user": { - "lb": np.array([50000]), # fewest particles (changing will change checksum) - "ub": np.array([100000]), # max particles (changing will change checksum) - "gen_batch_size": 8, - # "max_resource_sets": nworkers # Uncomment for var resources - }, -} - -# Create and work inside separate per-simulation directories -libE_specs["sim_dirs_make"] = True - -# Uncomment to see resource sets in libE_stats.txt - useful with var resources -# libE_specs["stats_fmt"] = {"show_resource_sets": True} - -# Instruct libEnsemble to exit after this many simulations -exit_criteria = {"sim_max": 8} # changing will change checksum - -# Seed random streams for each worker, particularly for gen_f -persis_info = add_unique_random_streams({}, nworkers + 1) - -# Launch libEnsemble -H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, persis_info=persis_info, libE_specs=libE_specs) - -# This is for configuration of this test (inc. lb/ub and sim_max values) -if is_manager: - if exit_criteria["sim_max"] == 8: - chksum = np.sum(H["energy"]) - assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}" - print("Checksum passed") - else: - print("Run complete. 
A checksum has not been provided for the given sim_max") +from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + +if __name__ == "__main__": + + # Initialize MPI Executor + exctr = MPIExecutor() + sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") + + if not os.path.isfile(sim_app): + sys.exit("forces.x not found - please build first in ../forces_app dir") + + exctr.register_app(full_path=sim_app, app_name="forces") + + # Parse number of workers, comms type, etc. from arguments + ensemble = Ensemble(parse_args=True, executor=exctr) + nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator + + # Persistent gen does not need resources + ensemble.libE_specs = LibeSpecs( + num_resource_sets=nsim_workers, + sim_dirs_make=True, + # resource_info = {"gpus_on_node": 4} # for mocking GPUs + ) + + ensemble.sim_specs = SimSpecs( + sim_f=run_forces, + inputs=["x"], + outputs=[("energy", float)], + ) + + ensemble.gen_specs = GenSpecs( + gen_f=gen_f, + inputs=[], # No input when start persistent generator + persis_in=["sim_id"], # Return sim_ids of evaluated points to generator + outputs=[("x", float, (1,))], + user={ + "initial_batch_size": nsim_workers, + "lb": np.array([50000]), # min particles + "ub": np.array([100000]), # max particles + }, + ) + + # Starts one persistent generator. Simulated values are returned in batch. + ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ + "async_return": False, # False causes batch returns + }, + ) + + # Instruct libEnsemble to exit after this many simulations + ensemble.exit_criteria = ExitCriteria(sim_max=8) + + # Seed random streams for each worker, particularly for gen_f + ensemble.add_random_streams() + + # Run ensemble + ensemble.run() + + if ensemble.is_manager: + # Note, this will change if change sim_max, nworkers, lb/ub etc... + if ensemble.exit_criteria.sim_max == 8: + chksum = np.sum(ensemble.H["energy"]) + assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}" + print("Checksum passed") + else: + print("Run complete. A checksum has not been provided for the given sim_max") diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh b/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh index 78bc7cc905..f7b74739b4 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh @@ -1,12 +1,11 @@ #!/bin/bash #SBATCH -J libE_small_test -#SBATCH -A +#SBATCH -A #SBATCH -C gpu #SBATCH --time 10 -#SBATCH --nodes 2 +#SBATCH --nodes 1 export MPICH_GPU_SUPPORT_ENABLED=1 export SLURM_EXACT=1 -export SLURM_MEM_PER_NODE=0 -python run_libe_forces.py --comms local --nworkers 8 +python run_libe_forces.py --comms local --nworkers 5 diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/readme.md b/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/readme.md deleted file mode 100644 index 96ba904286..0000000000 --- a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/readme.md +++ /dev/null @@ -1,60 +0,0 @@ -## Tutorial - -This example is similar to that explained in the tutorial **Executor - Assign GPUs**, -but using a persistent generator. 
- -https://libensemble.readthedocs.io/en/develop/tutorials/forces_gpu_tutorial.html - -## QuickStart - -Go to `forces_app` directory: - - cd ../forces_app - -Compile **forces.x** using one of the GPU build lines in `build_forces.sh` or similar -for your platform (these will include -DGPU) - -Then return here and run: - - python run_libe_forces.py --comms local --nworkers 5 - -By default, each run of forces will use one CPU and one GPU. The `forces.c` code can also -be MPI parallel and will use one GPU for each CPU rank, assuming an even split of ranks -across nodes. The extra worker is used for the persistent generator. - -## Running test run_libe_forces.py - -Naive Electrostatics Code Test - -This is a synthetic, highly configurable simulation function. This test aims -to show libEnsemble's capability to set assign GPU resources as needed by each -worker and to launch application instances via the `MPIExecutor`. - -### Forces Mini-App - -A system of charged particles is initialized and simulated over a number of time-steps. - -See `forces_app` directory for details. - -### Running with libEnsemble. - -A random sample of seeds is taken and used as input to the sim func (forces miniapp). - -In forces_app directory, modify build_forces.sh for target platform and run to -build forces.x: - - ./build_forces.sh - -Then to run with local comms (multiprocessing) with one manager and `N` workers: - - python run_libe_forces.py --comms local --nworkers N - -To run with MPI comms using one manager and `N-1` workers: - - mpirun -np N python run_libe_forces.py - -Application parameters can be adjusted in the file `run_libe_forces.py`. - -To remove output before the next run: - - ./cleanup.sh diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py deleted file mode 100644 index 7d40432a60..0000000000 --- a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python - -""" -This example is based on the simple forces test. The default number of -particles is increased considerably to give perceptible time on the GPUs when -live-checking GPU usage. - -The forces.c application should be built by setting the GPU preprocessor condition -in addition to openMP GPU flags for the given system. See examples in -../forces_app/build_forces.sh. We recommend running forces.x standalone first -and confirm it is running on the GPU (this is given clearly in the output). - -An alternative variable resource generator is available (search 'var resources' -in this script and uncomment relevant lines). -""" - -import os -import sys - -import numpy as np -from forces_simf import run_forces # Sim func from current dir - -from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f -from libensemble.executors import MPIExecutor - -# Fixed resources (one resource set per worker) - persistent gen -from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f -from libensemble.libE import libE -from libensemble.tools import add_unique_random_streams, parse_args - -# Uncomment for var resources (checksum will change due to rng differences) -# from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample as gen_f - - -# Parse number of workers, comms type, etc. 
from arguments -nworkers, is_manager, libE_specs, _ = parse_args() - -nsim_workers = nworkers - 1 -libE_specs["num_resource_sets"] = nsim_workers # Persistent gen does not need resources - -# To test on system without GPUs - compile forces without -DGPU and mock GPUs with this line. -# libE_specs["resource_info"] = {"gpus_on_node": 4} - -# Initialize MPI Executor instance -exctr = MPIExecutor() - -# Register simulation executable with executor -sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") - -if not os.path.isfile(sim_app): - sys.exit("forces.x not found - please build first in ../forces_app dir") - -exctr.register_app(full_path=sim_app, app_name="forces") - -# State the sim_f, inputs, outputs -sim_specs = { - "sim_f": run_forces, # sim_f, imported above - "in": ["x"], # Name of input for sim_f - "out": [("energy", float)], # Name, type of output from sim_f -} - -# State the gen_f, inputs, outputs, additional parameters -gen_specs = { - "gen_f": gen_f, # Generator function - "in": [], # Generator input - "persis_in": ["sim_id"], # Just send something back to gen to get number of new points. - "out": [ - ("x", float, (1,)), # Name, type and size of data from gen_f - # ("resource_sets", int) # Uncomment for var resources - ], - "user": { - "lb": np.array([50000]), # fewest particles (changing will change checksum) - "ub": np.array([100000]), # max particles (changing will change checksum) - "initial_batch_size": nsim_workers, - # "max_resource_sets": nsim_workers # Uncomment for var resources - }, -} - -alloc_specs = { - "alloc_f": alloc_f, - "user": { - "give_all_with_same_priority": False, - "async_return": False, # False causes batch returns - }, -} - -# Create and work inside separate per-simulation directories -libE_specs["sim_dirs_make"] = True - -# Uncomment to see resource sets in libE_stats.txt - useful with var resources -# libE_specs["stats_fmt"] = {"show_resource_sets": True} - -# Instruct libEnsemble to exit after this many simulations -exit_criteria = {"sim_max": 8} # changing will change checksum - -# Seed random streams for each worker, particularly for gen_f -persis_info = add_unique_random_streams({}, nworkers + 1) - -# Launch libEnsemble -H, persis_info, flag = libE( - sim_specs, gen_specs, exit_criteria, persis_info=persis_info, alloc_specs=alloc_specs, libE_specs=libE_specs -) - -# This is for configuration of this test (inc. lb/ub and sim_max values) -if is_manager: - if exit_criteria["sim_max"] == 8: - chksum = np.sum(H["energy"]) - assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}" - print("Checksum passed") - else: - print("Run complete. 
A checksum has not been provided for the given sim_max") diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/submit_perlmutter.sh b/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/submit_perlmutter.sh deleted file mode 100644 index 73b1fc39ef..0000000000 --- a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/submit_perlmutter.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -#SBATCH -J libE_small_test -#SBATCH -A m4272_g -#SBATCH -C gpu -#SBATCH --time 10 -#SBATCH --nodes 1 - -# export MPICH_GPU_SUPPORT_ENABLED=1 -# export SLURM_EXACT=1 -# export SLURM_MEM_PER_NODE=0 - -python run_libe_forces.py --comms local --nworkers 5 diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/cleanup.sh b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/cleanup.sh similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/cleanup.sh rename to libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/cleanup.sh diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py similarity index 64% rename from libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/forces_simf.py rename to libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py index cfdb126e0e..d79af981c5 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py @@ -1,8 +1,5 @@ import numpy as np -# To retrieve our MPI Executor and resources instances -from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim from libensemble.message_numbers import TASK_FAILED, WORKER_DONE @@ -11,9 +8,8 @@ def run_forces(H, persis_info, sim_specs, libE_info): - """Launches the forces MPI app and auto-assigns ranks and GPU resources. - - Assigns one MPI rank to each GPU assigned to the worker. + """Launches the forces MPI app and auto-assigns ranks and GPU resources + (based on generator output). """ calc_status = 0 @@ -24,17 +20,11 @@ def run_forces(H, persis_info, sim_specs, libE_info): # app arguments: num particles, timesteps, also using num particles as seed args = particles + " " + str(10) + " " + particles - # Retrieve our MPI Executor instance and resources - exctr = Executor.executor + # Retrieve our MPI Executor + exctr = libE_info["executor"] - # Submit our forces app for execution. Block until the task starts. - task = exctr.submit( - app_name="forces", - app_args=args, - # num_procs = 1, - auto_assign_gpus=True, - match_procs_to_gpus=True, - ) + # Submit our forces app for execution. 
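+    # Note: no procs or GPUs are requested here; the num_gpus value set by the
+    # generator for this simulation is picked up automatically by the MPIExecutor.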
+    task = exctr.submit(app_name="forces", app_args=args)

     # Block until the task finishes
     task.wait()

@@ -42,10 +32,8 @@ def run_forces(H, persis_info, sim_specs, libE_info):
     # Optional - prints GPU assignment (method and numbers)
     check_gpu_setting(task, assert_setting=False, print_setting=True)

-    # Stat file to check for bad runs
-    statfile = "forces.stat"
-
     # Try loading final energy reading, set the sim's status
+    statfile = "forces.stat"
     try:
         data = np.loadtxt(statfile)
         final_energy = data[-1]
@@ -54,9 +42,8 @@ def run_forces(H, persis_info, sim_specs, libE_info):
         final_energy = np.nan
         calc_status = TASK_FAILED

-    # Define our output array, populate with energy reading
-    outspecs = sim_specs["out"]
-    output = np.zeros(1, dtype=outspecs)
+    # Define our output array, populate with energy reading
+    output = np.zeros(1, dtype=sim_specs["out"])
     output["energy"] = final_energy

     # Return final information to worker, for reporting to manager
diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/readme.md b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/readme.md
new file mode 100644
index 0000000000..86d7141ef2
--- /dev/null
+++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/readme.md
@@ -0,0 +1,29 @@
+## Tutorial
+
+This example is referred to in the tutorial **Executor - Assign GPUs**.
+
+When the generator creates parameters for each simulation, it sets the number
+of GPUs required by that simulation. Resources are dynamically assigned to
+the simulation workers.
+
+https://libensemble.readthedocs.io/en/develop/tutorials/forces_gpu_tutorial.html
+
+## QuickStart
+
+Go to the `forces_app` directory:
+
+    cd ../forces_app
+
+Compile **forces.x** using one of the GPU build lines in `build_forces.sh` or similar
+for your platform (these will include -DGPU).
+
+Then return here and run:
+
+    python run_libe_forces.py --comms local --nworkers 5
+
+This will run libEnsemble with five workers: one for the persistent generator and
+four for forces simulations (so four GPUs are required).
+
+## Detailed instructions
+
+See ../forces_gpu.
diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py
new file mode 100644
index 0000000000..a74f90146a
--- /dev/null
+++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+"""
+This example is similar to the forces_gpu test.
+
+The forces.c application should be built by setting the GPU preprocessor condition
+(usually -DGPU) in addition to openMP GPU flags for the given system. We recommend
+running forces.x standalone first and confirming that it is running on the GPU
+(this is shown clearly in the output). See examples in ../forces_app/build_forces.sh.
+
+A number of GPUs is requested based on the number of particles (randomly chosen
+from the range for each simulation). For simplicity, the number of GPUs requested
+is based on a linear split of the range (lb to ub), rather than absolute particle
+count.
+
+To mock GPUs on a non-GPU system, uncomment the resource_info line in libE_specs
+and compile forces without the -DGPU option. It is recommended that the lb/ub for
+particle counts be reduced for CPU performance.
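+
+For example, uncommenting the following argument in the LibeSpecs below mocks
+four GPUs per node:
+
+    resource_info={"gpus_on_node": 4}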
+""" + +import os +import sys + +import numpy as np +from forces_simf import run_forces # Sim func from current dir + +from libensemble import Ensemble +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f +from libensemble.executors import MPIExecutor +from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_var_gpus as gen_f +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + +if __name__ == "__main__": + + # Initialize MPI Executor + exctr = MPIExecutor() + sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") + + if not os.path.isfile(sim_app): + sys.exit("forces.x not found - please build first in ../forces_app dir") + + exctr.register_app(full_path=sim_app, app_name="forces") + + # Parse number of workers, comms type, etc. from arguments + ensemble = Ensemble(parse_args=True, executor=exctr) + nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator + + # Persistent gen does not need resources + ensemble.libE_specs = LibeSpecs( + num_resource_sets=nsim_workers, + sim_dirs_make=True, + stats_fmt={"show_resource_sets": True}, # see resource sets in libE_stats.txt + # resource_info = {"gpus_on_node": 4}, # for mocking GPUs + ) + + ensemble.sim_specs = SimSpecs( + sim_f=run_forces, + inputs=["x"], + outputs=[("energy", float)], + ) + + ensemble.gen_specs = GenSpecs( + gen_f=gen_f, + inputs=[], # No input when start persistent generator + persis_in=["sim_id"], # Return sim_ids of evaluated points to generator + outputs=[ + ("x", float, (1,)), + ("num_gpus", int), # num_gpus auto given to sim when use MPIExecutor. + ], + user={ + "initial_batch_size": nsim_workers, + "lb": np.array([50000]), # min particles + "ub": np.array([100000]), # max particles + "max_gpus": nsim_workers, + }, + ) + + # Starts one persistent generator. Simulated values are returned in batch. + ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ + "async_return": False, # False causes batch returns + }, + ) + + # Instruct libEnsemble to exit after this many simulations + ensemble.exit_criteria = ExitCriteria(sim_max=8) + + # Seed random streams for each worker, particularly for gen_f + ensemble.add_random_streams() + + # Run ensemble + ensemble.run() + + if ensemble.is_manager: + # Note, this will change if change sim_max, nworkers, lb/ub etc... + if ensemble.exit_criteria.sim_max == 8: + chksum = np.sum(ensemble.H["energy"]) + assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}" + print("Checksum passed") + else: + print("Run complete. 
A checksum has not been provided for the given sim_max") diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/cleanup.sh b/libensemble/tests/scaling_tests/forces/forces_multi_app/cleanup.sh similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_multi_task/cleanup.sh rename to libensemble/tests/scaling_tests/forces/forces_multi_app/cleanup.sh diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py similarity index 83% rename from libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py rename to libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py index 8f8b108519..bebc748172 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py @@ -1,8 +1,5 @@ import numpy as np -# To retrieve our MPI Executor and resources instances -from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim from libensemble.message_numbers import TASK_FAILED, WORKER_DONE @@ -24,12 +21,14 @@ def run_forces(H, persis_info, sim_specs, libE_info): # app arguments: num particles, timesteps, also using num particles as seed args = particles + " " + str(10) + " " + particles - # Retrieve our MPI Executor instance and resources - exctr = Executor.executor + # Retrieve our MPI Executor + exctr = libE_info["executor"] + + app_type = H["app_type"][0].decode() - # Submit our forces app for execution. Block until the task starts. + # Submit our forces app for execution. task = exctr.submit( - app_name="forces_gpu", + app_name=app_type, app_args=args, ) @@ -37,7 +36,7 @@ def run_forces(H, persis_info, sim_specs, libE_info): task.wait() # Optional - prints GPU assignment (method and numbers) - check_gpu_setting(task, assert_setting=False, print_setting=True) + check_gpu_setting(task, assert_setting=False, print_setting=True, desc=app_type) # Stat file to check for bad runs statfile = "forces.stat" diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py new file mode 100644 index 0000000000..176c3ad885 --- /dev/null +++ b/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python + +""" +This example runs two difference applications, one that uses only CPUs and one +that uses GPUs. Both uses a variable number of processors. The GPU application +uses one GPU per processor. As the generator creates simulations, it randomly +assigns between one and max_proc processors to each simulation, and also randomly +assigns which application is to be run. + +The forces.c application should be compiled for the CPU to `forces_cpu.x`, and +for the GPU (setting the GPU preprocessor condition) to `forces_gpu.x`. + +For compile lines, see examples in ../forces_app/build_forces.sh. 
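+
+Within the simulation function, the generated app_type field selects which
+registered application to run, e.g. (from forces_simf.py in this directory):
+
+    app_type = H["app_type"][0].decode()
+    task = exctr.submit(app_name=app_type, app_args=args)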
+
+It is recommended to run this test such that:
+    ((nworkers - 1) - gpus_on_node) >= gen_specs["user"]["max_procs"]
+
+E.g., if running on one node with four GPUs, then use:
+    python run_libe_forces.py --comms local --nworkers 9
+
+E.g., if running on one node with eight GPUs, then use:
+    python run_libe_forces.py --comms local --nworkers 17
+"""
+
+import os
+import sys
+
+import numpy as np
+from forces_simf import run_forces  # Sim func from current dir
+
+from libensemble import Ensemble
+from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
+from libensemble.executors import MPIExecutor
+from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_diff_simulations as gen_f
+from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
+
+
+if __name__ == "__main__":
+
+    # Initialize MPI Executor instance
+    exctr = MPIExecutor()
+
+    # Register simulation executable with executor
+    cpu_app = os.path.join(os.getcwd(), "../forces_app/forces_cpu.x")
+    gpu_app = os.path.join(os.getcwd(), "../forces_app/forces_gpu.x")
+
+    if not os.path.isfile(cpu_app):
+        sys.exit(f"{cpu_app} not found - please build first in ../forces_app dir")
+    if not os.path.isfile(gpu_app):
+        sys.exit(f"{gpu_app} not found - please build first in ../forces_app dir")
+
+    exctr.register_app(full_path=cpu_app, app_name="cpu_app")
+    exctr.register_app(full_path=gpu_app, app_name="gpu_app")
+
+    # Parse number of workers, comms type, etc. from arguments
+    ensemble = Ensemble(parse_args=True, executor=exctr)
+    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator
+
+    # Persistent gen does not need resources
+    ensemble.libE_specs = LibeSpecs(
+        num_resource_sets=nsim_workers,
+        sim_dirs_make=True,
+        stats_fmt={"show_resource_sets": True},  # see resource sets in libE_stats.txt
+        # resource_info = {"gpus_on_node": 4},  # for mocking GPUs
+    )
+
+    ensemble.sim_specs = SimSpecs(
+        sim_f=run_forces,
+        inputs=["x", "app_type"],
+        outputs=[("energy", float)],
+    )
+
+    ensemble.gen_specs = GenSpecs(
+        gen_f=gen_f,
+        inputs=[],  # No input when starting persistent generator
+        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
+        outputs=[
+            ("x", float, (1,)),
+            ("num_procs", int),  # num_procs is passed to the sim automatically when using the MPIExecutor
+            ("num_gpus", int),  # num_gpus is passed to the sim automatically when using the MPIExecutor
+            ("app_type", "S10"),  # select app type (cpu_app or gpu_app)
+        ],
+        user={
+            "initial_batch_size": nsim_workers,
+            "lb": np.array([5000]),  # min particles
+            "ub": np.array([10000]),  # max particles
+            "max_procs": nsim_workers // 2,  # Each sim can request from 1 up to max_procs processors
+        },
+    )
+
+    # Starts one persistent generator. Simulated values are returned in batch.
+    ensemble.alloc_specs = AllocSpecs(
+        alloc_f=alloc_f,
+        user={
+            "async_return": False,  # False causes batch returns
+        },
+    )
+
+    # Instruct libEnsemble to exit after this many simulations
+    ensemble.exit_criteria = ExitCriteria(sim_max=nsim_workers * 2)
+
+    # Seed random streams for each worker, particularly for gen_f
+    ensemble.add_random_streams()
+
+    # Run ensemble
+    ensemble.run()
+
+    if ensemble.is_manager:
+        # Note: checksums will change if sim_max, nworkers, or lb/ub are changed
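+        # Known checksums below correspond to sim_max = 16 (nworkers 9) and
+        # sim_max = 32 (nworkers 17), since sim_max is set to nsim_workers * 2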
+        chksum = np.sum(ensemble.H["energy"])
+        print(f"Final energy checksum: {chksum}")
+
+        exp_chksums = {16: -21935405.696289998, 32: -26563930.6356}
+        exp_chksum = exp_chksums.get(ensemble.exit_criteria.sim_max)
+
+        if exp_chksum is not None:
+            assert np.isclose(chksum, exp_chksum), f"energy checksum is {chksum}"
+            print("Checksum passed")
+        else:
+            print("Run complete. An expected checksum has not been provided for the given sim_max")
diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh b/libensemble/tests/scaling_tests/forces/forces_multi_app/submit_perlmutter.sh
similarity index 57%
rename from libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh
rename to libensemble/tests/scaling_tests/forces/forces_multi_app/submit_perlmutter.sh
index 73b1fc39ef..f7b74739b4 100644
--- a/libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh
+++ b/libensemble/tests/scaling_tests/forces/forces_multi_app/submit_perlmutter.sh
@@ -1,12 +1,11 @@
 #!/bin/bash
 #SBATCH -J libE_small_test
-#SBATCH -A m4272_g
+#SBATCH -A
 #SBATCH -C gpu
 #SBATCH --time 10
 #SBATCH --nodes 1

-# export MPICH_GPU_SUPPORT_ENABLED=1
-# export SLURM_EXACT=1
-# export SLURM_MEM_PER_NODE=0
+export MPICH_GPU_SUPPORT_ENABLED=1
+export SLURM_EXACT=1

 python run_libe_forces.py --comms local --nworkers 5
diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py
deleted file mode 100644
index ba20944b98..0000000000
--- a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/env python
-
-"""
-This example is based on the simple forces test. The default number of
-particles is increased considerably to give perceptible time on the GPUs when
-live-checking GPU usage.
-
-The forces.c application should be built by setting the GPU preprocessor condition
-in addition to openMP GPU flags for the given system. See examples in
-../forces_app/build_forces.sh. We recommend running forces.x standalone first
-and confirm it is running on the GPU (this is given clearly in the output).
-
-An alternative variable resource generator is available (search 'var resources'
-in this script and uncomment relevant lines).
-"""
-
-import os
-import sys
-
-import numpy as np
-from forces_simf import run_forces  # Sim func from current dir
-
-from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
-from libensemble.executors import MPIExecutor
-from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_diff_simulations as gen_f
-from libensemble.libE import libE
-from libensemble.tools import add_unique_random_streams, parse_args
-
-# Fixed resources (one resource set per worker)
-# from libensemble.gen_funcs.sampling import uniform_random_sample as gen_f
-
-
-# Parse number of workers, comms type, etc. from arguments
-nworkers, is_manager, libE_specs, _ = parse_args()
-
-nsim_workers = nworkers - 1
-libE_specs["num_resource_sets"] = nsim_workers  # Persistent gen does not need resources
-
-# To test on system without GPUs - compile forces without -DGPU and mock GPUs with this line. 
-# libE_specs["resource_info"] = {"gpus_on_node": 4} - -# Initialize MPI Executor instance -exctr = MPIExecutor() - -# Register simulation executable with executor -sim_app1 = os.path.join(os.getcwd(), "../forces_app/forces_cpu.x") -sim_app2 = os.path.join(os.getcwd(), "../forces_app/forces_gpu.x") - -if not os.path.isfile(sim_app1): - sys.exit(f"{sim_app1} not found - please build first in ../forces_app dir") -if not os.path.isfile(sim_app2): - sys.exit(f"{sim_app2} not found - please build first in ../forces_app dir") - -exctr.register_app(full_path=sim_app1, app_name="forces_cpu") -exctr.register_app(full_path=sim_app2, app_name="forces_gpu") - -# State the sim_f, inputs, outputs -sim_specs = { - "sim_f": run_forces, # sim_f, imported above - "in": ["x"], # Name of input for sim_f - "out": [("energy", float)], # Name, type of output from sim_f -} - -# State the gen_f, inputs, outputs, additional parameters -gen_specs = { - "gen_f": gen_f, # Generator function - "in": [], # Generator input - "persis_in": ["sim_id"], # Just send something back to gen to get number of new points. - "out": [ - ("x", float, (1,)), # Name, type and size of data from gen_f - ("num_procs", int), - ("num_gpus", int), - ], - "user": { - "lb": np.array([5000]), # fewest particles (changing will change checksum) - "ub": np.array([10000]), # max particles (changing will change checksum) - "initial_batch_size": nsim_workers, - "max_procs": (nsim_workers) // 2, # Any sim created can req. 1 worker up to max - "multi_task": True, - # "max_resource_sets": nworkers # Uncomment for var resources - }, -} - -alloc_specs = { - "alloc_f": alloc_f, - "user": { - "give_all_with_same_priority": False, - "async_return": False, # False causes batch returns - }, -} - -# Create and work inside separate per-simulation directories -libE_specs["sim_dirs_make"] = True - -# Uncomment to see resource sets in libE_stats.txt - useful with var resources -# libE_specs["stats_fmt"] = {"show_resource_sets": True} - -# Instruct libEnsemble to exit after this many simulations -exit_criteria = {"sim_max": nsim_workers * 2} # changing will change checksum - -# Seed random streams for each worker, particularly for gen_f -persis_info = add_unique_random_streams({}, nworkers + 1) - -# Launch libEnsemble -H, persis_info, flag = libE( - sim_specs, gen_specs, exit_criteria, persis_info=persis_info, alloc_specs=alloc_specs, libE_specs=libE_specs -) diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py index 3c75a356be..ecc2ad4b53 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py @@ -1,13 +1,12 @@ import numpy as np -# To retrieve our MPI Executor instance -from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim from libensemble.message_numbers import TASK_FAILED, WORKER_DONE def run_forces(H, persis_info, sim_specs, libE_info): + """Runs the forces MPI application""" + calc_status = 0 # Parse out num particles, from generator function @@ -16,19 +15,17 @@ def run_forces(H, persis_info, sim_specs, libE_info): # app arguments: num particles, timesteps, also using num particles as seed args = particles + " " + str(10) + " " + particles - # Retrieve our MPI Executor instance - exctr = Executor.executor + # Retrieve our MPI Executor + exctr = libE_info["executor"] - # Submit our forces app for 
execution. Block until the task starts.
+    # Submit our forces app for execution.
     task = exctr.submit(app_name="forces", app_args=args)

     # Block until the task finishes
     task.wait()

-    # Stat file to check for bad runs
-    statfile = "forces.stat"
-
     # Try loading final energy reading, set the sim's status
+    statfile = "forces.stat"
     try:
         data = np.loadtxt(statfile)
         final_energy = data[-1]
@@ -37,9 +34,8 @@ def run_forces(H, persis_info, sim_specs, libE_info):
         final_energy = np.nan
         calc_status = TASK_FAILED

-    # Define our output array, populate with energy reading
-    outspecs = sim_specs["out"]
-    output = np.zeros(1, dtype=outspecs)
+    # Define our output array, populate with energy reading
+    output = np.zeros(1, dtype=sim_specs["out"])
     output["energy"] = final_energy

     # Return final information to worker, for reporting to manager
diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/readme.md b/libensemble/tests/scaling_tests/forces/forces_simple/readme.md
index 72ee536839..da4dcebd57 100644
--- a/libensemble/tests/scaling_tests/forces/forces_simple/readme.md
+++ b/libensemble/tests/scaling_tests/forces/forces_simple/readme.md
@@ -6,16 +6,19 @@ https://libensemble.readthedocs.io/en/develop/tutorials/executor_forces_tutorial

 ## QuickStart

-Build executable and run example. Go to `forces_app` directory and build `forces.x`:
+Build the forces application and run the ensemble. Go to the `forces_app` directory and build `forces.x`:

     cd ../forces_app
     ./build_forces.sh

 Then return here and run:

-    python run_libe_forces.py --comms local --nworkers 4
+    python run_libe_forces.py --comms local --nworkers 5

-## Running test run_libe_forces.py
+This will run with five workers. One worker will run the persistent generator,
+and the other four will run the forces simulations.
+
+## Detailed instructions

 Naive Electrostatics Code Test

@@ -30,10 +33,11 @@ See `forces_app` directory for details.

 ### Running with libEnsemble.

-A random sample of seeds is taken and used as input to the sim func (forces miniapp).
+A random sample of seeds is taken and used as input to the simulation function
+(forces miniapp).

-In forces_app directory, modify build_forces.sh for target platform and run to
-build forces.x:
+In the `forces_app` directory, modify `build_forces.sh` for the target platform
+and run to build `forces.x`:

     ./build_forces.sh
diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py
index 5da33ea49b..066c5e050c 100644
--- a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py
+++ b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py
@@ -5,16 +5,15 @@
 import numpy as np
 from forces_simf import run_forces  # Sim func from current dir

+from libensemble import Ensemble
+from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
 from libensemble.executors import MPIExecutor
-from libensemble.gen_funcs.sampling import uniform_random_sample
-from libensemble.libE import libE
-from libensemble.tools import add_unique_random_streams, parse_args
+from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f
+from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs


 if __name__ == "__main__":
-    # Parse number of workers, comms type, etc. from arguments
-    nworkers, is_manager, libE_specs, _ = parse_args()

-    # Initialize MPI Executor instance
+    # Initialize MPI Executor
     exctr = MPIExecutor()

     # Register simulation executable with executor
@@ -25,33 +24,51 @@

     exctr.register_app(full_path=sim_app, app_name="forces")

-    # State the sim_f, inputs, outputs
-    sim_specs = {
-        "sim_f": run_forces,  # sim_f, imported above
-        "in": ["x"],  # Name of input for sim_f
-        "out": [("energy", float)],  # Name, type of output from sim_f
-    }
-
-    # State the gen_f, inputs, outputs, additional parameters
-    gen_specs = {
-        "gen_f": uniform_random_sample,  # Generator function
-        "in": [],  # Generator input
-        "out": [("x", float, (1,))],  # Name, type and size of data from gen_f
-        "user": {
-            "lb": np.array([1000]),  # User parameters for the gen_f
-            "ub": np.array([3000]),
-            "gen_batch_size": 8,
+    # Parse number of workers, comms type, etc. from arguments
+    ensemble = Ensemble(parse_args=True, executor=exctr)
+    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator
+
+    # Persistent gen does not need resources
+    ensemble.libE_specs = LibeSpecs(
+        num_resource_sets=nsim_workers,
+        sim_dirs_make=True,
+    )
+
+    ensemble.sim_specs = SimSpecs(
+        sim_f=run_forces,
+        inputs=["x"],
+        outputs=[("energy", float)],
+    )
+
+    ensemble.gen_specs = GenSpecs(
+        gen_f=gen_f,
+        inputs=[],  # No input when starting persistent generator
+        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
+        outputs=[("x", float, (1,))],
+        user={
+            "initial_batch_size": nsim_workers,
+            "lb": np.array([1000]),  # min particles
+            "ub": np.array([3000]),  # max particles
         },
-    }
+    )

-    # Create and work inside separate per-simulation directories
-    libE_specs["sim_dirs_make"] = True
+    # Starts one persistent generator. Simulated values are returned in batch.
+    ensemble.alloc_specs = AllocSpecs(
+        alloc_f=alloc_f,
+        user={
+            "async_return": False,  # False causes batch returns
+        },
+    )

     # Instruct libEnsemble to exit after this many simulations
-    exit_criteria = {"sim_max": 8}
+    ensemble.exit_criteria = ExitCriteria(sim_max=8)

     # Seed random streams for each worker, particularly for gen_f
-    persis_info = add_unique_random_streams({}, nworkers + 1)
+    ensemble.add_random_streams()
+
+    # Run ensemble
+    ensemble.run()

-    # Launch libEnsemble
-    H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, persis_info=persis_info, libE_specs=libE_specs)
+    if ensemble.is_manager:
+        # Note: this value will change if sim_max, nworkers, or lb/ub are changed
+        print(f'Final energy checksum: {np.sum(ensemble.H["energy"])}')
diff --git a/libensemble/tools/parse_args.py b/libensemble/tools/parse_args.py
index 4c10cd67ff..d5598bffad 100644
--- a/libensemble/tools/parse_args.py
+++ b/libensemble/tools/parse_args.py
@@ -158,6 +158,14 @@ def parse_args():

     nworkers, is_manager, libE_specs, misc_args = parse_args()

+    Or, for the object interface, when creating the ensemble object:
+
+    .. 
code-block:: python + + from libensemble import Ensemble + + ensemble = Ensemble(parse_args=True) + From the shell:: $ python calling_script --comms local --nworkers 4 diff --git a/libensemble/tools/test_support.py b/libensemble/tools/test_support.py index f883e791b6..c455fcc7b8 100644 --- a/libensemble/tools/test_support.py +++ b/libensemble/tools/test_support.py @@ -84,7 +84,7 @@ def check_mpi_runner(task, exp, print_setting=False): assert ppn_option == runner_info["ppn"] -def check_gpu_setting(task, assert_setting=True, print_setting=False, resources=None): +def check_gpu_setting(task, assert_setting=True, print_setting=False, resources=None, desc=""): """Checks GPU run lines Note that this will check based platform_info or defaults for the MPI runner @@ -214,8 +214,11 @@ def check_gpu_setting(task, assert_setting=True, print_setting=False, resources= else: addon = f"(procs {num_procs}, per node {ppn})" + if desc: + desc += " " + if print_setting: - print(f"Worker {task.workerID}: GPU setting ({stype}): {gpu_setting} {addon}") + print(f"Worker {task.workerID}: {desc}GPU setting ({stype}): {gpu_setting} {addon}", flush=True) if assert_setting: assert (