From 3e67d3f4683017a448de180e3361d0471eeb7240 Mon Sep 17 00:00:00 2001 From: shudson Date: Thu, 25 Jul 2024 14:20:55 -0500 Subject: [PATCH 1/8] Add release notes for 1.4.0 --- CHANGELOG.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 85b7c43b64..d9e6a3be6f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,6 +8,42 @@ GitHub issues are referenced, and can be viewed with hyperlinks on the `github r .. _`github releases page`: https://github.com/Libensemble/libensemble/releases +Release 1.4.0 +-------------- + +:Date: July 25, 2024 + +* Add a live_data option for real-time data collection / plotting. #1310 +* ``nworkers``/``is_manager`` are set when ``Ensemble`` object is created. #1331/ #1336 + * This update locks the comms method when ``Ensemble`` object is created. +* Add ``group_size`` option to deal with unevenly resourced nodes. #1349 +* Bug fix: Fix shutdown hang on worker error when using ``gen_on_manager``. #1348 +* Bug fix: Log level was locked to ``INFO`` when using class interface. +* Updated code to support ``numpy`` 2.0. + + +Documentation: + +* Notebook examples with Colab links added to documentation. #1310 + * E.g., https://colab.research.google.com/github/Libensemble/libensemble/blob/develop/examples/tutorials/aposmm/aposmm_tutorial_notebook.ipynb +* Example of templating input files added to forces tutorial. #1310 + +Example user functions: + +* Update ``gpCAM`` generators to work with latest interface. +* Change ``one_d_func`` to ``norm_eval``. Works with multiple dimensions. #1352 / #1354 + +:Note: + +* Tests were run on Linux and MacOS with Python versions 3.9, 3.10, 3.11, 3.12 +* Heterogeneous workflows tested on Frontier (OLCF), Polaris (ALCF), and Perlmutter (NERSC). +* Note that tests have been recently run on Aurora (ALCF), but the system was unavailable at time of release. +* Tests were also run on Bebop and Improv LCRC systems. 
+ +:Known Issues: + +* See known issues section in the documentation. + Release 1.3.0 -------------- From caf56c6753a18e93b93f3096b9385a39ed25c497 Mon Sep 17 00:00:00 2001 From: shudson Date: Thu, 25 Jul 2024 14:27:38 -0500 Subject: [PATCH 2/8] Prep v1.4.0 release --- .wci.yml | 4 ++-- libensemble/version.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.wci.yml b/.wci.yml index 5ae4672bd0..f35d1baf8f 100644 --- a/.wci.yml +++ b/.wci.yml @@ -16,8 +16,8 @@ description: | language: Python release: - version: 1.3.0 - date: 2024-05-01 + version: 1.4.0 + date: 2024-07-25 documentation: general: https://libensemble.readthedocs.io diff --git a/libensemble/version.py b/libensemble/version.py index d1cf9a89a9..3e8d9f9462 100644 --- a/libensemble/version.py +++ b/libensemble/version.py @@ -1 +1 @@ -__version__ = "1.3.0+dev" +__version__ = "1.4.0" From 67f669e42fe44943610d08c61250b161be389a6d Mon Sep 17 00:00:00 2001 From: Jeffrey Larson Date: Thu, 25 Jul 2024 14:45:16 -0500 Subject: [PATCH 3/8] Format --- CHANGELOG.rst | 5 ++--- libensemble/tests/unit_tests/test_ensemble.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d9e6a3be6f..a9f7c369c4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,15 +13,14 @@ Release 1.4.0 :Date: July 25, 2024 -* Add a live_data option for real-time data collection / plotting. #1310 +* Add a ``live_data`` option for real-time data collection / plotting. #1310 * ``nworkers``/``is_manager`` are set when ``Ensemble`` object is created. #1331/ #1336 * This update locks the comms method when ``Ensemble`` object is created. -* Add ``group_size`` option to deal with unevenly resourced nodes. #1349 +* Add a ``group_size`` option to deal with unevenly resourced nodes. #1349 * Bug fix: Fix shutdown hang on worker error when using ``gen_on_manager``. #1348 * Bug fix: Log level was locked to ``INFO`` when using class interface. * Updated code to support ``numpy`` 2.0. 
- Documentation: * Notebook examples with Colab links added to documentation. #1310 diff --git a/libensemble/tests/unit_tests/test_ensemble.py b/libensemble/tests/unit_tests/test_ensemble.py index b368ae3b0b..f956141c08 100644 --- a/libensemble/tests/unit_tests/test_ensemble.py +++ b/libensemble/tests/unit_tests/test_ensemble.py @@ -22,7 +22,7 @@ def test_ensemble_parse_args_false(): from libensemble.ensemble import Ensemble from libensemble.specs import LibeSpecs - # Ensemble(parse_args=False) by default, so these specs wont be overwritten: + # Ensemble(parse_args=False) by default, so these specs won't be overwritten: e = Ensemble(libE_specs={"comms": "local", "nworkers": 4}) assert hasattr(e, "nworkers"), "nworkers should've passed from libE_specs to Ensemble class" assert isinstance(e.libE_specs, LibeSpecs), "libE_specs should've been cast to class" From 98ed1ea8b680730ec0b4b101b606014dc73734e2 Mon Sep 17 00:00:00 2001 From: shudson Date: Thu, 25 Jul 2024 15:30:34 -0500 Subject: [PATCH 4/8] Update Running libEnsemble docs --- docs/running_libE.rst | 67 ++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/docs/running_libE.rst b/docs/running_libE.rst index d7f5c9b5f6..c3c4c585db 100644 --- a/docs/running_libE.rst +++ b/docs/running_libE.rst @@ -24,8 +24,12 @@ generator and simulator functions. Many :doc:`examples` are available. There are currently three communication options for libEnsemble (determining how -the Manager and Workers communicate). These are ``mpi``, ``local``, ``tcp``. -The default is ``mpi``. +the Manager and Workers communicate). These are ``local``, ``mpi``, ``tcp``. +The default is ``local`` if ``nworkers`` is specified, otherwise ``mpi``. + +Note that ``local`` comms can be used on multi-node systems, where +the ``MPI executor`` is used to distribute MPI applications across the nodes. +Indeed, this is the most commonly used option, even on large supercomputers. .. 
note:: You do not need the ``mpi`` communication mode to use the @@ -34,38 +38,12 @@ The default is ``mpi``. .. tab-set:: - .. tab-item:: MPI Comms - - This option uses mpi4py_ for the Manager/Worker communication. It is used automatically if - you run your libEnsemble calling script with an MPI runner such as:: - - mpirun -np N python myscript.py - - where ``N`` is the number of processes. This will launch one manager and - ``N-1`` workers. - - This option requires ``mpi4py`` to be installed to interface with the MPI on your system. - It works on a standalone system, and with both - :doc:`central and distributed modes` of running libEnsemble on - multi-node systems. - - It also potentially scales the best when running with many workers on HPC systems. - - **Limitations of MPI mode** - - If launching MPI applications from workers, then MPI is nested. **This is not - supported with Open MPI**. This can be overcome by using a proxy launcher - (see :doc:`Balsam`). This nesting does work - with MPICH_ and its derivative MPI implementations. - - It is also unsuitable to use this mode when running on the **launch** nodes of - three-tier systems (e.g., Summit). In that case ``local`` mode is recommended. - .. tab-item:: Local Comms Uses Python's built-in multiprocessing_ module. The ``comms`` type ``local`` and number of workers ``nworkers`` may be provided in :ref:`libE_specs`. + Then run:: python myscript.py @@ -78,6 +56,10 @@ The default is ``mpi``. This will launch one manager and ``N`` workers. + The following abbreviated line is equivalent to the above:: + + python myscript.py -n N + libEnsemble will run on **one node** in this scenario. To :doc:`disallow this node` from app-launches (if running libEnsemble on a compute node), @@ -97,6 +79,33 @@ The default is ``mpi``. - In some scenarios, any import of ``mpi4py`` will cause this to break. - Does not have the potential scaling of MPI mode, but is sufficient for most users. + .. 
tab-item:: MPI Comms + + This option uses mpi4py_ for the Manager/Worker communication. It is used automatically if + you run your libEnsemble calling script with an MPI runner such as:: + + mpirun -np N python myscript.py + + where ``N`` is the number of processes. This will launch one manager and + ``N-1`` workers. + + This option requires ``mpi4py`` to be installed to interface with the MPI on your system. + It works on a standalone system, and with both + :doc:`central and distributed modes` of running libEnsemble on + multi-node systems. + + It also potentially scales the best when running with many workers on HPC systems. + + **Limitations of MPI mode** + + If launching MPI applications from workers, then MPI is nested. **This is not + supported with Open MPI**. This can be overcome by using a proxy launcher + (see :doc:`Balsam`). This nesting does work + with MPICH_ and its derivative MPI implementations. + + It is also unsuitable to use this mode when running on the **launch** nodes of + three-tier systems (e.g., Summit). In that case ``local`` mode is recommended. + .. tab-item:: TCP Comms Run the Manager on one system and launch workers to remote From 43026fb1fbb0a248908cf039ed8e91a21b5b8d9c Mon Sep 17 00:00:00 2001 From: shudson Date: Thu, 25 Jul 2024 15:40:11 -0500 Subject: [PATCH 5/8] Update run instructions with link to MPI Executor --- docs/running_libE.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/running_libE.rst b/docs/running_libE.rst index c3c4c585db..1af3ea1f57 100644 --- a/docs/running_libE.rst +++ b/docs/running_libE.rst @@ -28,8 +28,9 @@ the Manager and Workers communicate). These are ``local``, ``mpi``, ``tcp``. The default is ``local`` if ``nworkers`` is specified, otherwise ``mpi``. Note that ``local`` comms can be used on multi-node systems, where -the ``MPI executor`` is used to distribute MPI applications across the nodes. -Indeed, this is the most commonly used option, even on large supercomputers. 
+the :doc:`executor` is used to distribute MPI applications +across the nodes. Indeed, this is the most commonly used option, even on large +supercomputers. .. note:: You do not need the ``mpi`` communication mode to use the @@ -65,7 +66,7 @@ Indeed, this is the most commonly used option, even on large supercomputers. from app-launches (if running libEnsemble on a compute node), set ``libE_specs["dedicated_mode"] = True``. - This mode is often used to run on a **launch** node of a three-tier + This mode can also be used to run on a **launch** node of a three-tier system (e.g., Summit), ensuring the whole compute-node allocation is available for launching apps. Make sure there are no imports of ``mpi4py`` in your Python scripts. From d8fcc07b7f750274dd1529ac859dc339d2309ac1 Mon Sep 17 00:00:00 2001 From: shudson Date: Thu, 25 Jul 2024 15:46:45 -0500 Subject: [PATCH 6/8] Improve running libE in docs --- docs/running_libE.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running_libE.rst b/docs/running_libE.rst index 1af3ea1f57..cb86262526 100644 --- a/docs/running_libE.rst +++ b/docs/running_libE.rst @@ -28,7 +28,7 @@ the Manager and Workers communicate). These are ``local``, ``mpi``, ``tcp``. The default is ``local`` if ``nworkers`` is specified, otherwise ``mpi``. Note that ``local`` comms can be used on multi-node systems, where -the :doc:`executor` is used to distribute MPI applications +the :doc:`MPI executor` is used to distribute MPI applications across the nodes. Indeed, this is the most commonly used option, even on large supercomputers. 
From 202878f93eec5c138b2e023607e5bea5ae907106 Mon Sep 17 00:00:00 2001 From: shudson Date: Thu, 25 Jul 2024 17:41:35 -0500 Subject: [PATCH 7/8] Update forces notebook --- .../forces_tutorial_notebook.ipynb | 76 +++++++++---------- 1 file changed, 35 insertions(+), 41 deletions(-) diff --git a/examples/tutorials/forces_with_executor/forces_tutorial_notebook.ipynb b/examples/tutorials/forces_with_executor/forces_tutorial_notebook.ipynb index dce22655ac..2f86b7ada9 100644 --- a/examples/tutorials/forces_with_executor/forces_tutorial_notebook.ipynb +++ b/examples/tutorials/forces_with_executor/forces_tutorial_notebook.ipynb @@ -181,7 +181,7 @@ " task = exctr.submit(app_name=\"forces\", app_args=args, num_procs=1)\n", "\n", " # Block until the task finishes\n", - " task.wait(timeout=60)\n", + " task.wait()\n", "\n", " output, calc_status = read_output(sim_specs)\n", "\n", @@ -232,11 +232,11 @@ "import numpy as np\n", "from pprint import pprint\n", "\n", - "# from forces_simf import run_forces # Sim func from current dir\n", + "# from forces_simf import run_forces # Use if sim function is in a file\n", "\n", "from libensemble import Ensemble\n", + "from libensemble.specs import ExitCriteria, GenSpecs, LibeSpecs, SimSpecs\n", "from libensemble.gen_funcs.sampling import uniform_random_sample\n", - "from libensemble.tools import add_unique_random_streams\n", "from libensemble.executors import MPIExecutor\n", "\n", "# Initialize MPI Executor\n", @@ -266,32 +266,27 @@ "metadata": {}, "outputs": [], "source": [ - "nworkers = 2\n", - "\n", "# Global settings - including creating directories for each simulation\n", - "libE_specs = {\n", - " \"nworkers\": nworkers,\n", - " \"comms\": \"local\",\n", - " \"sim_dirs_make\": True,\n", - "}\n", - "\n", - "# State the sim_f, inputs, outputs\n", - "sim_specs = {\n", - " \"sim_f\": run_forces,\n", - " \"in\": [\"x\"], # Name of input for sim_f (defined in gen_specs[\"out\"])\n", - " \"out\": [(\"energy\", float)],\n", - "}\n", - "\n", -
"# State the gen_f, inputs, outputs, additional parameters\n", - "gen_specs = {\n", - " \"gen_f\": uniform_random_sample,\n", - " \"out\": [(\"x\", float, (1,))],\n", - " \"user\": {\n", + "libE_specs = LibeSpecs(\n", + " nworkers=2,\n", + " sim_dirs_make=True,\n", + ")\n", + "\n", + "gen_specs = GenSpecs(\n", + " gen_f=uniform_random_sample,\n", + " outputs=[(\"x\", float, (1,))],\n", + " user={\n", " \"lb\": np.array([1000]), # min particles\n", " \"ub\": np.array([3000]), # max particles\n", " \"gen_batch_size\": 8,\n", " },\n", - "}" + ")\n", + "\n", + "sim_specs = SimSpecs(\n", + " sim_f=run_forces,\n", + " inputs=[\"x\"], # Name of input for sim_f (defined in gen_specs.outputs)\n", + " outputs=[(\"energy\", float)],\n", + ")" ] }, { @@ -308,10 +303,7 @@ "outputs": [], "source": [ "# Instruct libEnsemble to exit after this many simulations\n", - "exit_criteria = {\"sim_max\": 8}\n", - "\n", - "# Seed random streams for each worker, particularly for gen_f\n", - "persis_info = add_unique_random_streams({}, nworkers + 1)\n", + "exit_criteria = ExitCriteria(sim_max=8)\n", "\n", "# Initialize ensemble object, passing executor.\n", "ensemble = Ensemble(\n", @@ -320,8 +312,10 @@ " gen_specs=gen_specs,\n", " sim_specs=sim_specs,\n", " exit_criteria=exit_criteria,\n", - " persis_info=persis_info,\n", - ")" + ")\n", + "\n", + "# Seed random streams for each worker, particularly for gen_f\n", + "ensemble.add_random_streams()" ] }, { @@ -559,19 +553,17 @@ "input_file = \"forces_input\"\n", "\n", "# Add a field to libE_specs\n", - "libE_specs[\"sim_dir_copy_files\"] = [input_file]\n", - "ensemble.libE_specs = libE_specs\n", - "\n", - "ensemble.sim_specs = {\n", - " \"sim_f\": run_forces_using_file,\n", - " \"in\": [\"x\"], # Name of input for sim_f (defined in gen_specs[\"out\"])\n", - " \"out\": [(\"energy\", float)],\n", - " \"user\": {\"input_filename\": input_file, \"input_names\": [\"particles\"]},\n", - "}\n", + "ensemble.libE_specs.sim_dir_copy_files = 
[input_file]\n", "\n", + "ensemble.sim_specs = SimSpecs(\n", + " sim_f=run_forces_using_file,\n", + " inputs=[\"x\"], # Name of input for sim_f (defined in gen_specs.outputs)\n", + " outputs=[(\"energy\", float)],\n", + " user={\"input_filename\": input_file, \"input_names\": [\"particles\"]},\n", + ")\n", "\n", "# To reset random number seed in the generator\n", - "ensemble.persis_info = add_unique_random_streams({}, nworkers + 1)\n", + "ensemble.add_random_streams()\n", "\n", "# Clean up any previous outputs and launch libEnsemble\n", "cleanup()\n", @@ -595,7 +587,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": false + }, "outputs": [], "source": [ "! ls -l ensemble/sim*" From fe7dc4b02727e4b0041338e026b6588b8f856fdf Mon Sep 17 00:00:00 2001 From: shudson Date: Thu, 25 Jul 2024 17:42:08 -0500 Subject: [PATCH 8/8] Update README example --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 68304ee902..1a27ec7484 100644 --- a/README.rst +++ b/README.rst @@ -59,7 +59,7 @@ and an exit condition. Run the following four-worker example via ``python this_f gen_specs = GenSpecs( gen_f=uniform_random_sample, - outputs=[("x", float, (2,))], + outputs=[("x", float, 2)], user={ "gen_batch_size": 50, "lb": np.array([-3, -2]),