Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compute selection: deviceIndex & enforce 1 thread in vacuum #752

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions news/compute_selection_fixes.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
**Added:**

* OpenMMEngineSettings now has a `gpu_device_index` attribute
allowing users to pass through a list of ints to select the
GPU devices to run their simulations on.

**Changed:**

* `openfe.protocols.openmm_rfe._rfe_utils.compute` has been moved
to `openfe.protocols.openmm_utils.omm_compute`.

**Deprecated:**

* <news item>

**Removed:**

* <news item>

**Fixed:**

* OpenMM CPU vacuum calculations now enforce the use of a single
CPU to avoid large performance losses.

**Security:**

* <news item>
28 changes: 21 additions & 7 deletions openfe/protocols/openmm_afe/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,13 @@
MultiStateSimulationSettings, OpenMMEngineSettings,
IntegratorSettings, LambdaSettings, MultiStateOutputSettings,
ThermoSettings, OpenFFPartialChargeSettings,
OpenMMSystemGeneratorFFSettings,
)
from openfe.protocols.openmm_rfe._rfe_utils import compute
from openfe.protocols.openmm_md.plain_md_methods import PlainMDProtocolUnit
from ..openmm_utils import (
settings_validation, system_creation,
multistate_analysis, charge_generation
multistate_analysis, charge_generation,
omm_compute,
)
from openfe.utils import without_oechem_backend

Expand Down Expand Up @@ -175,6 +176,7 @@ def _pre_equilibrate(
settings : dict[str, SettingsBaseModel]
A dictionary of settings objects. Expects the
following entries:
* `forcefield_settings`
* `engine_settings`
* `thermo_settings`
* `integrator_settings`
Expand All @@ -189,8 +191,12 @@ def _pre_equilibrate(
Equilibrated system positions
"""
# Prep the simulation object
platform = compute.get_openmm_platform(
settings['engine_settings'].compute_platform
# Restrict CPU count if running vacuum simulation
restrict_cpu = settings['forcefield_settings'].nonbonded_method.lower() == 'nocutoff'
platform = omm_compute.get_openmm_platform(
platform_name=settings['engine_settings'].compute_platform,
gpu_device_index=settings['engine_settings'].gpu_device_index,
restrict_cpu_count=restrict_cpu
)

integrator = openmm.LangevinMiddleIntegrator(
Expand Down Expand Up @@ -710,14 +716,16 @@ def _get_reporter(

def _get_ctx_caches(
self,
forcefield_settings: OpenMMSystemGeneratorFFSettings,
engine_settings: OpenMMEngineSettings
) -> tuple[openmmtools.cache.ContextCache, openmmtools.cache.ContextCache]:
"""
Set the context caches based on the chosen platform

Parameters
----------
engine_settings : OpenMMEngineSettings,
forcefield_settings: OpenMMSystemGeneratorFFSettings
engine_settings : OpenMMEngineSettings

Returns
-------
Expand All @@ -726,8 +734,13 @@ def _get_ctx_caches(
sampler_context_cache : openmmtools.cache.ContextCache
The sampler state context cache.
"""
platform = compute.get_openmm_platform(
engine_settings.compute_platform,
# Get the compute platform
# Set the number of CPUs to 1 if running a vacuum simulation
restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff'
platform = omm_compute.get_openmm_platform(
platform_name=engine_settings.compute_platform,
gpu_device_index=engine_settings.gpu_device_index,
restrict_cpu_count=restrict_cpu
)

energy_context_cache = openmmtools.cache.ContextCache(
Expand Down Expand Up @@ -1026,6 +1039,7 @@ def run(self, dry=False, verbose=True,
try:
# 12. Get context caches
energy_ctx_cache, sampler_ctx_cache = self._get_ctx_caches(
settings['forcefield_settings'],
settings['engine_settings']
)

Expand Down
10 changes: 6 additions & 4 deletions openfe/protocols/openmm_md/plain_md_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,9 @@
)
from openff.toolkit.topology import Molecule as OFFMolecule

from openfe.protocols.openmm_rfe._rfe_utils import compute
from openfe.protocols.openmm_utils import (
system_validation, settings_validation, system_creation,
charge_generation,
charge_generation, omm_compute
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -618,8 +617,11 @@ def run(self, *, dry=False, verbose=True,
)

# 10. Get platform
platform = compute.get_openmm_platform(
protocol_settings.engine_settings.compute_platform
restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff'
platform = omm_compute.get_openmm_platform(
platform_name=protocol_settings.engine_settings.compute_platform,
gpu_device_index=protocol_settings.engine_settings.gpu_device_index,
restrict_cpu_count=restrict_cpu
)

# 11. Set the integrator
Expand Down
1 change: 0 additions & 1 deletion openfe/protocols/openmm_rfe/_rfe_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from . import (
compute,
lambdaprotocol,
multistate,
relative,
Expand Down
12 changes: 8 additions & 4 deletions openfe/protocols/openmm_rfe/equil_rfe_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
)
from ..openmm_utils import (
system_validation, settings_validation, system_creation,
multistate_analysis, charge_generation
multistate_analysis, charge_generation, omm_compute,
)
from . import _rfe_utils
from ...utils import without_oechem_backend, log_system_probe
Expand Down Expand Up @@ -930,9 +930,13 @@ def run(self, *, dry=False, verbose=True,
bfactors=bfactors,
)

# 10. Get platform
platform = _rfe_utils.compute.get_openmm_platform(
protocol_settings.engine_settings.compute_platform
# 10. Get compute platform
# restrict to a single CPU if running vacuum
restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff'
platform = omm_compute.get_openmm_platform(
platform_name=protocol_settings.engine_settings.compute_platform,
gpu_device_index=protocol_settings.engine_settings.gpu_device_index,
restrict_cpu_count=restrict_cpu
)

# 11. Set the integrator
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,39 @@
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
# Adapted Perses' perses.app.setup_relative_calculation.get_openmm_platform
from typing import Optional
import warnings
import logging


logger = logging.getLogger(__name__)


def get_openmm_platform(platform_name=None):
def get_openmm_platform(
platform_name: Optional[str] = None,
gpu_device_index: Optional[list[int]] = None,
restrict_cpu_count: bool = False
):
"""
Return OpenMM's platform object based on given name. Setting to mixed
precision if using CUDA or OpenCL.

Parameters
----------
platform_name : str, optional, default=None
platform_name : Optional[str]
String with the platform name. If None, it will use the fastest
platform supporting mixed precision.
Default ``None``.
gpu_device_index : Optional[list[int]]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually we should probably have a chat about how we handle this long term - this is a bit like MPI settings, where technically we shouldn't make this immutable but maybe something we pick up at run time?

How can we go about handling this properly?

GPU device index selection. If ``None`` the default OpenMM
GPU selection will be used.
See the `OpenMM platform properties documentation <http://docs.openmm.org/latest/userguide/library/04_platform_specifics.html>`_
for more details.
Default ``None``.
restrict_cpu_count : bool
Optional hint to restrict the CPU count to 1 when
``platform_name`` is CPU. This allows Protocols to ensure
that no large performance losses occur in cases like vacuum
simulations.

Returns
-------
Expand All @@ -44,16 +60,22 @@
# Set precision and properties
name = platform.getName()
if name in ['CUDA', 'OpenCL']:
platform.setPropertyDefaultValue(
'Precision', 'mixed')
platform.setPropertyDefaultValue('Precision', 'mixed')
if gpu_device_index is not None:
index_list = ','.join(str(i) for i in gpu_device_index)
platform.setPropertyDefaultValue('DeviceIndex', index_list)

Check warning on line 66 in openfe/protocols/openmm_utils/omm_compute.py

View check run for this annotation

Codecov / codecov/patch

openfe/protocols/openmm_utils/omm_compute.py#L63-L66

Added lines #L63 - L66 were not covered by tests

if name == 'CUDA':
platform.setPropertyDefaultValue(
'DeterministicForces', 'true')

if name != 'CUDA':
wmsg = (f"Non-GPU platform selected: {name}, this may significantly "
wmsg = (f"Non-CUDA platform selected: {name}, this may significantly "
"impact simulation performance")
warnings.warn(wmsg)
logging.warning(wmsg)

if name == 'CPU' and restrict_cpu_count:
platform.setPropertyDefaultValue('Threads', '1')

return platform
12 changes: 12 additions & 0 deletions openfe/protocols/openmm_utils/omm_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,18 @@ class OpenMMEngineSettings(SettingsBaseModel):
OpenMM compute platform to perform MD integration with. If ``None``, will
choose fastest available platform. Default ``None``.
"""
gpu_device_index: Optional[list[int]] = None
"""
List of integer indices for the GPU devices to select when
``compute_platform`` is either set to ``CUDA`` or ``OpenCL``.

If ``None``, the default OpenMM GPU selection behaviour is used.

See the `OpenMM platform properties documentation <http://docs.openmm.org/latest/userguide/library/04_platform_specifics.html>`_
for more details.

Default ``None``.
"""


class IntegratorSettings(SettingsBaseModel):
Expand Down
6 changes: 3 additions & 3 deletions openfe/tests/protocols/test_openmm_equil_rfe_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@
from openfe.protocols.openmm_rfe.equil_rfe_methods import (
_validate_alchemical_components, _get_alchemical_charge_difference
)
from openfe.protocols.openmm_utils import system_creation
from openfe.protocols.openmm_utils import system_creation, omm_compute
from openfe.protocols.openmm_utils.charge_generation import (
HAS_NAGL, HAS_OPENEYE, HAS_ESPALOMA
)


def test_compute_platform_warn():
with pytest.warns(UserWarning, match="Non-GPU platform selected: CPU"):
openmm_rfe._rfe_utils.compute.get_openmm_platform('CPU')
with pytest.warns(UserWarning, match="Non-CUDA platform selected: CPU"):
omm_compute.get_openmm_platform('CPU')


def test_append_topology(benzene_complex_system, toluene_complex_system):
Expand Down
2 changes: 1 addition & 1 deletion openfe/tests/protocols/test_rfe_tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def instance(self):

class TestRelativeHybridTopologyProtocol(GufeTokenizableTestsMixin):
cls = openmm_rfe.RelativeHybridTopologyProtocol
key = "RelativeHybridTopologyProtocol-bfb2f60bea0352a15bf6832d14d6e46e"
key = "RelativeHybridTopologyProtocol-99069cb606c80c9884a024c17fdab2e4"
repr = f"<{key}>"

@pytest.fixture()
Expand Down
2 changes: 1 addition & 1 deletion openfe/tests/protocols/test_solvation_afe_tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def protocol_result(afe_solv_transformation_json):

class TestAbsoluteSolvationProtocol(GufeTokenizableTestsMixin):
cls = openmm_afe.AbsoluteSolvationProtocol
key = "AbsoluteSolvationProtocol-c602c05772bd839d7717f4820d2961e1"
key = "AbsoluteSolvationProtocol-d46cbe721f0fb25534bc075db9af968c"
repr = f"<{key}>"

@pytest.fixture()
Expand Down
Loading