Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synchronise with Exscientia remote #307

Merged
merged 19 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
6778413
filter the warning for save free energy data (#42)
xiki-tempula Apr 18, 2024
3280d55
Fix the issue where position restraint cannot coexist with ABFE restr…
xiki-tempula Apr 23, 2024
b074d06
Make saveMetric more rubust (#44)
xiki-tempula Apr 26, 2024
61feddc
Allow Trajectory correctly handle system generated from file (#45)
xiki-tempula Apr 30, 2024
e12b2e8
Merge remote-tracking branch 'OpenBioSim/main' into feat_sync
xiki-tempula May 1, 2024
cf86a04
Use the new Sire
xiki-tempula May 1, 2024
73e0c8c
Merge pull request #46 from Exscientia/feat_sync
xiki-tempula May 3, 2024
a4664bc
Backport fix from PR #281. [ci skip]
lohedges May 7, 2024
e565b26
Merge pull request #286 from OpenBioSim/backport_281
lohedges May 7, 2024
6b192f4
Merge remote-tracking branch 'OpenBioSim/main' into feat_traj
xiki-tempula May 8, 2024
f688561
Merge pull request #47 from Exscientia/feat_traj
xiki-tempula May 16, 2024
70a195d
Make different part of the saveMetric function optional (#48)
xiki-tempula May 28, 2024
86555d1
Feat save metric (#49)
xiki-tempula May 31, 2024
0d7cf23
Fix ABFE restraint written twice when there is no position restraint …
xiki-tempula Jun 13, 2024
9e7750b
Fix charge perturbation for amber (#51)
xiki-tempula Jun 19, 2024
132c962
Allow AMBER process to run with decoupled system (#52)
xiki-tempula Jul 2, 2024
6b092d2
Merge remote-tracking branch 'exscientia/devel' into sync_exscientia
lohedges Jul 3, 2024
ce3c877
Re-add platform specific skipif decorators that were removed.
lohedges Jul 3, 2024
44e83d1
Add missing skipif decorator.
lohedges Jul 4, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/Sandpit_exs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macOS-latest",]
os: ["ubuntu-latest", ]
python-version: ["3.10",]

steps:
Expand All @@ -35,7 +35,7 @@ jobs:

- name: Install dependency
run: |
mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.5" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm
mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2024.1.0" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm
python -m pip install git+https://github.com/Exscientia/MDRestraintsGenerator.git
# For the testing of BSS.FreeEnergy.AlchemicalFreeEnergy.analysis
python -m pip install https://github.com/alchemistry/alchemtest/archive/master.zip
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@
install_command="pip install MDRestraintsGenerator",
)
if _have_imported(_MDRestraintsGenerator):
from Bio import BiopythonDeprecationWarning

_warnings.simplefilter("ignore", BiopythonDeprecationWarning)
from MDRestraintsGenerator import search as _search
from MDRestraintsGenerator.restraints import (
FindBoreschRestraint as _FindBoreschRestraint,
Expand Down
99 changes: 60 additions & 39 deletions python/BioSimSpace/Sandpit/Exscientia/Process/_amber.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,14 @@ def __init__(
property_map,
)

if (
not isinstance(protocol, _Protocol._FreeEnergyMixin)
and system.nPerturbableMolecules()
):
raise _IncompatibleError(
"Perturbable system is not compatible with none free energy protocol."
)

# Set the package name.
self._package_name = "AMBER"

Expand Down Expand Up @@ -2936,7 +2944,7 @@ def _init_stdout_dict(self):
for file in _Path(self.workDir()).glob("*.out.offset"):
os.remove(file)

def saveMetric(
def _saveMetric(
self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet"
):
"""
Expand All @@ -2945,41 +2953,54 @@ def saveMetric(
is Free Energy protocol, the dHdl and the u_nk data will be saved in the
same parquet format as well.
"""

_assert_imported(_alchemlyb)

self._init_stdout_dict()
datadict = dict()
if isinstance(self._protocol, _Protocol.Minimisation):
datadict_keys = [
("Time (ps)", None, "getStep"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getTotalEnergy",
),
]
else:
datadict_keys = [
("Time (ps)", _Units.Time.picosecond, "getTime"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getPotentialEnergy",
),
("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"),
("Pressure (bar)", _Units.Pressure.bar, "getPressure"),
("Temperature (kelvin)", _Units.Temperature.kelvin, "getTemperature"),
]
# # Disable this now
df = self._convert_datadict_keys(datadict_keys)
df.to_parquet(path=f"{self.workDir()}/{filename}", index=True)
if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract(
f"{self.workDir()}/{self._name}.out",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
if "u_nk" in energy and energy["u_nk"] is not None:
energy["u_nk"].to_parquet(path=f"{self.workDir()}/{u_nk}", index=True)
if "dHdl" in energy and energy["dHdl"] is not None:
energy["dHdl"].to_parquet(path=f"{self.workDir()}/{dHdl}", index=True)
if filename is not None:
self._init_stdout_dict()
if isinstance(self._protocol, _Protocol.Minimisation):
datadict_keys = [
("Time (ps)", None, "getStep"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getTotalEnergy",
),
]
else:
datadict_keys = [
("Time (ps)", _Units.Time.picosecond, "getTime"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getPotentialEnergy",
),
("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"),
("Pressure (bar)", _Units.Pressure.bar, "getPressure"),
(
"Temperature (kelvin)",
_Units.Temperature.kelvin,
"getTemperature",
),
]
# # Disable this now
df = self._convert_datadict_keys(datadict_keys)
df.to_parquet(path=f"{self.workDir()}/{filename}", index=True)
if u_nk is not None or dHdl is not None:
_assert_imported(_alchemlyb)

if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract(
f"{self.workDir()}/{self._name}.out",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
with _warnings.catch_warnings():
_warnings.filterwarnings(
"ignore",
message="The DataFrame has column names of mixed type.",
)
if "u_nk" in energy and energy["u_nk"] is not None:
energy["u_nk"].to_parquet(
path=f"{self.workDir()}/{u_nk}", index=True
)
if "dHdl" in energy and energy["dHdl"] is not None:
energy["dHdl"].to_parquet(
path=f"{self.workDir()}/{dHdl}", index=True
)
134 changes: 80 additions & 54 deletions python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@
_alchemlyb = _try_import("alchemlyb")

if _have_imported(_alchemlyb):
from alchemlyb.parsing.gmx import extract as _extract
from alchemlyb.parsing.gmx import extract_u_nk as _extract_u_nk
from alchemlyb.parsing.gmx import extract_dHdl as _extract_dHdl

from .. import _gmx_exe
from .. import _isVerbose
Expand Down Expand Up @@ -400,18 +401,19 @@ def _write_system(self, system, coord_file=None, topol_file=None, ref_file=None)
property_map=self._property_map,
)

# Write the restraint to the topology file
if self._restraint:
with open(topol_file, "a") as f:
f.write("\n")
f.write(
self._restraint.toString(
engine="GROMACS",
perturbation_type=self._protocol.getPerturbationType(),
restraint_lambda="restraint"
in self._protocol.getLambda(type="series"),
)
def _apply_ABFE_restraint(self):
# Write the restraint to the topology file
if self._restraint:
with open(self._top_file, "a") as f:
f.write("\n")
f.write(
self._restraint.toString(
engine="GROMACS",
perturbation_type=self._protocol.getPerturbationType(),
restraint_lambda="restraint"
in self._protocol.getLambda(type="series"),
)
)

def _generate_config(self):
"""Generate GROMACS configuration file strings."""
Expand Down Expand Up @@ -488,6 +490,7 @@ def _generate_config(self):
if isinstance(self._protocol, _Protocol._FreeEnergyMixin)
else None
)
self._apply_ABFE_restraint()
self.addToConfig(
config.generateGromacsConfig(
extra_options={**config_options, **self._extra_options},
Expand Down Expand Up @@ -841,7 +844,6 @@ def getSystem(self, block="AUTO"):
system : :class:`System <BioSimSpace._SireWrappers.System>`
The latest molecular system.
"""

# Wait for the process to finish.
if block is True:
self.wait()
Expand All @@ -856,6 +858,10 @@ def getSystem(self, block="AUTO"):
if block is True and not self.isError():
return self._getFinalFrame()
else:
raise ValueError(
"getSystem should not use `trjconv` to get the frame in production settings."
"Trigger an exception here to show the traceback when that happens."
)
# Minimisation trajectories have a single frame, i.e. the final state.
if isinstance(self._protocol, _Protocol.Minimisation):
time = 0 * _Units.Time.nanosecond
Expand Down Expand Up @@ -2396,22 +2402,27 @@ def _parse_energy_terms(text):
U-B, Proper-Dih.. Note that this order is absolute and will not be
changed by the input to `gmx energy`.
"""
sections = text.split("---")
# Get rid of all the nasty message from argo
content = text.split("End your selection with an empty line or a zero.")[1]
sections = content.split("---")
# Remove the empty sections
sections = [section for section in sections if section]
sections = [section for section in sections if section.strip()]
# Concatenate the lines
section = sections[1].replace("\n", "")
section = sections[0].replace("\n", "")
terms = section.split()
# Remove the possible '-' from the separation line
terms = [term for term in terms if term != "-"]
# Check if the index order is correct
indexes = [int(term) for term in terms[::2]]
try:
indexes = [int(term) for term in terms[::2]]
except ValueError:
raise ValueError("Cannot parse the terms: {}".format("\n".join(terms)))
energy_names = terms[1::2]
length_nomatch = len(indexes) != len(energy_names)
# -1 as the index is 1-based.
index_nomatch = (_np.arange(len(indexes)) != _np.array(indexes) - 1).any()
if length_nomatch or index_nomatch:
raise ValueError(f"Cannot parse the energy terms in the {edr_file} file.")
raise ValueError(f"Cannot parse the energy terms in the {text} file.")
else:
return energy_names

Expand Down Expand Up @@ -2858,7 +2869,7 @@ def _find_trajectory_file(self):
else:
return self._traj_file

def saveMetric(
def _saveMetric(
self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet"
):
"""
Expand All @@ -2867,41 +2878,56 @@ def saveMetric(
is Free Energy protocol, the dHdl and the u_nk data will be saved in the
same parquet format as well.
"""

_assert_imported(_alchemlyb)

self._update_energy_dict(initialise=True)
datadict_keys = [
("Time (ps)", _Units.Time.picosecond, "getTime"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getPotentialEnergy",
),
]
if not isinstance(self._protocol, _Protocol.Minimisation):
datadict_keys.extend(
[
("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"),
("Pressure (bar)", _Units.Pressure.bar, "getPressure"),
(
"Temperature (kelvin)",
_Units.Temperature.kelvin,
"getTemperature",
),
]
)
df = self._convert_datadict_keys(datadict_keys)
df.to_parquet(path=f"{self.workDir()}/{filename}", index=True)
if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract(
f"{self.workDir()}/{self._name}.xvg",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
if "u_nk" in energy:
energy["u_nk"].to_parquet(path=f"{self.workDir()}/{u_nk}", index=True)
if "dHdl" in energy:
energy["dHdl"].to_parquet(path=f"{self.workDir()}/{dHdl}", index=True)
if filename is not None:
self._update_energy_dict(initialise=True)
datadict_keys = [
("Time (ps)", _Units.Time.picosecond, "getTime"),
(
"PotentialEnergy (kJ/mol)",
_Units.Energy.kj_per_mol,
"getPotentialEnergy",
),
]
if not isinstance(self._protocol, _Protocol.Minimisation):
datadict_keys.extend(
[
("Volume (nm^3)", _Units.Volume.nanometer3, "getVolume"),
("Pressure (bar)", _Units.Pressure.bar, "getPressure"),
(
"Temperature (kelvin)",
_Units.Temperature.kelvin,
"getTemperature",
),
]
)
df = self._convert_datadict_keys(datadict_keys)
df.to_parquet(path=f"{self.workDir()}/{filename}", index=True)
if u_nk is not None:
_assert_imported(_alchemlyb)
if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract_u_nk(
f"{self.workDir()}/{self._name}.xvg",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
with _warnings.catch_warnings():
_warnings.filterwarnings(
"ignore",
message="The DataFrame has column names of mixed type.",
)
energy.to_parquet(path=f"{self.workDir()}/{u_nk}", index=True)
if dHdl is not None:
_assert_imported(_alchemlyb)
if isinstance(self._protocol, _Protocol.FreeEnergy):
energy = _extract_dHdl(
f"{self.workDir()}/{self._name}.xvg",
T=self._protocol.getTemperature() / _Units.Temperature.kelvin,
)
with _warnings.catch_warnings():
_warnings.filterwarnings(
"ignore",
message="The DataFrame has column names of mixed type.",
)
energy.to_parquet(path=f"{self.workDir()}/{dHdl}", index=True)


def _is_minimisation(config):
Expand Down
25 changes: 14 additions & 11 deletions python/BioSimSpace/Sandpit/Exscientia/Process/_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,16 +911,6 @@ def wait(self, max_time=None):

# The process isn't running.
if not self.isRunning():
try:
self.saveMetric()
except Exception:
exception_info = traceback.format_exc()
with open(f"{self.workDir()}/{self._name}.err", "a+") as f:
f.write("Exception Information during saveMetric():\n")
f.write("======================\n")
f.write(exception_info)
f.write("\n\n")

return

if max_time is not None:
Expand Down Expand Up @@ -1650,12 +1640,25 @@ def _generate_args(self):
"""Generate the dictionary of command-line arguments."""
self.clearArgs()

def _saveMetric(self, filename, u_nk, dHdl):
"""The abstract function to save the metric and free energy data. Need to be
defined for each MD engine."""
pass

def saveMetric(
self, filename="metric.parquet", u_nk="u_nk.parquet", dHdl="dHdl.parquet"
):
"""The abstract function to save the metric and free energy data. Need to be
defined for each MD engine."""
pass
try:
self._saveMetric(filename, u_nk, dHdl)
except Exception:
exception_info = traceback.format_exc()
with open(f"{self.workDir()}/{self._name}.err", "a+") as f:
f.write("Exception Information during saveMetric():\n")
f.write("======================\n")
f.write(exception_info)
f.write("\n\n")

def _convert_datadict_keys(self, datadict_keys):
"""This function is a helper function for saveMetric that converts a
Expand Down
Loading