From a593b35983895024bf58197c482fb0cda6157796 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Wed, 18 Oct 2023 21:56:33 +0100 Subject: [PATCH 1/9] Add support for restraining backbone atoms in nucleic acids --- doc/source/changelog.rst | 1 + python/BioSimSpace/_Config/_amber.py | 2 +- python/BioSimSpace/_SireWrappers/_system.py | 63 +++++++++++++++++---- 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index a2840dc79..9ab48c3a8 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -12,6 +12,7 @@ within the biomolecular simulation community. Our software is hosted via the `Op `2023.3.0 `_ - Jun 30 2023 ------------------------------------------------------------------------------------------------- +* Added support for restraining "backbone" atoms in nucleic acids (PR HERE). * Reinstate :data:`BioSimSpace.Stream ` sub-package (`#36 `__). * Fixed ``setup.py`` file to work correctly on Windows (`#72 `__). * Fixed bug with missing working directory when using ``rmsd_flex_align`` scoring function (`#75 `__). diff --git a/python/BioSimSpace/_Config/_amber.py b/python/BioSimSpace/_Config/_amber.py index 744de459f..e45b3b7d4 100644 --- a/python/BioSimSpace/_Config/_amber.py +++ b/python/BioSimSpace/_Config/_amber.py @@ -210,7 +210,7 @@ def createConfig( # "heavy" restraints using a non-interoperable name mask. if type(restraint) is str: if restraint == "backbone": - restraint_mask = "@CA,C,O,N" + restraint_mask = "@CA,C,O,N,P,C5,C3,O3,O5" elif restraint == "heavy": restraint_mask = "!:WAT & !@H" elif restraint == "all": diff --git a/python/BioSimSpace/_SireWrappers/_system.py b/python/BioSimSpace/_SireWrappers/_system.py index d8200e55e..c13df7188 100644 --- a/python/BioSimSpace/_SireWrappers/_system.py +++ b/python/BioSimSpace/_SireWrappers/_system.py @@ -1855,6 +1855,44 @@ def getRestraintAtoms( "ASF", } + # A set of nucleic acid residues. Taken from MDAnalysis. + nucl_res = { + "ADE", + "URA", + "CYT", + "GUA", + "THY", + "DA", + "DC", + "DG", + "DT", + "RA", + "RU", + "RG", + "RC", + "A", + "T", + "U", + "C", + "G", + "DA5", + "DC5", + "DG5", + "DT5", + "DA3", + "DC3", + "DG3", + "DT3", + "RA5", + "RU5", + "RG5", + "RC5", + "RA3", + "RU3", + "RG3", + "RC3", + } + # A list of ion elements. ions = [ "F", @@ -1930,11 +1968,12 @@ def getRestraintAtoms( if self.nPerturbableMolecules() == 0: # Backbone restraints. if restraint == "backbone": - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( "(not water) and (resname " + ",".join(prot_res) - + ") and (atomname N,CA,C,O)" + + ",".join(nucl_res) + + ") and (atomname N,CA,C,O,P,C5,C3,O3,O5)" ) try: search = self.search(string, property_map) @@ -1984,11 +2023,12 @@ def getRestraintAtoms( if restraint == "backbone": if not mol.isWater(): - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( - "(resname " + "(not water) and (resname " + ",".join(prot_res) - + ") and (atomname N,CA,C,O)" + + ",".join(nucl_res) + + ") and (atomname N,CA,C,O,P,C5,C3,O3,O5)" ) try: search = mol.search(string, _property_map) @@ -2040,9 +2080,12 @@ def getRestraintAtoms( if restraint == "backbone": if not mol.isWater(): - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( - "(resname " + ",".join(prot_res) + ") and (atomname N,CA,C,O)" + "(resname " + + ",".join(prot_res) + + ",".join(nucl_res) + + ") and (atomname N,CA,C,O,P,C5,C3,O3,O5)" ) try: search = mol.search(string, _property_map) @@ -2080,7 +2123,7 @@ def getRestraintAtoms( if num_results == 0: msg = "No atoms matched the restraint!" if restraint == "backbone": - msg += " Backbone restraints only apply to atoms in protein residues." + msg += " Backbone restraints only apply to atoms in protein residues or nucleotides." raise _IncompatibleError(msg) # Loop over the searches for each molecule. @@ -2097,9 +2140,7 @@ def getRestraintAtoms( if len(search) == 0 and not allow_zero_matches: msg = "No atoms matched the restraint!" if restraint == "backbone": - msg += ( - " Backbone restraints only apply to atoms in protein residues." - ) + msg += " Backbone restraints only apply to atoms in protein residues or nucleotides." raise _IncompatibleError(msg) # Now loop over all matching atoms and get their indices. From d3fcd464fb2d6387407e548ec3f98937f3dd68f4 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Wed, 18 Oct 2023 22:09:08 +0100 Subject: [PATCH 2/9] Correct names of nucleic acid backbone atoms --- python/BioSimSpace/_Config/_amber.py | 2 +- python/BioSimSpace/_SireWrappers/_system.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/BioSimSpace/_Config/_amber.py b/python/BioSimSpace/_Config/_amber.py index e45b3b7d4..e9cf3ff80 100644 --- a/python/BioSimSpace/_Config/_amber.py +++ b/python/BioSimSpace/_Config/_amber.py @@ -210,7 +210,7 @@ def createConfig( # "heavy" restraints using a non-interoperable name mask. if type(restraint) is str: if restraint == "backbone": - restraint_mask = "@CA,C,O,N,P,C5,C3,O3,O5" + restraint_mask = "@CA,C,O,N,P,C5',C3',O3',O5'" elif restraint == "heavy": restraint_mask = "!:WAT & !@H" elif restraint == "all": diff --git a/python/BioSimSpace/_SireWrappers/_system.py b/python/BioSimSpace/_SireWrappers/_system.py index c13df7188..f168b78f6 100644 --- a/python/BioSimSpace/_SireWrappers/_system.py +++ b/python/BioSimSpace/_SireWrappers/_system.py @@ -1973,7 +1973,7 @@ def getRestraintAtoms( "(not water) and (resname " + ",".join(prot_res) + ",".join(nucl_res) - + ") and (atomname N,CA,C,O,P,C5,C3,O3,O5)" + + ") and (atomname N,CA,C,O,P,C5',C3',O3',O5')" ) try: search = self.search(string, property_map) @@ -2028,7 +2028,7 @@ def getRestraintAtoms( "(not water) and (resname " + ",".join(prot_res) + ",".join(nucl_res) - + ") and (atomname N,CA,C,O,P,C5,C3,O3,O5)" + + ") and (atomname N,CA,C,O,P,C5',C3',O3',O5')" ) try: search = mol.search(string, _property_map) @@ -2085,7 +2085,7 @@ def getRestraintAtoms( "(resname " + ",".join(prot_res) + ",".join(nucl_res) - + ") and (atomname N,CA,C,O,P,C5,C3,O3,O5)" + + ") and (atomname N,CA,C,O,P,C5',C3',O3',O5')" ) try: search = mol.search(string, _property_map) From 4ff8e2362abd1cc1422c460bbe84e72cd9e3204c Mon Sep 17 00:00:00 2001 From: finlayclark Date: Sun, 22 Oct 2023 20:49:05 +0100 Subject: [PATCH 3/9] Use regex mode to allow searching for atom names containing ' As discussed here https://github.com/OpenBioSim/sire/issues/115 --- python/BioSimSpace/_SireWrappers/_system.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/BioSimSpace/_SireWrappers/_system.py b/python/BioSimSpace/_SireWrappers/_system.py index f168b78f6..1ac2a432f 100644 --- a/python/BioSimSpace/_SireWrappers/_system.py +++ b/python/BioSimSpace/_SireWrappers/_system.py @@ -1973,7 +1973,7 @@ def getRestraintAtoms( "(not water) and (resname " + ",".join(prot_res) + ",".join(nucl_res) - + ") and (atomname N,CA,C,O,P,C5',C3',O3',O5')" + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = self.search(string, property_map) @@ -2028,7 +2028,7 @@ def getRestraintAtoms( "(not water) and (resname " + ",".join(prot_res) + ",".join(nucl_res) - + ") and (atomname N,CA,C,O,P,C5',C3',O3',O5')" + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = mol.search(string, _property_map) @@ -2085,7 +2085,7 @@ def getRestraintAtoms( "(resname " + ",".join(prot_res) + ",".join(nucl_res) - + ") and (atomname N,CA,C,O,P,C5',C3',O3',O5')" + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = mol.search(string, _property_map) From 7f5fc872e3cdad72c645e76eb171e3f45ace9eaa Mon Sep 17 00:00:00 2001 From: finlayclark Date: Mon, 23 Oct 2023 15:25:14 +0100 Subject: [PATCH 4/9] Ensure that amber restraint mask only contains atoms present in the system Otherwise this results in 'Error: an illegal memory access was encountered launching kernel kClearForces' --- python/BioSimSpace/_Config/_amber.py | 14 +- python/BioSimSpace/_SireWrappers/_system.py | 511 +++++++++++--------- 2 files changed, 294 insertions(+), 231 deletions(-) diff --git a/python/BioSimSpace/_Config/_amber.py b/python/BioSimSpace/_Config/_amber.py index e9cf3ff80..add4f40a9 100644 --- a/python/BioSimSpace/_Config/_amber.py +++ b/python/BioSimSpace/_Config/_amber.py @@ -210,7 +210,19 @@ def createConfig( # "heavy" restraints using a non-interoperable name mask. if type(restraint) is str: if restraint == "backbone": - restraint_mask = "@CA,C,O,N,P,C5',C3',O3',O5'" + # Determine wether the contains protein, nucleic acid, or both. + restraint_atom_names = [] + if self._system.containsAminoAcids(): + restraint_atom_names += ["N", "CA", "C", "O"] + if self._system.containsNucleicAcids(): + restraint_atom_names += [ + "P", + "C5'", + "C3'", + "O3'", + "O5'", + ] + restraint_mask = "@" + ",".join(restraint_atom_names) elif restraint == "heavy": restraint_mask = "!:WAT & !@H" elif restraint == "all": diff --git a/python/BioSimSpace/_SireWrappers/_system.py b/python/BioSimSpace/_SireWrappers/_system.py index 1ac2a432f..3ea463a01 100644 --- a/python/BioSimSpace/_SireWrappers/_system.py +++ b/python/BioSimSpace/_SireWrappers/_system.py @@ -53,6 +53,224 @@ class System(_SireWrapper): """A container class for storing molecular systems.""" + # A set of protein residues. Taken from MDAnalysis. + prot_res = { + # CHARMM top_all27_prot_lipid.rtf + "ALA", + "ARG", + "ASN", + "ASP", + "CYS", + "GLN", + "GLU", + "GLY", + "HSD", + "HSE", + "HSP", + "ILE", + "LEU", + "LYS", + "MET", + "PHE", + "PRO", + "SER", + "THR", + "TRP", + "TYR", + "VAL", + "ALAD", + ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin + # PDB + "HIS", + "MSE", + # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp + "ARGN", + "ASPH", + "CYS2", + "CYSH", + "QLN", + "PGLU", + "GLUH", + "HIS1", + "HISD", + "HISE", + "HISH", + "LYSH", + # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp + "ASN1", + "CYS1", + "HISA", + "HISB", + "HIS2", + # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp + "HID", + "HIE", + "HIP", + "ORN", + "DAB", + "LYN", + "HYP", + "CYM", + "CYX", + "ASH", + "GLH", + "ACE", + "NME", + # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp + "NALA", + "NGLY", + "NSER", + "NTHR", + "NLEU", + "NILE", + "NVAL", + "NASN", + "NGLN", + "NARG", + "NHID", + "NHIE", + "NHIP", + "NTRP", + "NPHE", + "NTYR", + "NGLU", + "NASP", + "NLYS", + "NPRO", + "NCYS", + "NCYX", + "NMET", + "CALA", + "CGLY", + "CSER", + "CTHR", + "CLEU", + "CILE", + "CVAL", + "CASF", + "CASN", + "CGLN", + "CARG", + "CHID", + "CHIE", + "CHIP", + "CTRP", + "CPHE", + "CTYR", + "CGLU", + "CASP", + "CLYS", + "CPRO", + "CCYS", + "CCYX", + "CMET", + "CME", + "ASF", + } + + # A set of nucleic acid residues. Taken from MDAnalysis. + nucl_res = { + "ADE", + "URA", + "CYT", + "GUA", + "THY", + "DA", + "DC", + "DG", + "DT", + "RA", + "RU", + "RG", + "RC", + "A", + "T", + "U", + "C", + "G", + "DA5", + "DC5", + "DG5", + "DT5", + "DA3", + "DC3", + "DG3", + "DT3", + "RA5", + "RU5", + "RG5", + "RC5", + "RA3", + "RU3", + "RG3", + "RC3", + } + + # A list of ion elements. + ions = [ + "F", + "Cl", + "Br", + "I", + "Li", + "Na", + "K", + "Rb", + "Cs", + "Mg", + "Tl", + "Cu", + "Ag", + "Be", + "Cu", + "Ni", + "Pt", + "Zn", + "Co", + "Pd", + "Ag", + "Cr", + "Fe", + "Mg", + "V", + "Mn", + "Hg", + "Cd", + "Yb", + "Ca", + "Sn", + "Pb", + "Eu", + "Sr", + "Sm", + "Ba", + "Ra", + "Al", + "Fe", + "Cr", + "In", + "Tl", + "Y", + "La", + "Ce", + "Pr", + "Nd", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Er", + "Tm", + "Lu", + "Hf", + "Zr", + "Ce", + "U", + "Pu", + "Th", + ] + def __init__(self, system): """ Constructor. @@ -1740,224 +1958,6 @@ def getRestraintAtoms( # Initialise the list of indices. indices = [] - # A set of protein residues. Taken from MDAnalysis. - prot_res = { - # CHARMM top_all27_prot_lipid.rtf - "ALA", - "ARG", - "ASN", - "ASP", - "CYS", - "GLN", - "GLU", - "GLY", - "HSD", - "HSE", - "HSP", - "ILE", - "LEU", - "LYS", - "MET", - "PHE", - "PRO", - "SER", - "THR", - "TRP", - "TYR", - "VAL", - "ALAD", - ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin - # PDB - "HIS", - "MSE", - # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp - "ARGN", - "ASPH", - "CYS2", - "CYSH", - "QLN", - "PGLU", - "GLUH", - "HIS1", - "HISD", - "HISE", - "HISH", - "LYSH", - # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp - "ASN1", - "CYS1", - "HISA", - "HISB", - "HIS2", - # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp - "HID", - "HIE", - "HIP", - "ORN", - "DAB", - "LYN", - "HYP", - "CYM", - "CYX", - "ASH", - "GLH", - "ACE", - "NME", - # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp - "NALA", - "NGLY", - "NSER", - "NTHR", - "NLEU", - "NILE", - "NVAL", - "NASN", - "NGLN", - "NARG", - "NHID", - "NHIE", - "NHIP", - "NTRP", - "NPHE", - "NTYR", - "NGLU", - "NASP", - "NLYS", - "NPRO", - "NCYS", - "NCYX", - "NMET", - "CALA", - "CGLY", - "CSER", - "CTHR", - "CLEU", - "CILE", - "CVAL", - "CASF", - "CASN", - "CGLN", - "CARG", - "CHID", - "CHIE", - "CHIP", - "CTRP", - "CPHE", - "CTYR", - "CGLU", - "CASP", - "CLYS", - "CPRO", - "CCYS", - "CCYX", - "CMET", - "CME", - "ASF", - } - - # A set of nucleic acid residues. Taken from MDAnalysis. - nucl_res = { - "ADE", - "URA", - "CYT", - "GUA", - "THY", - "DA", - "DC", - "DG", - "DT", - "RA", - "RU", - "RG", - "RC", - "A", - "T", - "U", - "C", - "G", - "DA5", - "DC5", - "DG5", - "DT5", - "DA3", - "DC3", - "DG3", - "DT3", - "RA5", - "RU5", - "RG5", - "RC5", - "RA3", - "RU3", - "RG3", - "RC3", - } - - # A list of ion elements. - ions = [ - "F", - "Cl", - "Br", - "I", - "Li", - "Na", - "K", - "Rb", - "Cs", - "Mg", - "Tl", - "Cu", - "Ag", - "Be", - "Cu", - "Ni", - "Pt", - "Zn", - "Co", - "Pd", - "Ag", - "Cr", - "Fe", - "Mg", - "V", - "Mn", - "Hg", - "Cd", - "Yb", - "Ca", - "Sn", - "Pb", - "Eu", - "Sr", - "Sm", - "Ba", - "Ra", - "Al", - "Fe", - "Cr", - "In", - "Tl", - "Y", - "La", - "Ce", - "Pr", - "Nd", - "Sm", - "Eu", - "Gd", - "Tb", - "Dy", - "Er", - "Tm", - "Lu", - "Hf", - "Zr", - "Ce", - "U", - "Pu", - "Th", - ] - # Whether we've searched a perturbable system. If so, then we need to process # the search results differently. (There will be one for each molecule.) is_perturbable_system = False @@ -1971,8 +1971,8 @@ def getRestraintAtoms( # Find all backbone atoms in protein residues or nucleotides. string = ( "(not water) and (resname " - + ",".join(prot_res) - + ",".join(nucl_res) + + ",".join(self.prot_res) + + ",".join(self.nucl_res) + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: @@ -1982,7 +1982,7 @@ def getRestraintAtoms( elif restraint == "heavy": # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(self.ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -1992,7 +1992,7 @@ def getRestraintAtoms( elif restraint == "all": # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(self.ions) # Find all non-water, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -2026,8 +2026,8 @@ def getRestraintAtoms( # Find all backbone atoms in protein residues or nucleotides. string = ( "(not water) and (resname " - + ",".join(prot_res) - + ",".join(nucl_res) + + ",".join(self.prot_res) + + ",".join(self.nucl_res) + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: @@ -2038,7 +2038,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(self.ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2049,7 +2049,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(self.ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2083,8 +2083,8 @@ def getRestraintAtoms( # Find all backbone atoms in protein residues or nucleotides. string = ( "(resname " - + ",".join(prot_res) - + ",".join(nucl_res) + + ",".join(self.prot_res) + + ",".join(self.nucl_res) + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: @@ -2095,7 +2095,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(self.ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2106,7 +2106,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(self.ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2155,6 +2155,56 @@ def getRestraintAtoms( return indices + def containsAminoAcids(self, property_map={}): + """ + Check whether the system contains nucleic acids. + + Parameters + ---------- + property_map : dict + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } + + Returns + ------- + + contains_aa : bool + True if the system contains amino acids, False otherwise. + """ + string = "(resname " + ",".join(self.prot_res) + ")" + try: + search = self.search(string, property_map) + except: + search = [] + + return bool(search) + + def containsNucleicAcids(self, property_map={}): + """ + Check whether the system contains nucleic acids. + + Parameters + ---------- + property_map : dict + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } + + Returns + ------- + + contains_na : bool + True if the system contains amino acids, False otherwise. + """ + string = "(resname " + ",".join(self.nucl_res) + ")" + try: + search = self.search(string, property_map) + except: + search = [] + + return bool(search) + def _isParameterised(self, property_map={}): """ Whether the system is parameterised, i.e. can we run a simulation @@ -2168,6 +2218,7 @@ def _isParameterised(self, property_map={}): property_map : dict A dictionary that maps system "properties" to their user defined values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } Returns ------- From fe68da35c5de5b6158e3e6054d686421a5bc52b3 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 24 Oct 2023 11:00:15 +0100 Subject: [PATCH 5/9] Hide residue name lists and improve search functions This addresses code review comments here https://github.com/OpenBioSim/biosimspace/pull/189. --- python/BioSimSpace/_Config/_amber.py | 4 +- python/BioSimSpace/_SireWrappers/_system.py | 311 ++++---------------- python/BioSimSpace/_SireWrappers/_utils.py | 241 +++++++++++++++ 3 files changed, 302 insertions(+), 254 deletions(-) create mode 100644 python/BioSimSpace/_SireWrappers/_utils.py diff --git a/python/BioSimSpace/_Config/_amber.py b/python/BioSimSpace/_Config/_amber.py index add4f40a9..407adeefe 100644 --- a/python/BioSimSpace/_Config/_amber.py +++ b/python/BioSimSpace/_Config/_amber.py @@ -212,9 +212,9 @@ def createConfig( if restraint == "backbone": # Determine wether the contains protein, nucleic acid, or both. restraint_atom_names = [] - if self._system.containsAminoAcids(): + if self._system.nAminoAcids() > 0: restraint_atom_names += ["N", "CA", "C", "O"] - if self._system.containsNucleicAcids(): + if self._system.nNucleotides() > 0: restraint_atom_names += [ "P", "C5'", diff --git a/python/BioSimSpace/_SireWrappers/_system.py b/python/BioSimSpace/_SireWrappers/_system.py index 3ea463a01..484f728b6 100644 --- a/python/BioSimSpace/_SireWrappers/_system.py +++ b/python/BioSimSpace/_SireWrappers/_system.py @@ -46,6 +46,7 @@ from .. import Units as _Units from ._sire_wrapper import SireWrapper as _SireWrapper +from ._utils import _prot_res, _nucl_res, _ions from sire.mol import Select as _Select @@ -53,224 +54,6 @@ class System(_SireWrapper): """A container class for storing molecular systems.""" - # A set of protein residues. Taken from MDAnalysis. - prot_res = { - # CHARMM top_all27_prot_lipid.rtf - "ALA", - "ARG", - "ASN", - "ASP", - "CYS", - "GLN", - "GLU", - "GLY", - "HSD", - "HSE", - "HSP", - "ILE", - "LEU", - "LYS", - "MET", - "PHE", - "PRO", - "SER", - "THR", - "TRP", - "TYR", - "VAL", - "ALAD", - ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin - # PDB - "HIS", - "MSE", - # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp - "ARGN", - "ASPH", - "CYS2", - "CYSH", - "QLN", - "PGLU", - "GLUH", - "HIS1", - "HISD", - "HISE", - "HISH", - "LYSH", - # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp - "ASN1", - "CYS1", - "HISA", - "HISB", - "HIS2", - # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp - "HID", - "HIE", - "HIP", - "ORN", - "DAB", - "LYN", - "HYP", - "CYM", - "CYX", - "ASH", - "GLH", - "ACE", - "NME", - # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp - "NALA", - "NGLY", - "NSER", - "NTHR", - "NLEU", - "NILE", - "NVAL", - "NASN", - "NGLN", - "NARG", - "NHID", - "NHIE", - "NHIP", - "NTRP", - "NPHE", - "NTYR", - "NGLU", - "NASP", - "NLYS", - "NPRO", - "NCYS", - "NCYX", - "NMET", - "CALA", - "CGLY", - "CSER", - "CTHR", - "CLEU", - "CILE", - "CVAL", - "CASF", - "CASN", - "CGLN", - "CARG", - "CHID", - "CHIE", - "CHIP", - "CTRP", - "CPHE", - "CTYR", - "CGLU", - "CASP", - "CLYS", - "CPRO", - "CCYS", - "CCYX", - "CMET", - "CME", - "ASF", - } - - # A set of nucleic acid residues. Taken from MDAnalysis. - nucl_res = { - "ADE", - "URA", - "CYT", - "GUA", - "THY", - "DA", - "DC", - "DG", - "DT", - "RA", - "RU", - "RG", - "RC", - "A", - "T", - "U", - "C", - "G", - "DA5", - "DC5", - "DG5", - "DT5", - "DA3", - "DC3", - "DG3", - "DT3", - "RA5", - "RU5", - "RG5", - "RC5", - "RA3", - "RU3", - "RG3", - "RC3", - } - - # A list of ion elements. - ions = [ - "F", - "Cl", - "Br", - "I", - "Li", - "Na", - "K", - "Rb", - "Cs", - "Mg", - "Tl", - "Cu", - "Ag", - "Be", - "Cu", - "Ni", - "Pt", - "Zn", - "Co", - "Pd", - "Ag", - "Cr", - "Fe", - "Mg", - "V", - "Mn", - "Hg", - "Cd", - "Yb", - "Ca", - "Sn", - "Pb", - "Eu", - "Sr", - "Sm", - "Ba", - "Ra", - "Al", - "Fe", - "Cr", - "In", - "Tl", - "Y", - "La", - "Ce", - "Pr", - "Nd", - "Sm", - "Eu", - "Gd", - "Tb", - "Dy", - "Er", - "Tm", - "Lu", - "Hf", - "Zr", - "Ce", - "U", - "Pu", - "Th", - ] - def __init__(self, system): """ Constructor. @@ -1971,8 +1754,8 @@ def getRestraintAtoms( # Find all backbone atoms in protein residues or nucleotides. string = ( "(not water) and (resname " - + ",".join(self.prot_res) - + ",".join(self.nucl_res) + + ",".join(_prot_res) + + ",".join(_nucl_res) + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: @@ -1982,7 +1765,7 @@ def getRestraintAtoms( elif restraint == "heavy": # Convert to a formatted string for the search. - ion_string = ",".join(self.ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -1992,7 +1775,7 @@ def getRestraintAtoms( elif restraint == "all": # Convert to a formatted string for the search. - ion_string = ",".join(self.ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -2026,8 +1809,8 @@ def getRestraintAtoms( # Find all backbone atoms in protein residues or nucleotides. string = ( "(not water) and (resname " - + ",".join(self.prot_res) - + ",".join(self.nucl_res) + + ",".join(_prot_res) + + ",".join(_nucl_res) + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: @@ -2038,7 +1821,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(self.ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2049,7 +1832,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(self.ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2083,8 +1866,8 @@ def getRestraintAtoms( # Find all backbone atoms in protein residues or nucleotides. string = ( "(resname " - + ",".join(self.prot_res) - + ",".join(self.nucl_res) + + ",".join(_prot_res) + + ",".join(_nucl_res) + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: @@ -2095,7 +1878,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(self.ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2106,7 +1889,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(self.ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2155,55 +1938,79 @@ def getRestraintAtoms( return indices - def containsAminoAcids(self, property_map={}): + def getAminoAcids(self, property_map={}): """ - Check whether the system contains nucleic acids. + Return a list containing all of the amino acid residues in the system. Parameters ---------- + property_map : dict - A dictionary that maps system "properties" to their user defined - values. This allows the user to refer to properties with their - own naming scheme, e.g. { "charge" : "my-charge" } + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } Returns ------- - contains_aa : bool - True if the system contains amino acids, False otherwise. + residues : [:class:`Residue `] + The list of all amino acid residues in the system. """ - string = "(resname " + ",".join(self.prot_res) + ")" + search_string = "(resname " + ",".join(_prot_res) + ")" try: - search = self.search(string, property_map) + residues = list(self.search(search_string, property_map)) except: - search = [] + residues = [] + return residues + + def nAminoAcids(self): + """ + Return the number of amino acid residues in the system. - return bool(search) + Returns + ------- + + num_aa : int + The number of amino acid residues in the system. + """ + return len(self.getAminoAcids()) - def containsNucleicAcids(self, property_map={}): + def getNucleotides(self, property_map={}): """ - Check whether the system contains nucleic acids. + Return a list containing all of the nucleotide residues in the system. Parameters ---------- + property_map : dict - A dictionary that maps system "properties" to their user defined - values. This allows the user to refer to properties with their - own naming scheme, e.g. { "charge" : "my-charge" } + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } Returns ------- - contains_na : bool - True if the system contains amino acids, False otherwise. + residues : [:class:`Residue `] + The list of all nucleotide residues in the system. """ - string = "(resname " + ",".join(self.nucl_res) + ")" + search_string = "(resname " + ",".join(_nucl_res) + ")" try: - search = self.search(string, property_map) + residues = list(self.search(search_string, property_map)) except: - search = [] + residues = [] + return residues + + def nNucleotides(self): + """ + Return the number of nucleotide residues in the system. - return bool(search) + Returns + ------- + + num_nuc : int + The number of nucleotide residues in the system. + """ + return len(self.getNucleotides()) def _isParameterised(self, property_map={}): """ diff --git a/python/BioSimSpace/_SireWrappers/_utils.py b/python/BioSimSpace/_SireWrappers/_utils.py new file mode 100644 index 000000000..faab3e069 --- /dev/null +++ b/python/BioSimSpace/_SireWrappers/_utils.py @@ -0,0 +1,241 @@ +###################################################################### +# BioSimSpace: Making biomolecular simulation a breeze! +# +# Copyright: 2017-2023 +# +# Authors: Lester Hedges +# +# BioSimSpace is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# BioSimSpace is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BioSimSpace. If not, see . +##################################################################### +""" +Utilities. +""" + +# A set of protein residues. Taken from MDAnalysis. +_prot_res = { + # CHARMM top_all27_prot_lipid.rtf + "ALA", + "ARG", + "ASN", + "ASP", + "CYS", + "GLN", + "GLU", + "GLY", + "HSD", + "HSE", + "HSP", + "ILE", + "LEU", + "LYS", + "MET", + "PHE", + "PRO", + "SER", + "THR", + "TRP", + "TYR", + "VAL", + "ALAD", + ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin + # PDB + "HIS", + "MSE", + # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp + "ARGN", + "ASPH", + "CYS2", + "CYSH", + "QLN", + "PGLU", + "GLUH", + "HIS1", + "HISD", + "HISE", + "HISH", + "LYSH", + # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp + "ASN1", + "CYS1", + "HISA", + "HISB", + "HIS2", + # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp + "HID", + "HIE", + "HIP", + "ORN", + "DAB", + "LYN", + "HYP", + "CYM", + "CYX", + "ASH", + "GLH", + "ACE", + "NME", + # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp + "NALA", + "NGLY", + "NSER", + "NTHR", + "NLEU", + "NILE", + "NVAL", + "NASN", + "NGLN", + "NARG", + "NHID", + "NHIE", + "NHIP", + "NTRP", + "NPHE", + "NTYR", + "NGLU", + "NASP", + "NLYS", + "NPRO", + "NCYS", + "NCYX", + "NMET", + "CALA", + "CGLY", + "CSER", + "CTHR", + "CLEU", + "CILE", + "CVAL", + "CASF", + "CASN", + "CGLN", + "CARG", + "CHID", + "CHIE", + "CHIP", + "CTRP", + "CPHE", + "CTYR", + "CGLU", + "CASP", + "CLYS", + "CPRO", + "CCYS", + "CCYX", + "CMET", + "CME", + "ASF", +} + +# A set of nucleic acid residues. Taken from MDAnalysis. +_nucl_res = { + "ADE", + "URA", + "CYT", + "GUA", + "THY", + "DA", + "DC", + "DG", + "DT", + "RA", + "RU", + "RG", + "RC", + "A", + "T", + "U", + "C", + "G", + "DA5", + "DC5", + "DG5", + "DT5", + "DA3", + "DC3", + "DG3", + "DT3", + "RA5", + "RU5", + "RG5", + "RC5", + "RA3", + "RU3", + "RG3", + "RC3", +} + +# A list of ion elements. +_ions = [ + "F", + "Cl", + "Br", + "I", + "Li", + "Na", + "K", + "Rb", + "Cs", + "Mg", + "Tl", + "Cu", + "Ag", + "Be", + "Cu", + "Ni", + "Pt", + "Zn", + "Co", + "Pd", + "Ag", + "Cr", + "Fe", + "Mg", + "V", + "Mn", + "Hg", + "Cd", + "Yb", + "Ca", + "Sn", + "Pb", + "Eu", + "Sr", + "Sm", + "Ba", + "Ra", + "Al", + "Fe", + "Cr", + "In", + "Tl", + "Y", + "La", + "Ce", + "Pr", + "Nd", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Er", + "Tm", + "Lu", + "Hf", + "Zr", + "Ce", + "U", + "Pu", + "Th", +] From 429dfc69773ee760cdd2ce096d0016a46a098258 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 24 Oct 2023 13:20:50 +0100 Subject: [PATCH 6/9] Add tests for RNA backbone restraint functionality --- tests/Process/test_amber.py | 52 ++++++++++++++++++++++++++++++ tests/_SireWrappers/test_system.py | 18 +++++++++++ 2 files changed, 70 insertions(+) diff --git a/tests/Process/test_amber.py b/tests/Process/test_amber.py index 7267e4506..89355c80b 100644 --- a/tests/Process/test_amber.py +++ b/tests/Process/test_amber.py @@ -15,6 +15,22 @@ def system(): return BSS.IO.readMolecules(["tests/input/ala.top", "tests/input/ala.crd"]) +@pytest.fixture(scope="session") +def rna_system(): + """An RNA system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["rna_6e1s.rst7", "rna_6e1s.prm7"]) + ) + + +@pytest.fixture(scope="session") +def large_protein_system(): + """A large protein system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["complex_vac0.prm7", "complex_vac0.rst7"]) + ) + + @pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") @pytest.mark.parametrize("restraint", restraints) def test_minimise(system, restraint): @@ -193,6 +209,42 @@ def test_args(system): assert arg_string == "-x X -a A -b B -y -e -f 6 -g -h H -k K -z Z" +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_backbone_restraint_mask_protein(large_protein_system): + """ + Test that the amber backbone restraint mask is correct for a protein system. + We need a large protein system otherwise the logic we want to test will be + skipped, and individual atoms will be specified in the config. + """ + + # Create an equilibration protocol with backbone restraints. + protocol = BSS.Protocol.Equilibration(restraint="backbone") + + # Create the process object. + process = BSS.Process.Amber(large_protein_system, protocol, name="test") + + # Check that the correct restraint mask is in the config. + config = process.getConfig() + assert ' restraintmask="@N,CA,C,O",' in config + + +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_backbone_restraint_mask_rna(rna_system): + """ + Test that the amber backbone restraint mask is correct for an RNA system. + """ + + # Create an equilibration protocol with backbone restraints. + protocol = BSS.Protocol.Equilibration(restraint="backbone") + + # Create the process object. + process = BSS.Process.Amber(rna_system, protocol, name="test") + + # Check that the correct restraint mask is in the config. + config = process.getConfig() + assert " restraintmask=\"@P,C5',C3',O3',O5'\"," in config + + def run_process(system, protocol, check_data=False): """Helper function to run various simulation protocols.""" diff --git a/tests/_SireWrappers/test_system.py b/tests/_SireWrappers/test_system.py index 3f8cdb126..7e026eafb 100644 --- a/tests/_SireWrappers/test_system.py +++ b/tests/_SireWrappers/test_system.py @@ -14,6 +14,14 @@ def system(): return BSS.IO.readMolecules(["tests/input/ala.top", "tests/input/ala.crd"]) +@pytest.fixture(scope="session") +def rna_system(): + """An RNA system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["rna_6e1s.rst7", "rna_6e1s.prm7"]) + ) + + # Parameterise the function with a set of molecule indices. @pytest.mark.parametrize("index", [0, -1]) def test_molecule_equivalence(system, index): @@ -447,3 +455,13 @@ def test_rotate_box_vectors(system): assert math.isclose(c1.x().value(), c0.z().value()) assert math.isclose(c1.y().value(), c0.x().value()) assert math.isclose(c1.z().value(), c0.y().value()) + + +def test_residue_searches_protein(system): + assert len(system.getAminoAcids()) == system.nAminoAcids() == 3 + assert len(system.getNucleotides()) == system.nNucleotides() == 0 + + +def test_residue_searches_rna(rna_system): + assert len(rna_system.getAminoAcids()) == rna_system.nAminoAcids() == 0 + assert len(rna_system.getNucleotides()) == rna_system.nNucleotides() == 29 From 98ec277554a861e1fd8fadcfe2140e9118783600 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 24 Oct 2023 13:38:25 +0100 Subject: [PATCH 7/9] Copy nucleic acid backbone restraint functionality to Sandpit --- .../Sandpit/Exscientia/Protocol/_config.py | 14 +- .../Exscientia/_SireWrappers/_system.py | 290 ++++++------------ .../Exscientia/_SireWrappers/_utils.py | 241 +++++++++++++++ python/BioSimSpace/_Config/_amber.py | 2 +- .../Sandpit/Exscientia/Process/test_amber.py | 52 ++++ .../Exscientia/_SireWrappers/test_system.py | 18 ++ 6 files changed, 420 insertions(+), 197 deletions(-) create mode 100644 python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_utils.py diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py index e71e1369d..3f2bb42d8 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py @@ -276,7 +276,19 @@ def generateAmberConfig(self, extra_options=None, extra_lines=None): # "heavy" restraints using a non-interoperable name mask. if type(restraint) is str: if restraint == "backbone": - restraint_mask = "@CA,C,O,N" + # Determine wether the system contains protein, nucleic acid, or both. + restraint_atom_names = [] + if self.system.nAminoAcids() > 0: + restraint_atom_names += ["N", "CA", "C", "O"] + if self.system.nNucleotides() > 0: + restraint_atom_names += [ + "P", + "C5'", + "C3'", + "O3'", + "O5'", + ] + restraint_mask = "@" + ",".join(restraint_atom_names) elif restraint == "heavy": restraint_mask = "!:WAT & !@H" elif restraint == "all": diff --git a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py index 0cb99ef12..589972f86 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py +++ b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py @@ -46,6 +46,7 @@ from .. import Units as _Units from ._sire_wrapper import SireWrapper as _SireWrapper +from ._utils import _prot_res, _nucl_res, _ions from sire.mol import Select as _Select @@ -1819,186 +1820,6 @@ def getRestraintAtoms( # Initialise the list of indices. indices = [] - # A set of protein residues. Taken from MDAnalysis. - prot_res = { - # CHARMM top_all27_prot_lipid.rtf - "ALA", - "ARG", - "ASN", - "ASP", - "CYS", - "GLN", - "GLU", - "GLY", - "HSD", - "HSE", - "HSP", - "ILE", - "LEU", - "LYS", - "MET", - "PHE", - "PRO", - "SER", - "THR", - "TRP", - "TYR", - "VAL", - "ALAD", - ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin - # PDB - "HIS", - "MSE", - # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp - "ARGN", - "ASPH", - "CYS2", - "CYSH", - "QLN", - "PGLU", - "GLUH", - "HIS1", - "HISD", - "HISE", - "HISH", - "LYSH", - # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp - "ASN1", - "CYS1", - "HISA", - "HISB", - "HIS2", - # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp - "HID", - "HIE", - "HIP", - "ORN", - "DAB", - "LYN", - "HYP", - "CYM", - "CYX", - "ASH", - "GLH", - "ACE", - "NME", - # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp - "NALA", - "NGLY", - "NSER", - "NTHR", - "NLEU", - "NILE", - "NVAL", - "NASN", - "NGLN", - "NARG", - "NHID", - "NHIE", - "NHIP", - "NTRP", - "NPHE", - "NTYR", - "NGLU", - "NASP", - "NLYS", - "NPRO", - "NCYS", - "NCYX", - "NMET", - "CALA", - "CGLY", - "CSER", - "CTHR", - "CLEU", - "CILE", - "CVAL", - "CASF", - "CASN", - "CGLN", - "CARG", - "CHID", - "CHIE", - "CHIP", - "CTRP", - "CPHE", - "CTYR", - "CGLU", - "CASP", - "CLYS", - "CPRO", - "CCYS", - "CCYX", - "CMET", - "CME", - "ASF", - } - - # A list of ion elements. - ions = [ - "F", - "Cl", - "Br", - "I", - "Li", - "Na", - "K", - "Rb", - "Cs", - "Mg", - "Tl", - "Cu", - "Ag", - "Be", - "Cu", - "Ni", - "Pt", - "Zn", - "Co", - "Pd", - "Ag", - "Cr", - "Fe", - "Mg", - "V", - "Mn", - "Hg", - "Cd", - "Yb", - "Ca", - "Sn", - "Pb", - "Eu", - "Sr", - "Sm", - "Ba", - "Ra", - "Al", - "Fe", - "Cr", - "In", - "Tl", - "Y", - "La", - "Ce", - "Pr", - "Nd", - "Sm", - "Eu", - "Gd", - "Tb", - "Dy", - "Er", - "Tm", - "Lu", - "Hf", - "Zr", - "Ce", - "U", - "Pu", - "Th", - ] - # Whether we've searched a perturbable system. If so, then we need to process # the search results differently. (There will be one for each molecule.) is_perturbable_system = False @@ -2009,11 +1830,12 @@ def getRestraintAtoms( if self.nPerturbableMolecules() == 0: # Backbone restraints. if restraint == "backbone": - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( "(not water) and (resname " - + ",".join(prot_res) - + ") and (atomname N,CA,C,O)" + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = self.search(string, property_map) @@ -2022,7 +1844,7 @@ def getRestraintAtoms( elif restraint == "heavy": # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -2032,7 +1854,7 @@ def getRestraintAtoms( elif restraint == "all": # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -2063,11 +1885,12 @@ def getRestraintAtoms( if restraint == "backbone": if not mol.isWater(): - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( - "(resname " - + ",".join(prot_res) - + ") and (atomname N,CA,C,O)" + "(not water) and (resname " + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = mol.search(string, _property_map) @@ -2077,7 +1900,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2088,7 +1911,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2119,9 +1942,12 @@ def getRestraintAtoms( if restraint == "backbone": if not mol.isWater(): - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( - "(resname " + ",".join(prot_res) + ") and (atomname N,CA,C,O)" + "(not water) and (resname " + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = mol.search(string, _property_map) @@ -2131,7 +1957,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2142,7 +1968,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2193,6 +2019,80 @@ def getRestraintAtoms( return indices + def getAminoAcids(self, property_map={}): + """ + Return a list containing all of the amino acid residues in the system. + + Parameters + ---------- + + property_map : dict + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } + + Returns + ------- + + residues : [:class:`Residue `] + The list of all amino acid residues in the system. + """ + search_string = "(resname " + ",".join(_prot_res) + ")" + try: + residues = list(self.search(search_string, property_map)) + except: + residues = [] + return residues + + def nAminoAcids(self): + """ + Return the number of amino acid residues in the system. + + Returns + ------- + + num_aa : int + The number of amino acid residues in the system. + """ + return len(self.getAminoAcids()) + + def getNucleotides(self, property_map={}): + """ + Return a list containing all of the nucleotide residues in the system. + + Parameters + ---------- + + property_map : dict + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } + + Returns + ------- + + residues : [:class:`Residue `] + The list of all nucleotide residues in the system. + """ + search_string = "(resname " + ",".join(_nucl_res) + ")" + try: + residues = list(self.search(search_string, property_map)) + except: + residues = [] + return residues + + def nNucleotides(self): + """ + Return the number of nucleotide residues in the system. + + Returns + ------- + + num_nuc : int + The number of nucleotide residues in the system. + """ + return len(self.getNucleotides()) + def _isParameterised(self, property_map={}): """ Whether the system is parameterised, i.e. can we run a simulation diff --git a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_utils.py b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_utils.py new file mode 100644 index 000000000..faab3e069 --- /dev/null +++ b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_utils.py @@ -0,0 +1,241 @@ +###################################################################### +# BioSimSpace: Making biomolecular simulation a breeze! +# +# Copyright: 2017-2023 +# +# Authors: Lester Hedges +# +# BioSimSpace is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# BioSimSpace is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BioSimSpace. If not, see . +##################################################################### +""" +Utilities. +""" + +# A set of protein residues. Taken from MDAnalysis. +_prot_res = { + # CHARMM top_all27_prot_lipid.rtf + "ALA", + "ARG", + "ASN", + "ASP", + "CYS", + "GLN", + "GLU", + "GLY", + "HSD", + "HSE", + "HSP", + "ILE", + "LEU", + "LYS", + "MET", + "PHE", + "PRO", + "SER", + "THR", + "TRP", + "TYR", + "VAL", + "ALAD", + ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin + # PDB + "HIS", + "MSE", + # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp + "ARGN", + "ASPH", + "CYS2", + "CYSH", + "QLN", + "PGLU", + "GLUH", + "HIS1", + "HISD", + "HISE", + "HISH", + "LYSH", + # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp + "ASN1", + "CYS1", + "HISA", + "HISB", + "HIS2", + # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp + "HID", + "HIE", + "HIP", + "ORN", + "DAB", + "LYN", + "HYP", + "CYM", + "CYX", + "ASH", + "GLH", + "ACE", + "NME", + # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp + "NALA", + "NGLY", + "NSER", + "NTHR", + "NLEU", + "NILE", + "NVAL", + "NASN", + "NGLN", + "NARG", + "NHID", + "NHIE", + "NHIP", + "NTRP", + "NPHE", + "NTYR", + "NGLU", + "NASP", + "NLYS", + "NPRO", + "NCYS", + "NCYX", + "NMET", + "CALA", + "CGLY", + "CSER", + "CTHR", + "CLEU", + "CILE", + "CVAL", + "CASF", + "CASN", + "CGLN", + "CARG", + "CHID", + "CHIE", + "CHIP", + "CTRP", + "CPHE", + "CTYR", + "CGLU", + "CASP", + "CLYS", + "CPRO", + "CCYS", + "CCYX", + "CMET", + "CME", + "ASF", +} + +# A set of nucleic acid residues. Taken from MDAnalysis. +_nucl_res = { + "ADE", + "URA", + "CYT", + "GUA", + "THY", + "DA", + "DC", + "DG", + "DT", + "RA", + "RU", + "RG", + "RC", + "A", + "T", + "U", + "C", + "G", + "DA5", + "DC5", + "DG5", + "DT5", + "DA3", + "DC3", + "DG3", + "DT3", + "RA5", + "RU5", + "RG5", + "RC5", + "RA3", + "RU3", + "RG3", + "RC3", +} + +# A list of ion elements. +_ions = [ + "F", + "Cl", + "Br", + "I", + "Li", + "Na", + "K", + "Rb", + "Cs", + "Mg", + "Tl", + "Cu", + "Ag", + "Be", + "Cu", + "Ni", + "Pt", + "Zn", + "Co", + "Pd", + "Ag", + "Cr", + "Fe", + "Mg", + "V", + "Mn", + "Hg", + "Cd", + "Yb", + "Ca", + "Sn", + "Pb", + "Eu", + "Sr", + "Sm", + "Ba", + "Ra", + "Al", + "Fe", + "Cr", + "In", + "Tl", + "Y", + "La", + "Ce", + "Pr", + "Nd", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Er", + "Tm", + "Lu", + "Hf", + "Zr", + "Ce", + "U", + "Pu", + "Th", +] diff --git a/python/BioSimSpace/_Config/_amber.py b/python/BioSimSpace/_Config/_amber.py index 407adeefe..a80a90c89 100644 --- a/python/BioSimSpace/_Config/_amber.py +++ b/python/BioSimSpace/_Config/_amber.py @@ -210,7 +210,7 @@ def createConfig( # "heavy" restraints using a non-interoperable name mask. if type(restraint) is str: if restraint == "backbone": - # Determine wether the contains protein, nucleic acid, or both. + # Determine wether the system contains protein, nucleic acid, or both. restraint_atom_names = [] if self._system.nAminoAcids() > 0: restraint_atom_names += ["N", "CA", "C", "O"] diff --git a/tests/Sandpit/Exscientia/Process/test_amber.py b/tests/Sandpit/Exscientia/Process/test_amber.py index 60168a707..73866d01a 100644 --- a/tests/Sandpit/Exscientia/Process/test_amber.py +++ b/tests/Sandpit/Exscientia/Process/test_amber.py @@ -21,6 +21,22 @@ def system(): ) +@pytest.fixture(scope="session") +def rna_system(): + """An RNA system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["rna_6e1s.rst7", "rna_6e1s.prm7"]) + ) + + +@pytest.fixture(scope="session") +def large_protein_system(): + """A large protein system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["complex_vac0.prm7", "complex_vac0.rst7"]) + ) + + @pytest.mark.skipif( has_amber is False or has_pyarrow is False, reason="Requires AMBER and pyarrow to be installed.", @@ -213,6 +229,42 @@ def test_args(system): assert arg_string == "-x X -a A -b B -y -e -f 6 -g -h H -k K -z Z" +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_backbone_restraint_mask_protein(large_protein_system): + """ + Test that the amber backbone restraint mask is correct for a protein system. + We need a large protein system otherwise the logic we want to test will be + skipped, and individual atoms will be specified in the config. + """ + + # Create an equilibration protocol with backbone restraints. + protocol = BSS.Protocol.Equilibration(restraint="backbone") + + # Create the process object. + process = BSS.Process.Amber(large_protein_system, protocol, name="test") + + # Check that the correct restraint mask is in the config. + config = process.getConfig() + assert ' restraintmask="@N,CA,C,O",' in config + + +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_backbone_restraint_mask_rna(rna_system): + """ + Test that the amber backbone restraint mask is correct for an RNA system. + """ + + # Create an equilibration protocol with backbone restraints. + protocol = BSS.Protocol.Equilibration(restraint="backbone") + + # Create the process object. + process = BSS.Process.Amber(rna_system, protocol, name="test") + + # Check that the correct restraint mask is in the config. + config = process.getConfig() + assert " restraintmask=\"@P,C5',C3',O3',O5'\"," in config + + def run_process(system, protocol, check_data=False): """Helper function to run various simulation protocols.""" diff --git a/tests/Sandpit/Exscientia/_SireWrappers/test_system.py b/tests/Sandpit/Exscientia/_SireWrappers/test_system.py index a0f78b116..771fb7624 100644 --- a/tests/Sandpit/Exscientia/_SireWrappers/test_system.py +++ b/tests/Sandpit/Exscientia/_SireWrappers/test_system.py @@ -17,6 +17,14 @@ def system(): ) +@pytest.fixture(scope="session") +def rna_system(): + """An RNA system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["rna_6e1s.rst7", "rna_6e1s.prm7"]) + ) + + # Parameterise the function with a set of molecule indices. @pytest.mark.parametrize("index", [0, -1]) def test_molecule_equivalence(system, index): @@ -451,3 +459,13 @@ def test_rotate_box_vectors(system): assert math.isclose(c1.x().value(), c0.z().value()) assert math.isclose(c1.y().value(), c0.x().value()) assert math.isclose(c1.z().value(), c0.y().value()) + + +def test_residue_searches_protein(system): + assert len(system.getAminoAcids()) == system.nAminoAcids() == 3 + assert len(system.getNucleotides()) == system.nNucleotides() == 0 + + +def test_residue_searches_rna(rna_system): + assert len(rna_system.getAminoAcids()) == rna_system.nAminoAcids() == 0 + assert len(rna_system.getNucleotides()) == rna_system.nNucleotides() == 29 From 609cca8f605120d100a6633450d2b4938a14422c Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 24 Oct 2023 15:47:44 +0100 Subject: [PATCH 8/9] Test return type of residue searches [ci skip] --- tests/Sandpit/Exscientia/_SireWrappers/test_system.py | 8 ++++++-- tests/_SireWrappers/test_system.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/Sandpit/Exscientia/_SireWrappers/test_system.py b/tests/Sandpit/Exscientia/_SireWrappers/test_system.py index 771fb7624..8e5d6b596 100644 --- a/tests/Sandpit/Exscientia/_SireWrappers/test_system.py +++ b/tests/Sandpit/Exscientia/_SireWrappers/test_system.py @@ -462,10 +462,14 @@ def test_rotate_box_vectors(system): def test_residue_searches_protein(system): - assert len(system.getAminoAcids()) == system.nAminoAcids() == 3 + amino_acids = system.getAminoAcids() + assert isinstance(amino_acids[0], BSS._SireWrappers._residue.Residue) + assert len(amino_acids) == system.nAminoAcids() == 3 assert len(system.getNucleotides()) == system.nNucleotides() == 0 def test_residue_searches_rna(rna_system): + nucleotides = rna_system.getNucleotides() + assert isinstance(nucleotides[0], BSS._SireWrappers._residue.Residue) assert len(rna_system.getAminoAcids()) == rna_system.nAminoAcids() == 0 - assert len(rna_system.getNucleotides()) == rna_system.nNucleotides() == 29 + assert len(nucleotides) == rna_system.nNucleotides() == 29 diff --git a/tests/_SireWrappers/test_system.py b/tests/_SireWrappers/test_system.py index 7e026eafb..a7234cb00 100644 --- a/tests/_SireWrappers/test_system.py +++ b/tests/_SireWrappers/test_system.py @@ -458,10 +458,14 @@ def test_rotate_box_vectors(system): def test_residue_searches_protein(system): - assert len(system.getAminoAcids()) == system.nAminoAcids() == 3 + amino_acids = system.getAminoAcids() + assert isinstance(amino_acids[0], BSS._SireWrappers._residue.Residue) + assert len(amino_acids) == system.nAminoAcids() == 3 assert len(system.getNucleotides()) == system.nNucleotides() == 0 def test_residue_searches_rna(rna_system): + nucleotides = rna_system.getNucleotides() + assert isinstance(nucleotides[0], BSS._SireWrappers._residue.Residue) assert len(rna_system.getAminoAcids()) == rna_system.nAminoAcids() == 0 - assert len(rna_system.getNucleotides()) == rna_system.nNucleotides() == 29 + assert len(nucleotides) == rna_system.nNucleotides() == 29 From 0ae9a3d099529aa1924591092807f024fa8acb8e Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 24 Oct 2023 15:54:24 +0100 Subject: [PATCH 9/9] Ensure that residue search functions always return residues [ci skip] Do this by using .residues() on the result of the search. --- .../BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py | 4 ++-- python/BioSimSpace/_SireWrappers/_system.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py index 589972f86..ff82b5460 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py +++ b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py @@ -2039,7 +2039,7 @@ def getAminoAcids(self, property_map={}): """ search_string = "(resname " + ",".join(_prot_res) + ")" try: - residues = list(self.search(search_string, property_map)) + residues = list(self.search(search_string, property_map).residues()) except: residues = [] return residues @@ -2076,7 +2076,7 @@ def getNucleotides(self, property_map={}): """ search_string = "(resname " + ",".join(_nucl_res) + ")" try: - residues = list(self.search(search_string, property_map)) + residues = list(self.search(search_string, property_map).residues()) except: residues = [] return residues diff --git a/python/BioSimSpace/_SireWrappers/_system.py b/python/BioSimSpace/_SireWrappers/_system.py index 484f728b6..42c780b22 100644 --- a/python/BioSimSpace/_SireWrappers/_system.py +++ b/python/BioSimSpace/_SireWrappers/_system.py @@ -1958,7 +1958,7 @@ def getAminoAcids(self, property_map={}): """ search_string = "(resname " + ",".join(_prot_res) + ")" try: - residues = list(self.search(search_string, property_map)) + residues = list(self.search(search_string, property_map).residues()) except: residues = [] return residues @@ -1995,7 +1995,7 @@ def getNucleotides(self, property_map={}): """ search_string = "(resname " + ",".join(_nucl_res) + ")" try: - residues = list(self.search(search_string, property_map)) + residues = list(self.search(search_string, property_map).residues()) except: residues = [] return residues