Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parsing negative values in ATOMIC_NUMBER records of AMBER topologies #2307

Merged
merged 4 commits into from Aug 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions package/CHANGELOG
Expand Up @@ -73,6 +73,7 @@ Changes
* bump minimum numpy version to 1.13.3

Fixes
* fixed reading AMBER topologies with negative ATOMIC_NUMBERS (Issue #2306)
* fixed reading bz2 compressed psf files (Issue #2232)
* fixed mol2 comment header handling (Issue #2261)
* fixed reading PDB files with partial CRYST lines (Issue #2252)
Expand Down
33 changes: 31 additions & 2 deletions package/MDAnalysis/topology/TOPParser.py
Expand Up @@ -109,6 +109,11 @@
Impropers
)

import warnings
import logging

logger = logging.getLogger('MDAnalysis.topology.TOPParser')


class TypeIndices(AtomAttr):
"""Numerical type of each Atom"""
Expand Down Expand Up @@ -254,11 +259,27 @@ def next_getter():
attrs['dihedrals'], attrs['impropers'] = self.parse_dihedrals(
attrs.pop('diha'), attrs.pop('dihh'))

# Guess elements if not in topology
# Guess elements if not in topology or if any dummy atoms are found
if not 'elements' in attrs:
msg = ("ATOMIC_NUMBER record not found, guessing atom elements "
"based on their atom types")
logger.warning(msg)
warnings.warn(msg)
attrs['elements'] = Elements(
guessers.guess_types(attrs['types'].values),
guessed=True)
elif np.any(attrs['elements'].values == "DUMMY"):
# This approach assumes np.any is much faster than np.where
attrs['elements']._guessed = True
for dummy_idx in np.where(attrs['elements'].values == 'DUMMY')[0]:
atom_type = attrs['types'].values[dummy_idx]
atom_ele = guessers.guess_atom_element(atom_type)
msg = ("Unknown ATOMIC_NUMBER value found, guessing atom "
"element from type: {0} assigned to {1}".format(
atom_type, atom_ele))
logger.warning(msg)
warnings.warn(msg)
attrs['elements'].values[dummy_idx] = atom_ele

# atom ids are mandatory
attrs['atomids'] = Atomids(np.arange(n_atoms) + 1)
Expand Down Expand Up @@ -384,9 +405,17 @@ def parse_elements(self, num_per_record, numlines):
attr : :class:`Elements`
A :class:`Elements` instance containing the element of each atom
as defined in the parm7 file

Note
----
If the record contains atomic numbers <= 0, these will be treated as
dummy elements and an attempt will be made to guess the element based
on atom type. See issue #2306 for more details.
"""

vals = self.parsesection_mapper(
numlines, lambda x: NUMBER_TO_ELEMENT[int(x)])
numlines,
lambda x: NUMBER_TO_ELEMENT[int(x)] if int(x) > 0 else "DUMMY")
attr = Elements(np.array(vals, dtype=object))
return attr

Expand Down
141 changes: 141 additions & 0 deletions testsuite/MDAnalysisTests/data/Amber/ace_mbondi3.negative.parm7
@@ -0,0 +1,141 @@
%VERSION VERSION_STAMP = V0001.000 DATE = 09/08/18 15:36:17
%FLAG TITLE
%FORMAT(20a4)
ACE
%FLAG POINTERS
%FORMAT(10I8)
6 4 3 2 6 1 9 0 0 0
16 1 2 1 0 3 3 3 4 0
0 0 0 0 0 0 0 0 6 0
0
%FLAG ATOM_NAME
%FORMAT(20a4)
HH31CH3 HH32HH33C O
%FLAG CHARGE
%FORMAT(5E16.8)
2.04636429E+00 -6.67300626E+00 2.04636429E+00 2.04636429E+00 1.08823576E+01
-1.03484442E+01
%FLAG ATOMIC_NUMBER
%FORMAT(10I8)
1 -1 1 1 6 -1
%FLAG MASS
%FORMAT(5E16.8)
1.00800000E+00 1.20100000E+01 1.00800000E+00 1.00800000E+00 1.20100000E+01
1.60000000E+01
%FLAG ATOM_TYPE_INDEX
%FORMAT(10I8)
1 2 1 1 3 4
%FLAG NUMBER_EXCLUDED_ATOMS
%FORMAT(10I8)
5 4 3 2 1 1
%FLAG NONBONDED_PARM_INDEX
%FORMAT(10I8)
1 2 4 7 2 3 5 8 4 5
6 9 7 8 9 10
%FLAG RESIDUE_LABEL
%FORMAT(20a4)
ACE
%FLAG RESIDUE_POINTER
%FORMAT(10I8)
1
%FLAG BOND_FORCE_CONSTANT
%FORMAT(5E16.8)
5.70000000E+02 3.40000000E+02 3.17000000E+02
%FLAG BOND_EQUIL_VALUE
%FORMAT(5E16.8)
1.22900000E+00 1.09000000E+00 1.52200000E+00
%FLAG ANGLE_FORCE_CONSTANT
%FORMAT(5E16.8)
5.00000000E+01 3.50000000E+01 8.00000000E+01
%FLAG ANGLE_EQUIL_VALUE
%FORMAT(5E16.8)
1.91113635E+00 1.91113635E+00 2.10137732E+00
%FLAG DIHEDRAL_FORCE_CONSTANT
%FORMAT(5E16.8)
8.00000000E-01 0.00000000E+00 8.00000000E-02
%FLAG DIHEDRAL_PERIODICITY
%FORMAT(5E16.8)
1.00000000E+00 2.00000000E+00 3.00000000E+00
%FLAG DIHEDRAL_PHASE
%FORMAT(5E16.8)
0.00000000E+00 0.00000000E+00 3.14159400E+00
%FLAG SCEE_SCALE_FACTOR
%FORMAT(5E16.8)
1.20000000E+00 1.20000000E+00 1.20000000E+00
%FLAG SCNB_SCALE_FACTOR
%FORMAT(5E16.8)
2.00000000E+00 2.00000000E+00 2.00000000E+00
%FLAG SOLTY
%FORMAT(5E16.8)
0.00000000E+00 0.00000000E+00 0.00000000E+00 0.00000000E+00
%FLAG LENNARD_JONES_ACOEF
%FORMAT(5E16.8)
7.51607703E+03 9.71708117E+04 1.04308023E+06 8.61541883E+04 9.24822270E+05
8.19971662E+05 5.44261042E+04 6.47841731E+05 5.74393458E+05 3.79876399E+05
%FLAG LENNARD_JONES_BCOEF
%FORMAT(5E16.8)
2.17257828E+01 1.26919150E+02 6.75612247E+02 1.12529845E+02 5.99015525E+02
5.31102864E+02 1.11805549E+02 6.26720080E+02 5.55666448E+02 5.64885984E+02
%FLAG BONDS_INC_HYDROGEN
%FORMAT(10I8)
3 6 2 3 9 2 0 3 2
%FLAG BONDS_WITHOUT_HYDROGEN
%FORMAT(10I8)
12 15 1 3 12 3
%FLAG ANGLES_INC_HYDROGEN
%FORMAT(10I8)
9 3 12 1 6 3 9 2 6 3
12 1 0 3 6 2 0 3 9 2
0 3 12 1
%FLAG ANGLES_WITHOUT_HYDROGEN
%FORMAT(10I8)
3 12 15 3
%FLAG DIHEDRALS_INC_HYDROGEN
%FORMAT(10I8)
9 3 12 15 1 9 3 -12 15 2
9 3 -12 15 3 6 3 12 15 1
6 3 -12 15 2 6 3 -12 15 3
0 3 12 15 1 0 3 -12 15 2
0 3 -12 15 3
%FLAG DIHEDRALS_WITHOUT_HYDROGEN
%FORMAT(10I8)

%FLAG EXCLUDED_ATOMS_LIST
%FORMAT(10I8)
2 3 4 5 6 3 4 5 6 4
5 6 5 6 6 0
%FLAG HBOND_ACOEF
%FORMAT(5E16.8)

%FLAG HBOND_BCOEF
%FORMAT(5E16.8)

%FLAG HBCUT
%FORMAT(5E16.8)

%FLAG AMBER_ATOM_TYPE
%FORMAT(20a4)
HC CT HC HC C O
%FLAG TREE_CHAIN_CLASSIFICATION
%FORMAT(20a4)
M M E E M E
%FLAG JOIN_ARRAY
%FORMAT(10I8)
0 0 0 0 0 0
%FLAG IROTAT
%FORMAT(10I8)
0 0 0 0 0 0
%FLAG RADIUS_SET
%FORMAT(1a80)
ArgH and AspGluO modified Bondi2 radii (mbondi3)
%FLAG RADII
%FORMAT(5E16.8)
1.20000000E+00 1.70000000E+00 1.20000000E+00 1.20000000E+00 1.70000000E+00
1.50000000E+00
%FLAG SCREEN
%FORMAT(5E16.8)
8.50000000E-01 7.20000000E-01 8.50000000E-01 8.50000000E-01 7.20000000E-01
8.50000000E-01
%FLAG IPOL
%FORMAT(1I8)
0
3 changes: 3 additions & 0 deletions testsuite/MDAnalysisTests/datafiles.py
Expand Up @@ -96,6 +96,7 @@
"PFncdf_Top", "PFncdf_Trj", # Amber ncdf with Positions and Forces
"PRMcs", # Amber (format, Issue 1331)
"PRMNCRST", # Amber ncrst with positions/forces/velocities
"PRMNEGATIVE", # Amber negative ATOMIC_NUMBER (Issue 2306)
"PRMErr1", "PRMErr2", "PRMErr3", # Amber TOP files to check raised errors
"PQR", # PQR v1
"PQR_icodes", # PQR v2 with icodes
Expand Down Expand Up @@ -345,6 +346,8 @@

PRMNCRST = resource_filename(__name__, 'data/Amber/ace_mbondi3.parm7')

PRMNEGATIVE = resource_filename(__name__, 'data/Amber/ace_mbondi3.negative.parm7')

PRMErr1 = resource_filename(__name__, 'data/Amber/ace_mbondi3.error1.parm7')
PRMErr2 = resource_filename(__name__, 'data/Amber/ace_mbondi3.error2.parm7')
PRMErr3 = resource_filename(__name__, 'data/Amber/ace_mbondi3.error3.parm7')
Expand Down
74 changes: 74 additions & 0 deletions testsuite/MDAnalysisTests/topology/test_top.py
Expand Up @@ -31,6 +31,7 @@
PRM7, # tz2.truncoct.parm7.bz2
PRMpbc,
PRMNCRST,
PRMNEGATIVE,
PRMErr1,
PRMErr2,
PRMErr3
Expand Down Expand Up @@ -171,6 +172,15 @@ class TestPRMParser(TOPBase):
atom_i_improper_values = ((74, 79, 77, 78), (77, 80, 79, 83),
(79, 81, 80, 82), (79, 84, 83, 85))

def test_warning(self, filename):
with pytest.warns(UserWarning) as record:
u = mda.Universe(filename)

assert len(record) == 1
wmsg = ("ATOMIC_NUMBER record not found, guessing atom elements "
"based on their atom types")
assert str(record[0].message.args[0]) == wmsg


class TestPRM12Parser(TOPBase):
ref_filename = PRM12
Expand Down Expand Up @@ -279,6 +289,15 @@ class TestParm7Parser(TOPBase):
atom_zero_improper_values = ()
atom_i_improper_values = ((131, 135, 133, 134), (135, 157, 155, 156))

def test_warning(self, filename):
with pytest.warns(UserWarning) as record:
u = mda.Universe(filename)

assert len(record) == 1
wmsg = ("ATOMIC_NUMBER record not found, guessing atom elements "
"based on their atom types")
assert str(record[0].message.args[0]) == wmsg


class TestPRM2(TOPBase):
ref_filename = PRMpbc
Expand Down Expand Up @@ -318,6 +337,15 @@ class TestPRM2(TOPBase):
atom_zero_improper_values = ()
atom_i_improper_values = ((8, 16, 14, 15), (14, 18, 16, 17))

def test_warning(self, filename):
with pytest.warns(UserWarning) as record:
u = mda.Universe(filename)

assert len(record) == 1
wmsg = ("ATOMIC_NUMBER record not found, guessing atom elements "
"based on their atom types")
assert str(record[0].message.args[0]) == wmsg


class TestPRMNCRST(TOPBase):
# Test case of PARM7 with no non-hydrogen including dihedrals
Expand Down Expand Up @@ -348,6 +376,52 @@ class TestPRMNCRST(TOPBase):
atom_i_improper_values = ()


class TestPRMNCRST_negative(TOPBase):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you should be able to write some sort of test that uses assert_warns (or is it with pytest.warns:?)

# Same as above but with negative ATOMIC_NUMBER values (Issue 2306)
ref_filename = PRMNEGATIVE
guessed_attrs = ['elements']
expected_n_atoms = 6
expected_n_residues = 1
ref_proteinatoms = 6
expected_n_bonds = 5
expected_n_angles = 7
expected_n_dihedrals = 3
expected_n_impropers = 0
atom_i = 4
expected_n_zero_bonds = 1
expected_n_i_bonds = 2
expected_n_zero_angles = 3
expected_n_i_angles = 4
expected_n_zero_dihedrals = 1
expected_n_i_dihedrals = 3
expected_n_zero_impropers = 0
expected_n_i_impropers = 0
atom_zero_bond_values = ((0, 1),)
atom_i_bond_values = ((1, 4), (4, 5))
atom_zero_angle_values = ((0, 1, 2), (0, 1, 3), (0, 1, 4))
atom_i_angle_values = ((0, 1, 4), (1, 4, 5), (2, 1, 4), (3, 1, 4))
atom_zero_dihedral_values = ((0, 1, 4, 5),)
atom_i_dihedral_values = ((0, 1, 4, 5), (2, 1, 4, 5), (3, 1, 4, 5))
atom_zero_improper_values = ()
atom_i_improper_values = ()

def test_elements(self, top):
assert(top.elements.values[1] == 'C')
assert(top.elements.values[5] == 'O')

def test_warning(self, filename):
with pytest.warns(UserWarning) as record:
u = mda.Universe(filename)

assert len(record) == 2
wmsg1 = ("Unknown ATOMIC_NUMBER value found, guessing atom element "
"from type: CT assigned to C")
wmsg2 = ("Unknown ATOMIC_NUMBER value found, guessing atom element "
"from type: O assigned to O")
assert str(record[0].message.args[0]) == wmsg1
assert str(record[1].message.args[0]) == wmsg2


class TestErrors(object):
# Check Errors being raised
def test_versionline(self):
Expand Down