From 69bc40dc5b329bd454e988cb216d8a8be27a92f4 Mon Sep 17 00:00:00 2001 From: Manuel Nuno Melo Date: Mon, 12 Oct 2020 19:54:55 +0100 Subject: [PATCH] Implemented compact wrapping and nojump; updated transforms. Optimized AtomGroup.wrap() for compounds. Added transformations.nojump; transformations.wrap and transformations.unwrap now transparently expose AtomGroup.wrap() and AtomGroup.unwrap(). Added function apply_compact_PBC and added 'center' keyword to apply_PBC. Some PBC-related doc cleaning and clarification. --- package/MDAnalysis/core/groups.py | 57 +++- package/MDAnalysis/lib/distances.py | 99 ++++++- package/MDAnalysis/lib/mdamath.py | 2 +- .../MDAnalysis/transformations/__init__.py | 2 +- package/MDAnalysis/transformations/wrap.py | 259 ++++++++++++++---- 5 files changed, 356 insertions(+), 63 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 0264617c469..365682cf4dc 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -1345,7 +1345,7 @@ def pack_into_box(self, box=None, inplace=True): """ return self.wrap(box=box, inplace=inplace) - def wrap(self, compound="atoms", center="com", box=None, inplace=True): + def wrap(self, compound="atoms", center="com", compact=False, box=None, inplace=True): r"""Shift the contents of this group back into the primary unit cell according to periodic boundary conditions. @@ -1354,6 +1354,28 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): ``'atoms'``, each compound as a whole will be shifted so that its `center` lies within the primary unit cell. + Wrapping with `compound`='atoms': + +-----------+ +-----------+ + | | | | + | | 3 6 | 3 6 | + | | ! ! | ! ! | + | 1-|-2-5-8 -> |-2-5-8 1-| + | | ! ! | ! ! | + | | 4 7 | 4 7 | + | | | | + +-----------+ +-----------+ + + Wrapping with `compound`='fragments': + +-----------+ +-----------+ + | | | | + | | 3 6 | 3 6 | + | | ! ! | ! ! | + | 1-|-2-5-8 -> 1-|-2-5-8 | + | | ! ! | ! ! | + | | 4 7 | 4 7 | + | | | | + +-----------+ +-----------+ + Parameters ---------- compound : {'atoms', 'group', 'segments', 'residues', 'molecules', \ @@ -1364,6 +1386,10 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): center : {'com', 'cog'} How to define the center of a given group of atoms. If `compound` is ``'atoms'``, this parameter is meaningless and therefore ignored. + compact : bool, optional + If ``True``, wrapping will be done to the space-filling volume + closest to the primary unit cell's center (only useful for + non-orthogonal periodic boxes). box : array_like, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by @@ -1407,6 +1433,12 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): x_i' = x_i - \left\lfloor\frac{x_i}{L_i}\right\rfloor\,. + `compact` changes the shifting behavior: centers will be shifted by + the integer combination of box vectors that minimizes + :math:`\left\|\mathbf{r_{compact}}-\mathbf{r_{box\_center}}\right\|`, + the distance of a center's final position, :math:`\mathbf{r_{compact}}`, + to the primary unit cell's center. + When specifying a `compound`, the translation is calculated based on each compound. The same translation is applied to all atoms within this compound, meaning it will not be broken by the shift. @@ -1420,6 +1452,7 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): This can be either ``'com'`` for center of mass, or ``'cog'`` for center of geometry. + `box` allows a unit cell to be given for the transformation. If not specified, the :attr:`~MDAnalysis.coordinates.base.Timestep.dimensions` information from the current @@ -1451,6 +1484,7 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): raise ValueError("Invalid box: Box has invalid shape or not all " "box dimensions are positive. You can specify a " "valid box using the 'box' argument.") + packer = distances.apply_compact_PBC if compact else distances.apply_PBC # no matter what kind of group we have, we need to work on its (unique) # atoms: @@ -1472,7 +1506,7 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): return np.zeros((0, 3), dtype=np.float32) if comp == "atoms" or len(atoms) == 1: - positions = distances.apply_PBC(atoms.positions, box) + positions = packer(atoms.positions, box) else: ctr = center.lower() if ctr == 'com': @@ -1496,7 +1530,7 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): else: # ctr == 'cog' ctrpos = atoms.center_of_geometry(pbc=False, compound=comp) ctrpos = ctrpos.astype(np.float32, copy=False) - target = distances.apply_PBC(ctrpos, box) + target = packer(ctrpos, box) positions += target - ctrpos else: if comp == 'segments': @@ -1510,6 +1544,7 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): errmsg = ("Cannot use compound='molecules', this " "requires molnums.") raise NoDataError(errmsg) from None + else: # comp == 'fragments' try: compound_indices = atoms.fragindices @@ -1529,16 +1564,16 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): else: # ctr == 'cog' ctrpos = atoms.center_of_geometry(pbc=False, compound=comp) ctrpos = ctrpos.astype(np.float32, copy=False) - target = distances.apply_PBC(ctrpos, box) + target = packer(ctrpos, box) shifts = target - ctrpos + # There may be gaps in the used indices. We must translate + # them to a (0, 1, 2, ..., n_indices-1) sequence + unique_indices = unique_int_1d(compound_indices) + transl_indices = np.arange(unique_indices.max()+1) + transl_indices[unique_indices] = np.arange(len(unique_indices)) # apply the shifts: - unique_compound_indices = unique_int_1d(compound_indices) - shift_idx = 0 - for i in unique_compound_indices: - mask = np.where(compound_indices == i) - positions[mask] += shifts[shift_idx] - shift_idx += 1 + positions += shifts[transl_indices[compound_indices]] if inplace: atoms.positions = positions @@ -1559,7 +1594,7 @@ def unwrap(self, compound='fragments', reference='com', inplace=True): | | | | | 6 3 | | 3 | 6 | ! ! | | ! | ! - |-5-8 1-2-| ==> | 1-2-|-5-8 + |-5-8 1-2-| -> | 1-2-|-5-8 | ! ! | | ! | ! | 7 4 | | 4 | 7 | | | | diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index 74782a67797..ffbe86e3e87 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -61,6 +61,7 @@ .. autofunction:: calc_angles .. autofunction:: calc_dihedrals .. autofunction:: apply_PBC +.. autofunction:: apply_compact_PBC .. autofunction:: transform_RtoS .. autofunction:: transform_StoR .. autofunction:: augment_coordinates(coordinates, box, r) @@ -68,6 +69,7 @@ """ import numpy as np from numpy.lib.utils import deprecate +import itertools from .util import check_coords, check_box from .mdamath import triclinic_vectors @@ -1133,7 +1135,7 @@ def transform_RtoS(coords, box, backend="serial"): Returns ------- newcoords : numpy.ndarray (``dtype=numpy.float32``, ``shape=coords.shape``) - An array containing fractional coordiantes. + An array containing fractional coordinates. .. versionchanged:: 0.13.0 @@ -1482,8 +1484,8 @@ def calc_dihedrals(coords1, coords2, coords3, coords4, box=None, result=None, @check_coords('coords') -def apply_PBC(coords, box, backend="serial"): - """Moves coordinates into the primary unit cell. +def apply_PBC(coords, box, center=None, backend="serial"): + """Moves coordinates into an arbitrarily centered unit cell. Parameters ---------- @@ -1495,6 +1497,9 @@ def apply_PBC(coords, box, backend="serial"): triclinic and must be provided in the same format as returned by :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. + center : numpy.ndarray + Coordinate array of shape ``(3,)`` of the center of the output unit + cell representation. Defaults to the primary unit cell's center. backend : {'serial', 'OpenMP'}, optional Keyword selecting the type of acceleration. @@ -1511,13 +1516,101 @@ def apply_PBC(coords, box, backend="serial"): .. versionchanged:: 0.19.0 Internal dtype conversion of input coordinates to ``numpy.float32``. Now also accepts (and, likewise, returns) single coordinates. + .. versionchanged:: 2.0.0 + Added *center* keyword. """ if len(coords) == 0: return coords boxtype, box = check_box(box) + + if center is not None: + box_center = triclinic_vectors(box).sum(axis=0) * 0.5 + center_displacement = box_center - center + coords += center_displacement + if boxtype == 'ortho': _run("ortho_pbc", args=(coords, box), backend=backend) else: _run("triclinic_pbc", args=(coords, box), backend=backend) + if center is not None: + coords -= center_displacement + + return coords + +@check_coords('coords') +def apply_compact_PBC(coords, box, center=None, backend="serial"): + """Moves coordinates into an arbitrarily centered, space-filling compact + volume. + + Parameters + ---------- + coords : numpy.ndarray + Coordinate array of shape ``(3,)`` or ``(n, 3)`` (dtype is arbitrary, + will be converted to ``numpy.float32`` internally). + box : numpy.ndarray + The unitcell dimensions of the system, which can be orthogonal or + triclinic and must be provided in the same format as returned by + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx, ly, lz, alpha, beta, gamma]``. + center : numpy.ndarray + Coordinate array of shape ``(3,)`` of the center of the output compact + representation. Defaults to the primary unit cell's center. + backend : {'serial', 'OpenMP'}, optional + Keyword selecting the type of acceleration. + + Returns + ------- + newcoords : numpy.ndarray (``dtype=numpy.float32``, ``shape=coords.shape``) + Array containing coordinates that all lie within a compact space-filling + volume centered on the point given with `center` (compact in that all + that volume's points are closest to the center than to any of the + center's periodic images). + + + .. versionadded:: 2.0.0 + """ + boxtype, box = check_box(box) + # shortcut for orthogonal boxes + if boxtype == 'ortho': + return apply_PBC(coords, box, center=center, backend=backend) + + tric_vecs = triclinic_vectors(box) + pbc_img_coeffs = np.array(list(itertools.product([0,-1,1], repeat=3))) + pbc_img_vecs = np.matmul(pbc_img_coeffs, tric_vecs) + + # Stuff could get sped up because this can work with squared distance; + # no accelerated function exists for it yet, though. + min_compact_norm = np.linalg.norm(pbc_img_vecs[1:], axis=1).min() * 0.5 + + if center is None: + center = tric_vecs.sum(axis=0) * 0.5 + else: + center = np.asarray(center, dtype=np.float32) + pbc_imgs = pbc_img_vecs + center + + # Also improvable as distance^2 + center_dists = distance_array(center, coords)[0] + outside_ats = np.where(center_dists > min_compact_norm)[0] + # Let's make sure we're dealing with everyone in the primary cell + outside_pos = coords[outside_ats] = apply_PBC(coords[outside_ats], + box, backend) + # Potentially save cycles by re-filtering after PBC. It makes sense for GROMACS + # trajectories, that are saved as the rectangular cell. + center_dists = distance_array(center, outside_pos)[0] + outside_ats = outside_ats[center_dists > min_compact_norm] + outside_pos = coords[outside_ats] + + # This works for the general case by checking the distance to all 27 + # unit cell centers (primary + 1st neighbors). Will break for extremely + # skewed cases where the closest center is in a 2nd neighbor unit cell. + # The distance check to all 26 1st neighbors can also probably be optimized, + # both in general and also with boxtype-specific shortcuts. + # Also improvable as distance^2 + closest_img_ndx = distance_array(outside_pos, pbc_imgs).argmin(axis=1) + to_move = closest_img_ndx.nonzero()[0] + closest_img_ndx = closest_img_ndx[to_move] + outside_ats = outside_ats[to_move] + outside_pos = coords[outside_ats] - pbc_img_vecs[closest_img_ndx] + coords[outside_ats] = outside_pos return coords diff --git a/package/MDAnalysis/lib/mdamath.py b/package/MDAnalysis/lib/mdamath.py index f65365cf244..4ea7b286e18 100644 --- a/package/MDAnalysis/lib/mdamath.py +++ b/package/MDAnalysis/lib/mdamath.py @@ -58,7 +58,7 @@ def norm(v): .. math:: v = \sqrt{\mathbf{v}\cdot\mathbf{v}} - This version is faster then numpy.linalg.norm because it only works for a + This version is faster than numpy.linalg.norm because it only works for a single vector and therefore can skip a lot of the additional fuss linalg.norm does. diff --git a/package/MDAnalysis/transformations/__init__.py b/package/MDAnalysis/transformations/__init__.py index 91083fff18b..94e93afdf7b 100644 --- a/package/MDAnalysis/transformations/__init__.py +++ b/package/MDAnalysis/transformations/__init__.py @@ -129,4 +129,4 @@ def wrapped(ts): from .rotate import rotateby from .positionaveraging import PositionAverager from .fit import fit_translation, fit_rot_trans -from .wrap import wrap, unwrap +from .wrap import wrap, unwrap, nojump diff --git a/package/MDAnalysis/transformations/wrap.py b/package/MDAnalysis/transformations/wrap.py index 10e565fc072..d1bc8297490 100644 --- a/package/MDAnalysis/transformations/wrap.py +++ b/package/MDAnalysis/transformations/wrap.py @@ -24,73 +24,100 @@ Wrap/unwrap transformations --- :mod:`MDAnalysis.transformations.wrap` ====================================================================== -Wrap/unwrap the atoms of a given AtomGroup in the unit cell. :func:`wrap` -translates the atoms back in the unit cell. :func:`unwrap` translates the -atoms of each molecule so that bons don't split over images. +Wrap/unwrap the atoms of a given AtomGroup in the unit cell. :class:`wrap` +translates the atoms back in the unit cell. :class:`unwrap` translates the +atoms of each molecule so that bons don't split over images. :class:`nojump` +removes jumps across the periodic boundaries when iterating along a trajectory. .. autoclass:: wrap .. autoclass:: unwrap +.. autoclass:: nojump """ -from ..lib._cutil import make_whole +from ..lib.distances import apply_PBC +from ..lib.mdamath import triclinic_vectors +from ..exceptions import NoDataError +import numpy as np class wrap(object): """ Shift the contents of a given AtomGroup back into the unit cell. :: - - +-----------+ +-----------+ - | | | | - | 3 | 6 | 6 3 | - | ! | ! | ! ! | - | 1-2-|-5-8 -> |-5-8 1-2-| - | ! | ! | ! ! | - | 4 | 7 | 7 4 | - | | | | - +-----------+ +-----------+ - + + Wrapping with `compound`='atoms': + +-----------+ +-----------+ + | | | | + | | 3 6 | 3 6 | + | | ! ! | ! ! | + | 1-|-2-5-8 -> |-2-5-8 1-| + | | ! ! | ! ! | + | | 4 7 | 4 7 | + | | | | + +-----------+ +-----------+ + + Wrapping with `compound`='fragments': + +-----------+ +-----------+ + | | | | + | | 3 6 | 3 6 | + | | ! ! | ! ! | + | 1-|-2-5-8 -> 1-|-2-5-8 | + | | ! ! | ! ! | + | | 4 7 | 4 7 | + | | | | + +-----------+ +-----------+ + Example ------- - + .. code-block:: python - - ag = u.atoms + + ag = u.atoms transform = mda.transformations.wrap(ag) u.trajectory.add_transformations(transform) - + Parameters ---------- - + ag: Atomgroup Atomgroup to be wrapped in the unit cell - compound : {'atoms', 'group', 'residues', 'segments', 'fragments'}, optional - The group which will be kept together through the shifting process. - + kwargs : optional + Arguments to be passed to + :meth:`MDAnalysis.core.groups.AtomGroup.wrap`. + Notes ----- - When specifying a `compound`, the translation is calculated based on - each compound. The same translation is applied to all atoms - within this compound, meaning it will not be broken by the shift. + This is a wrapper around :meth:`MDAnalysis.core.groups.AtomGroup.wrap`. + Refer to its documentation on additional `kwargs` parameters. Namely, + `compound` allows the selection of how different sub-groups are wrapped + (per atom, residue, fragment, molecule, etc.); `center` allows the + choice of the center-of-mass vs the center-of-geometry as the reference + center for each compound. The same translation is applied to all atoms + within a compound, meaning it will not be broken by the shift. This might however mean that not all atoms from the compound are inside the unit cell, but rather the center of the compound is. - + Returns ------- MDAnalysis.coordinates.base.Timestep + See Also + -------- + :meth:`MDAnalysis.core.groups.AtomGroup.wrap` + .. versionchanged:: 2.0.0 The transformation was changed from a function/closure to a class - with ``__call__``. + with ``__call__``. It now also fully exposes the interface of + :meth:`MDAnalysis.core.groups.AtomGroup.wrap` """ - def __init__(self, ag, compound='atoms'): + def __init__(self, ag, **kwargs): self.ag = ag - self.compound = compound + self.kwargs = kwargs def __call__(self, ts): - self.ag.wrap(compound=self.compound) + self.ag.wrap(**self.kwargs) return ts @@ -104,7 +131,7 @@ class unwrap(object): unit cell, causing breaks mid molecule, with the molecule then appearing on either side of the unit cell. This is problematic for operations such as calculating the center of mass of the molecule. :: - + +-----------+ +-----------+ | | | | | 6 3 | | 3 | 6 @@ -114,40 +141,178 @@ class unwrap(object): | 7 4 | | 4 | 7 | | | | +-----------+ +-----------+ - + Example ------- - + .. code-block:: python - - ag = u.atoms + + ag = u.atoms transform = mda.transformations.unwrap(ag) u.trajectory.add_transformations(transform) - + Parameters ---------- atomgroup : AtomGroup The :class:`MDAnalysis.core.groups.AtomGroup` to work with. The positions of this are modified in place. - + kwargs : optional + Arguments to be passed to + :meth:`MDAnalysis.core.groups.AtomGroup.unwrap`. + + Notes + ----- + This is a wrapper around :meth:`MDAnalysis.core.groups.AtomGroup.unwrap`. + Refer to its documentation on additional `kwargs` parameters. Namely, + `compound` allows the selection of how different sub-groups are wrapped + (as a whole, per residue, fragment, molecule, etc.); `reference` allows + the choice of the center-of-mass vs the center-of-geometry as the + reference center for each compound. + Returns ------- - MDAnalysis.coordinates.base.Timestep + :class:`MDAnalysis.coordinates.base.Timestep` + + See Also + -------- + :func:`MDAnalysis.core.groups.AtomGroup.unwrap` .. versionchanged:: 2.0.0 The transformation was changed from a function/closure to a class - with ``__call__``. + with ``__call__``. It now also fully exposes the interface of + :meth:`MDAnalysis.core.groups.AtomGroup.unwrap` """ - def __init__(self, ag): + def __init__(self, ag, **kwargs): self.ag = ag + self.kwargs = kwargs + + def __call__(self, ts): + self.ag.unwrap(**self.kwargs) + return ts + + +class nojump(object): + """ + Remove jumps across the periodic boundaries in consecutive frames. + + Jumps are identified from translations greater than half the length of each + box vector. Selective jump removal for x, y, and z is possible. + + This function is based on consecutive frame displacement vectors. It will + keep molecules whole provided they were whole in the first iteration frame. + By default, it will automatically call + :meth:`MDAnalysis.core.groups.AtomGroup.unwrap` if an iteration reset is + detected. - try: - self.ag.fragments - except AttributeError: - raise AttributeError("{} has no fragments".format(self.ag)) + Example + ------- + + .. code-block:: python + + ag = u.atoms + # starts from a configuration with whole fragments. + transform = mda.transformations.nojump(ag, initial_workflow=unwrap(ats)) + u.trajectory.add_transformations(transform) + + Parameters + ---------- + atomgroup : AtomGroup + The :class:`MDAnalysis.core.groups.AtomGroup` to work with. + The positions of this are modified in place. + x, y, z : bool, optional + Over which dimension(s) to remove jumps. + initial_workflow : callable or list, optional + Transform(s) to apply when starting or restarting iteration. + refocus : bool or int + Whether to correct each frame for position drift due to successive + vector addition. If set to a nonzero int, defines every how many frames + to correct, representing a tradeoff between performance and accuracy. + backend : {'serial', 'OpenMP'}, optional + Keyword selecting the type of acceleration in subcalls to + :func:`MDAnalysis.lib.distances.apply_PBC`. + + Notes + ----- + Because a no-jump treatment preserves atom neighborhoods, an unwrap + transformation at the first frame (specified with `initial_workflow`) + ensures that fragments are kept whole throughout the transformed trajectory, + and avoids the need of additional unwrapping transformations. However, if + :class:`nojump` only operates over some of the dimensions, fragments can + become broken over the untreated ones. + Because it needs frame-sequential position information, :class:`nojump` will + reset the transformation if it detects a backward jump in frames. + Multi-frame forward jumps do not trigger such a transformation reset, but at + very large frame skips displacements between treated frames may become + larger than half the box vectors, yielding wrong results. + + Returns + ------- + :class:`MDAnalysis.coordinates.base.Timestep` + + See Also + -------- + :func:`MDAnalysis.core.groups.AtomGroup.unwrap` + + + .. versionadded:: 2.0.0 + """ + def __init__(self, ag, x=True, y=True, z=True, + initial_workflow=None, refocus=True, backend='serial'): + self.ag = ag + self.prev_frame = None + self.prev_cdx = None + self.transformed_cdx = None + self.dims = [i for i, d in enumerate((x,y,z)) if d] + self.initial_workflow = [] + if initial_workflow: + if callable(initial_workflow): + self.initial_workflow = [initial_workflow] + else: + self.initial_workflow = initial_workflow + self.refocus = int(refocus) + self.backend = backend def __call__(self, ts): - for frag in self.ag.fragments: - make_whole(frag) + # If all dims are False, this is a null transform + if not self.dims: + return ts + + # Shortcut when the same frame is loaded repeatedly + if ts.frame == self.prev_frame: + self.ag.positions = self.transformed_cdx + return ts + + # The case of frame 0 or an iteration reset + if self.prev_cdx is None or ts.frame - self.prev_frame < 0: + for transform in self.initial_workflow: + ts = transform(ts) + self.prev_cdx = self.ag.positions + self.transformed_cdx = self.ag.positions + self.prev_frame = ts.frame + self.accumulated_frames = 0 + return ts + + self.accumulated_frames += 1 + vecs = self.ag.positions - self.prev_cdx + self.prev_cdx = self.ag.positions + + vecs_fixed = apply_PBC(vecs, box=ts.dimensions, center=[0.,0.,0.], + backend=self.backend) + + vecs[:, self.dims] = vecs_fixed[:, self.dims] + self.transformed_cdx += vecs + if self.refocus and self.accumulated_frames >= self.refocus: + # refocuses self.transformed_cdx to actual shifts of primary + # cell coordinates. + self._refocus() + self.ag.positions = self.transformed_cdx + self.prev_frame = ts.frame return ts + + def _refocus(self): + corrective_shift = apply_PBC(self.ag.positions - self.transformed_cdx, + box=self.ag.dimensions, center=[0.,0.,0.,], + backend=self.backend) + self.transformed_cdx += corrective_shift + self.accumulated_frames = 0