Skip to content

Commit

Permalink
TopologyAttr registration; prototype Topology json serialization
Browse files Browse the repository at this point in the history
We now register TopologyAttrs using the same metaclassing mechanism as
used for Parsers, Readers, and Writers. This is necessary in order to
deserialize Topology objects that have been serialized to JSON.

An existing Topology can now be serialized to JSON with:

```python

top.to_json('topology.json')

```

and a Topology can be generated from its JSON form with:

```python

top = Topology.from_json('topology.json')

```

Individual TopologyAttrs must define how to serialize themselves and
to deserialize their data to make an instance of themselves for this to
work.

Addresses #643.
  • Loading branch information
dotsdl committed Apr 21, 2016
1 parent ec3ac90 commit 574cd1f
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 3 deletions.
2 changes: 2 additions & 0 deletions package/MDAnalysis/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@

__all__ = ['AtomGroup', 'Selection', 'Timeseries']

# Registry of TopologyAttrs
_TOPOLOGYATTRS = {}

# set up flags for core routines (more convoluted than strictly necessary but should
# be clean to add more flags if needed)
Expand Down
95 changes: 95 additions & 0 deletions package/MDAnalysis/core/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import itertools

from ..lib import mdamath
from ..lib import util
from . import selection
from . import flags
from . import levels
Expand Down Expand Up @@ -1050,6 +1051,100 @@ def improper(self):
"improper only makes sense for a group with exactly 4 atoms")
return topologyobjects.ImproperDihedral(self._ix, self.universe)

# TODO: possibly refactor; copied directly from old AG
@property
def ts(self):
"""Returns a Timestep that contains only the group's coordinates.
Returns
-------
A :class:`~MDAnalysis.coordinates.base.Timestep` instance,
which can be passed to a trajectory writer.
If the returned timestep is modified the modifications
will not be reflected in the base timestep. Likewise,
when the underlying timestep changes (either by loading a
new frame or by setting new positions by hand) the returned
timestep will not reflect those changes.
"""
return self.universe.trajectory.ts.copy_slice(self.indices)

# TODO: refactor; copied directly from old AG
def write(self, filename=None, format="PDB",
filenamefmt="%(trjname)s_%(frame)d", **kwargs):
"""Write AtomGroup to a file.
AtomGroup.write(filename[,format])
:Keywords:
*filename*
``None``: create TRJNAME_FRAME.FORMAT from filenamefmt [``None``]
*format*
PDB, CRD, GRO, VMD (tcl), PyMol (pml), Gromacs (ndx) CHARMM (str)
Jmol (spt); case-insensitive and can also be supplied as the
filename extension [PDB]
*filenamefmt*
format string for default filename; use substitution tokens
'trjname' and 'frame' ["%(trjname)s_%(frame)d"]
*bonds*
how to handle bond information, especially relevant for PDBs;
default is ``"conect"``.
* ``"conect"``: write only the CONECT records defined in the original
file
* ``"all"``: write out all bonds, both the original defined and those
guessed by MDAnalysis
* ``None``: do not write out bonds
.. versionchanged:: 0.9.0
Merged with write_selection. This method can now write both
selections out.
"""
import MDAnalysis.coordinates
import MDAnalysis.selections

# check that AtomGroup actually has any atoms (Issue #434)
if len(self.atoms) == 0:
raise IndexError("Cannot write an AtomGroup with 0 atoms")

trj = self.universe.trajectory # unified trajectory API
frame = trj.ts.frame

if trj.n_frames == 1: kwargs.setdefault("multiframe", False)

if filename is None:
trjname, ext = os.path.splitext(os.path.basename(trj.filename))
filename = filenamefmt % vars()
filename = util.filename(filename, ext=format.lower(), keep=True)

# From the following blocks, one must pass.
# Both can't pass as the extensions don't overlap.
try:
writer = MDAnalysis.coordinates.writer(filename, **kwargs)
except TypeError:
# might be selections format
coords = False
else:
coords = True

try:
SelectionWriter = MDAnalysis.selections.get_writer(filename, format)
except (TypeError, NotImplementedError):
selection = False
else:
writer = SelectionWriter(filename, **kwargs)
selection = True

if not (coords or selection):
raise ValueError("No writer found for format: {0}".format(filename))
else:
writer.write(self.atoms)
if coords: # only these writers have a close method
writer.close()


class ResidueGroup(object):
level = 'residue'
Expand Down
63 changes: 63 additions & 0 deletions package/MDAnalysis/core/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
===================================================================
"""
import json

import numpy as np

from ..lib.mdamath import one_to_many_pointers
Expand Down Expand Up @@ -348,3 +350,64 @@ def add_TopologyAttr(self, topologyattr):
self.attrs.append(topologyattr)
topologyattr.top = self
self.__setattr__(topologyattr.attrname, topologyattr)

def to_json(self, filename):
"""Write out Topology to JSON form.
Parameters
----------
filename : str
File to write.
"""
jstruct = {'n_atoms': self.n_atoms,
'n_residues': self.n_residues,
'n_segments': self.n_segments,
'atoms->residues': self.tt.AR.tolist(),
'residues->segments': self.tt.RS.tolist(),
'topattrs': dict()}

# TODO: add warning that could not serialize some topology attributes
for topattr in self.attrs:

# these don't hold any data of their own
if isinstance(topattr, (Atomindices, Resindices, Segindices)):
continue

try:
jstruct['topattrs'][topattr.attrname] = topattr._to_json()
except AttributeError:
pass

# write out
with open(filename, 'w') as f:
json.dump(jstruct, f)

@classmethod
def from_json(cls, filename):
"""Generate Topology from JSON form.
"""
from . import _TOPOLOGYATTRS

with open(filename, 'r') as f:
jstruct = json.load(f)

attrs = []
for attrname, values in jstruct['topattrs'].items():
# TODO: add warning that could not deserialize a topology attribute
try:
topattr = _TOPOLOGYATTRS[attrname]
except KeyError:
pass

attrs.append(topattr._from_json(values))

top = cls(n_atoms=jstruct['n_atoms'],
n_res=jstruct['n_residues'],
n_seg=jstruct['n_segments'],
attrs=attrs,
atom_resindex=np.array(jstruct['atoms->residues']),
residue_segindex=np.array(jstruct['residues->segments']))

return top
45 changes: 44 additions & 1 deletion package/MDAnalysis/core/topologyattrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
Common TopologyAttrs used by most topology parsers.
"""
import six
from six.moves import zip
from collections import defaultdict
import itertools
Expand All @@ -32,8 +33,22 @@
from . import selection
from . import flags

from . import _TOPOLOGYATTRS

class TopologyAttr(object):

class _TopologyAttrmeta(type):
# Auto register upon class creation
def __init__(cls, name, bases, classdict):
type.__init__(type, name, bases, classdict)
try:
attrname = classdict['attrname']
except KeyError:
pass
else:
_TOPOLOGYATTRS[attrname] = cls


class TopologyAttr(six.with_metaclass(_TopologyAttrmeta)):
"""Base class for Topology attributes.
.. note:: This class is intended to be subclassed, and mostly amounts to a
Expand Down Expand Up @@ -234,6 +249,13 @@ class AtomAttr(TopologyAttr):
singular = 'atomattr'
target_levels = ['atom']

def _to_json(self):
return self.values.tolist()

@classmethod
def _from_json(cls, values):
return cls(np.array(values))

def get_atoms(self, ag):
return self.values[ag._ix]

Expand Down Expand Up @@ -864,6 +886,13 @@ class ResidueAttr(TopologyAttr):
target_levels = ['residue']
per_object = 'residue'

def _to_json(self):
return self.values.tolist()

@classmethod
def _from_json(cls, values):
return cls(np.array(values))

def get_atoms(self, ag):
rix = self.top.tt.atoms2residues(ag._ix)
return self.values[rix]
Expand Down Expand Up @@ -959,6 +988,13 @@ class SegmentAttr(TopologyAttr):
target_levels = ['segment']
per_object = 'segment'

def _to_json(self):
return self.values.tolist()

@classmethod
def _from_json(cls, values):
return cls(np.array(values))

def get_atoms(self, ag):
six = self.top.tt.atoms2segments(ag._ix)
return self.values[six]
Expand Down Expand Up @@ -1045,6 +1081,13 @@ def __init__(self, values, types=None):
def __len__(self):
return len(self._bondDict)

def _to_json(self):
return {'values': self.values, 'types': self.types}

@classmethod
def _from_json(cls, values):
return cls(values=values['values'], types=values['types'])

@property
@cached('bd')
def _bondDict(self):
Expand Down
7 changes: 5 additions & 2 deletions package/MDAnalysis/core/universe.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def __init__(self, *args, **kwargs):
if isinstance(args[0], Topology):
self._topology = args[0]
self.filename = None

if len(coordinatefile) == 0:
coordinatefile = None
else:
self.filename = args[0]
topology_format = kwargs.pop('topology_format', None)
Expand All @@ -154,8 +157,8 @@ def __init__(self, *args, **kwargs):
# or if file is known as a topology & coordinate file, use that
if fmt is None:
fmt = util.guess_format(self.filename)
if (fmt in MDAnalysis.coordinates._trajectory_readers
and fmt in MDAnalysis.topology._topology_parsers):
if (fmt in MDAnalysis.coordinates._READERS
and fmt in MDAnalysis.topology._PARSERS):
coordinatefile = self.filename
if len(coordinatefile) == 0:
coordinatefile = None
Expand Down

0 comments on commit 574cd1f

Please sign in to comment.