DMSReader for topology and coordinates

- only read support, via sqlite3, - reads coordinates, velocities and bonds only, - added sanity tests (number of atoms and bonds), - added atomselection tests (name, resname, segid).
MDAnalysis · Jan 21, 2013 · bebea4a · bebea4a
1 parent a88b91f
commit bebea4a
Show file tree

Hide file tree

Showing 10 changed files with 310 additions and 6 deletions.
diff --git a/package/MDAnalysis/coordinates/DMS.py b/package/MDAnalysis/coordinates/DMS.py
@@ -0,0 +1,149 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- http://mdanalysis.googlecode.com
+# Copyright (c) 2006-2011 Naveen Michaud-Agrawal,
+#               Elizabeth J. Denning, Oliver Beckstein,
+#               and contributors (see website for details)
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+#     N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and
+#     O. Beckstein. MDAnalysis: A Toolkit for the Analysis of
+#     Molecular Dynamics Simulations. J. Comput. Chem. 32 (2011), 2319--2327,
+#     doi:10.1002/jcc.21787
+#
+
+"""
+
+"""
+from __future__ import with_statement
+
+import os, errno
+import warnings
+
+import numpy, sqlite3
+
+import MDAnalysis
+import base
+import MDAnalysis.core.util as util
+from MDAnalysis.coordinates.core import triclinic_box, triclinic_vectors
+
+from copy import deepcopy
+
+class Timestep(base.Timestep):
+        @property
+        def dimensions(self):
+                """unitcell dimensions (A, B, C, alpha, beta, gamma)
+
+                GRO::
+
+                  8.00170   8.00170   5.65806   0.00000   0.00000   0.00000   0.00000   4.00085   4.00085
+
+                PDB::
+
+                  CRYST1   80.017   80.017   80.017  60.00  60.00  90.00 P 1           1
+
+                XTC: c.trajectory.ts._unitcell::
+
+                  array([[ 80.00515747,   0.        ,   0.        ],
+                         [  0.        ,  80.00515747,   0.        ],
+                         [ 40.00257874,  40.00257874,  56.57218552]], dtype=float32)
+                """
+                # unit cell line (from http://manual.gromacs.org/current/online/gro.html)
+                # v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y)
+                # 0     1     2      3     4     5     6    7     8
+                x = self._unitcell['x']
+                y = self._unitcell['y']
+                z = self._unitcell['z']  # this ordering is correct! (checked it, OB)
+                return triclinic_box(x,y,z)
+
+class DMSReader(base.Reader):
+        """
+        Reads both coordinates and velocities. 
+        """
+        format = 'DMS'
+        units = {'time': None, 'length': 'A', 'velocity': 'A/ps'}
+        _Timestep = Timestep
+
+
+        def get_coordinates(self, cur):
+            cur.execute('SELECT * FROM particle')
+            particles = cur.fetchall()
+            return [ (p['x'], p['y'], p['z'])  for p in particles]   
+
+        def get_particle_by_columns(self, cur, columns=['x', 'y', 'z']):
+            cur.execute('SELECT * FROM particle')
+            particles = cur.fetchall()
+            return [ tuple([p[c] for c in columns ])  for p in particles]
+
+        def get_global_cell(self, cur):
+            cur.execute('SELECT * FROM global_cell')
+            rows = cur.fetchall()
+            assert len(rows) == 3 
+            x = [row["x"] for row in rows]
+            y = [row["y"] for row in rows]
+            z = [row["z"] for row in rows]
+            return {'x': x, 'y': y, 'z': z}
+
+        def __init__(self,filename,convert_units=None,**kwargs):
+                if convert_units is None:
+                        convert_units = MDAnalysis.core.flags['convert_gromacs_lengths']
+                self.convert_units = convert_units  # convert length and time to base units                
+
+                coords_list = None
+                velocities_list = None                
+                con = sqlite3.connect(filename)
+
+                def dict_factory(cursor, row):
+                    d = {}
+                    for idx, col in enumerate(cursor.description):
+                        d[col[0]] = row[idx]
+                    return d
+
+                with con:
+                      # This will return dictionaries instead of tuples, when calling cur.fetch() or fetchall()
+                      con.row_factory = dict_factory
+                      cur = con.cursor()                    
+                      coords_list = self.get_coordinates(cur)
+                      velocities_list = self.get_particle_by_columns(cur, columns=['vx', 'vy', 'vz'])
+                      unitcell = self.get_global_cell(cur)
+                assert coords_list
+                self.numatoms = len(coords_list)
+                coords_list = numpy.array(coords_list)
+                self.ts = self._Timestep(coords_list)
+                self.ts.frame = 1  # 1-based frame number
+                if velocities_list: #perform this operation only if velocities are present in coord file
+                    # TODO: use a Timestep that knows about velocities such as TRR.Timestep or better, TRJ.Timestep
+                    velocities_arr = numpy.array(velocities_list, dtype=numpy.float32)
+                    if numpy.any(velocities_arr):
+                        self.ts._velocities = velocities_arr
+                        self.convert_velocities_from_native(self.ts._velocities) #converts nm/ps to A/ps units
+                # ts._unitcell layout is format dependent; Timestep.dimensions does the conversion
+                self.ts._unitcell = unitcell 
+                if self.convert_units:
+                        self.convert_pos_from_native(self.ts._pos)             # in-place !
+                        self.convert_pos_from_native(self.ts._unitcell)        # in-place ! (all are lengths)
+                self.numframes = 1
+                self.fixed = 0
+                self.skip = 1
+                self.periodic = False
+                self.delta = 0
+                self.skip_timestep = 1                  
+
+        def Writer(self, filename, **kwargs):
+                raise NotImplementedError
+
+        def __iter__(self):
+                yield self.ts  # Just a single frame
+                raise StopIteration
+
+        def _read_frame(self, frame):
+                if frame != 0:
+                        raise IndexError("DMSReader only handles a single frame at frame index 0")
+                return self.ts
+
+        def _read_next_timestep(self):
+                # CRD files only contain a single frame
+                raise IOError
diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py
@@ -552,7 +552,7 @@
 
 __all__ = ['reader', 'writer']
 
-import PDB, PQR, DCD, CRD, XTC, TRR, GRO, XYZ, TRJ, PDBQT, LAMMPS
+import PDB, PQR, DCD, CRD, XTC, TRR, GRO, XYZ, TRJ, PDBQT, LAMMPS, DMS
 import base
 from core import reader, writer
 
@@ -572,6 +572,7 @@
                        'PQR': PQR.PQRReader,
                        'LAMMPS': LAMMPS.DCDReader,
                        'CHAIN': base.ChainReader,
+                       'DMS': DMS.DMSReader,
                        }
 
 #: formats of readers that can also handle gzip or bzip2 compressed files
@@ -585,6 +586,7 @@
                        'GRO': GRO.GROReader,
                        'CRD': CRD.CRDReader,
                        'PQR': PQR.PQRReader,
+                       'DMS': DMS.DMSReader,
 }
 
 #: hack: readers that ignore most errors (permissive=True); at the moment

diff --git a/package/MDAnalysis/core/AtomGroup.py b/package/MDAnalysis/core/AtomGroup.py
@@ -187,6 +187,7 @@ def __init__(self, number, name, type, resname, resid, segid, mass, charge,
         self.charge = charge
         self.radius = radius
         self.bfactor = bfactor
+        self.bonds = list()
     def __repr__(self):
         return "< Atom " + repr(self.number+1) + ": name " + repr(self.name) +" of type " + \
                repr(self.type) + " of resname " + repr(self.resname) + ", resid " +repr(self.resid) + " and segid " +repr(self.segid)+'>'

diff --git a/package/MDAnalysis/topology/DMSParser.py b/package/MDAnalysis/topology/DMSParser.py
@@ -0,0 +1,83 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- http://mdanalysis.googlecode.com
+# Copyright (c) 2006-2011 Naveen Michaud-Agrawal,
+#               Elizabeth J. Denning, Oliver Beckstein,
+#               and contributors (see website for details)
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+#     N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and
+#     O. Beckstein. MDAnalysis: A Toolkit for the Analysis of
+#     Molecular Dynamics Simulations. J. Comput. Chem. 32 (2011), 2319--2327,
+#     doi:10.1002/jcc.21787
+#
+
+"""
+
+"""
+from __future__ import with_statement
+
+from MDAnalysis.core.AtomGroup import Atom
+from MDAnalysis.topology.core import guess_atom_type, Bond
+import numpy, sqlite3, collections
+
+class DMSParseError(Exception):
+        pass
+
+def parse(filename):
+        """Parse DMS file *filename* and return the dict `structure`.
+
+        Only reads the list of atoms.
+
+        :Returns: MDAnalysis internal *structure* dict, which contains 
+                  Atom and Bond objects
+
+        .. SeeAlso:: The *structure* dict is defined in
+                     :func:`MDAnalysis.topology.PSFParser.parse`.
+
+        """
+        con = sqlite3.connect(filename)
+
+        def dict_factory(cursor, row):
+            """
+            Fetch SQL records as dictionaries, rather than the default tuples.
+            """
+            d = {}
+            for idx, col in enumerate(cursor.description):
+                d[col[0]] = row[idx]
+            return d
+
+        atoms = None
+        with con:
+            # This will return dictionaries instead of tuples, when calling cur.fetch() or fetchall()
+            con.row_factory = dict_factory
+            cur = con.cursor()            
+            cur.execute('SELECT * FROM particle')
+            particles = cur.fetchall()
+
+            # p["anum"] contains the atomic number
+            atoms = [ (p["id"], Atom(p["id"], p["name"].strip(), guess_atom_type(p["name"].strip()), p["resname"].strip(), p["resid"], p["segid"].strip(), p["mass"], p["charge"]))  for p in particles]
+
+            atoms = collections.OrderedDict(atoms)
+
+            cur.execute('SELECT * FROM bond')
+            bonds = cur.fetchall()
+
+            bonds = [ Bond(atoms[b["p0"]], 
+                           atoms[b["p1"]], 
+                           b["order"] )  for b in bonds]
+
+        assert atoms
+        # All the records below besides donors and acceptors can be contained in a DMS file. 
+        # In addition to the coordinates and bonds, DMS contains the entire force-field information (terms+parameters).
+        return {"_atoms": atoms.values(), 
+                "_bonds": bonds,
+                "_angles": [],
+                "_dihe": [],
+                "_impr": [],
+                "_donors": [],
+                "_acceptors": [],
+                }            
diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py
@@ -78,7 +78,7 @@
 import core
 import PSFParser, TOPParser, \
     PDBParser, PrimitivePDBParser, PQRParser, GROParser, CRDParser, \
-    PDBQTParser
+    PDBQTParser, DMSParser
 
 # dictionary of known file formats and the corresponding file parser
 # (all parser should essentially do the same thing; the PSFParser is
@@ -91,7 +91,8 @@
                      'CRD': CRDParser.parse,
                      'TOP': TOPParser.parse,
                      'PRMTOP': TOPParser.parse,
-                     'PDBQT': PDBQTParser.parse
+                     'PDBQT': PDBQTParser.parse,
+                     'DMS': DMSParser.parse,
                      }
 _topology_parsers_permissive = _topology_parsers.copy()
 _topology_parsers_permissive['PDB'] = PrimitivePDBParser.parse
diff --git a/package/MDAnalysis/topology/core.py b/package/MDAnalysis/topology/core.py
@@ -70,9 +70,12 @@ def build_segments(atoms):
 
 class Bond(object):
     """A bond between two :class:`~MDAnalysis.core.AtomGroup.Atom` instances."""
-    def __init__(self, a1, a2):
+    def __init__(self, a1, a2, order=None):
         self.atom1 = a1
         self.atom2 = a2
+        self.order = order
+        [a.bonds.append(self) for a in [a1, a2]] 
+
     def partner(self, atom):
         if atom is self.atom1:
             return self.atom2

diff --git a/testsuite/MDAnalysisTests/data/adk_closed.dms b/testsuite/MDAnalysisTests/data/adk_closed.dms
diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py
@@ -49,6 +49,7 @@
            "PDBQT_querypdb",
            "FASTA",                             # sequence alignment, Issue 112 + 113
            "PDB_HOLE",                          # gramicidin A
+           "DMS",                               # DHFR
            ]
 
 from pkg_resources import resource_filename
@@ -107,3 +108,5 @@
 FASTA = resource_filename(__name__, 'data/test.fasta')
 
 PDB_HOLE = resource_filename(__name__, 'data/1grm_single.pdb')
+
+DMS = resource_filename(__name__, 'data/adk_closed.dms')
diff --git a/testsuite/MDAnalysisTests/test_coordinates.py b/testsuite/MDAnalysisTests/test_coordinates.py
@@ -25,7 +25,7 @@
 from nose.plugins.attrib import attr
 
 from MDAnalysis.tests.datafiles import PSF, DCD, DCD_empty, PDB_small, PDB_closed, PDB_multiframe, \
-    PDB, CRD, XTC, TRR, GRO, \
+    PDB, CRD, XTC, TRR, GRO, DMS, \
     XYZ, XYZ_bz2, XYZ_psf, PRM, TRJ, TRJ_bz2, PRMpbc, TRJpbc_bz2, PRMncdf, NCDF, PQR
 
 import os
@@ -834,6 +834,31 @@ def test_full_slice(self):
         assert_equal(frames, np.arange(self.universe.trajectory.numframes))
 
 
+class TestDMSReader(TestCase):
+    def setUp(self):
+        self.universe = mda.Universe(DMS)
+        self.ts = self.universe.trajectory.ts
+
+    def tearDown(self):
+        del self.universe
+        del self.ts
+
+    def test_global_cell(self):
+        assert_equal(self.ts.dimensions, [0., 0., 0., 0., 0., 0.])
+
+    def test_velocities(self):
+        assert_equal(hasattr(self.ts, "_velocities"), False)
+
+    def test_number_of_coords(self):
+        # Desired value taken from VMD
+        #      Info)    Atoms: 3341
+        assert_equal(len(self.universe.atoms),3341) 
+
+    def test_coords_atom_0(self):
+        # Desired coordinates taken directly from the SQLite file. Check unit conversion
+        coords_0 = np.array([-11.0530004501343, 26.6800003051758, 12.7419996261597,], dtype=np.float32)
+        assert_array_equal(self.universe.atoms[0].pos, coords_0)
+
 class TestGROReaderNoConversion(TestCase, RefAdK):
     def setUp(self):
         ##mda.core.flags['convert_gromacs_lengths'] = False