Skip to content

Commit

Permalink
DMSReader for topology and coordinates
Browse files Browse the repository at this point in the history
- only read support, via sqlite3,
- reads coordinates, velocities and bonds only,
- added sanity tests (number of atoms and bonds),
- added atomselection tests (name, resname, segid).
  • Loading branch information
jandom committed Jan 21, 2013
1 parent a88b91f commit bebea4a
Show file tree
Hide file tree
Showing 10 changed files with 310 additions and 6 deletions.
149 changes: 149 additions & 0 deletions package/MDAnalysis/coordinates/DMS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- http://mdanalysis.googlecode.com
# Copyright (c) 2006-2011 Naveen Michaud-Agrawal,
# Elizabeth J. Denning, Oliver Beckstein,
# and contributors (see website for details)
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and
# O. Beckstein. MDAnalysis: A Toolkit for the Analysis of
# Molecular Dynamics Simulations. J. Comput. Chem. 32 (2011), 2319--2327,
# doi:10.1002/jcc.21787
#

"""
"""
from __future__ import with_statement

import os, errno
import warnings

import numpy, sqlite3

import MDAnalysis
import base
import MDAnalysis.core.util as util
from MDAnalysis.coordinates.core import triclinic_box, triclinic_vectors

from copy import deepcopy

class Timestep(base.Timestep):
@property
def dimensions(self):
"""unitcell dimensions (A, B, C, alpha, beta, gamma)
GRO::
8.00170 8.00170 5.65806 0.00000 0.00000 0.00000 0.00000 4.00085 4.00085
PDB::
CRYST1 80.017 80.017 80.017 60.00 60.00 90.00 P 1 1
XTC: c.trajectory.ts._unitcell::
array([[ 80.00515747, 0. , 0. ],
[ 0. , 80.00515747, 0. ],
[ 40.00257874, 40.00257874, 56.57218552]], dtype=float32)
"""
# unit cell line (from http://manual.gromacs.org/current/online/gro.html)
# v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y)
# 0 1 2 3 4 5 6 7 8
x = self._unitcell['x']
y = self._unitcell['y']
z = self._unitcell['z'] # this ordering is correct! (checked it, OB)
return triclinic_box(x,y,z)

class DMSReader(base.Reader):
"""
Reads both coordinates and velocities.
"""
format = 'DMS'
units = {'time': None, 'length': 'A', 'velocity': 'A/ps'}
_Timestep = Timestep


def get_coordinates(self, cur):
cur.execute('SELECT * FROM particle')
particles = cur.fetchall()
return [ (p['x'], p['y'], p['z']) for p in particles]

def get_particle_by_columns(self, cur, columns=['x', 'y', 'z']):
cur.execute('SELECT * FROM particle')
particles = cur.fetchall()
return [ tuple([p[c] for c in columns ]) for p in particles]

def get_global_cell(self, cur):
cur.execute('SELECT * FROM global_cell')
rows = cur.fetchall()
assert len(rows) == 3
x = [row["x"] for row in rows]
y = [row["y"] for row in rows]
z = [row["z"] for row in rows]
return {'x': x, 'y': y, 'z': z}

def __init__(self,filename,convert_units=None,**kwargs):
if convert_units is None:
convert_units = MDAnalysis.core.flags['convert_gromacs_lengths']
self.convert_units = convert_units # convert length and time to base units

coords_list = None
velocities_list = None
con = sqlite3.connect(filename)

def dict_factory(cursor, row):
d = {}
for idx, col in enumerate(cursor.description):
d[col[0]] = row[idx]
return d

with con:
# This will return dictionaries instead of tuples, when calling cur.fetch() or fetchall()
con.row_factory = dict_factory
cur = con.cursor()
coords_list = self.get_coordinates(cur)
velocities_list = self.get_particle_by_columns(cur, columns=['vx', 'vy', 'vz'])
unitcell = self.get_global_cell(cur)
assert coords_list
self.numatoms = len(coords_list)
coords_list = numpy.array(coords_list)
self.ts = self._Timestep(coords_list)
self.ts.frame = 1 # 1-based frame number
if velocities_list: #perform this operation only if velocities are present in coord file
# TODO: use a Timestep that knows about velocities such as TRR.Timestep or better, TRJ.Timestep
velocities_arr = numpy.array(velocities_list, dtype=numpy.float32)
if numpy.any(velocities_arr):
self.ts._velocities = velocities_arr
self.convert_velocities_from_native(self.ts._velocities) #converts nm/ps to A/ps units
# ts._unitcell layout is format dependent; Timestep.dimensions does the conversion
self.ts._unitcell = unitcell
if self.convert_units:
self.convert_pos_from_native(self.ts._pos) # in-place !
self.convert_pos_from_native(self.ts._unitcell) # in-place ! (all are lengths)
self.numframes = 1
self.fixed = 0
self.skip = 1
self.periodic = False
self.delta = 0
self.skip_timestep = 1

def Writer(self, filename, **kwargs):
raise NotImplementedError

def __iter__(self):
yield self.ts # Just a single frame
raise StopIteration

def _read_frame(self, frame):
if frame != 0:
raise IndexError("DMSReader only handles a single frame at frame index 0")
return self.ts

def _read_next_timestep(self):
# CRD files only contain a single frame
raise IOError
4 changes: 3 additions & 1 deletion package/MDAnalysis/coordinates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@

__all__ = ['reader', 'writer']

import PDB, PQR, DCD, CRD, XTC, TRR, GRO, XYZ, TRJ, PDBQT, LAMMPS
import PDB, PQR, DCD, CRD, XTC, TRR, GRO, XYZ, TRJ, PDBQT, LAMMPS, DMS
import base
from core import reader, writer

Expand All @@ -572,6 +572,7 @@
'PQR': PQR.PQRReader,
'LAMMPS': LAMMPS.DCDReader,
'CHAIN': base.ChainReader,
'DMS': DMS.DMSReader,
}

#: formats of readers that can also handle gzip or bzip2 compressed files
Expand All @@ -585,6 +586,7 @@
'GRO': GRO.GROReader,
'CRD': CRD.CRDReader,
'PQR': PQR.PQRReader,
'DMS': DMS.DMSReader,
}

#: hack: readers that ignore most errors (permissive=True); at the moment
Expand Down
1 change: 1 addition & 0 deletions package/MDAnalysis/core/AtomGroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def __init__(self, number, name, type, resname, resid, segid, mass, charge,
self.charge = charge
self.radius = radius
self.bfactor = bfactor
self.bonds = list()
def __repr__(self):
return "< Atom " + repr(self.number+1) + ": name " + repr(self.name) +" of type " + \
repr(self.type) + " of resname " + repr(self.resname) + ", resid " +repr(self.resid) + " and segid " +repr(self.segid)+'>'
Expand Down
83 changes: 83 additions & 0 deletions package/MDAnalysis/topology/DMSParser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- http://mdanalysis.googlecode.com
# Copyright (c) 2006-2011 Naveen Michaud-Agrawal,
# Elizabeth J. Denning, Oliver Beckstein,
# and contributors (see website for details)
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and
# O. Beckstein. MDAnalysis: A Toolkit for the Analysis of
# Molecular Dynamics Simulations. J. Comput. Chem. 32 (2011), 2319--2327,
# doi:10.1002/jcc.21787
#

"""
"""
from __future__ import with_statement

from MDAnalysis.core.AtomGroup import Atom
from MDAnalysis.topology.core import guess_atom_type, Bond
import numpy, sqlite3, collections

class DMSParseError(Exception):
pass

def parse(filename):
"""Parse DMS file *filename* and return the dict `structure`.
Only reads the list of atoms.
:Returns: MDAnalysis internal *structure* dict, which contains
Atom and Bond objects
.. SeeAlso:: The *structure* dict is defined in
:func:`MDAnalysis.topology.PSFParser.parse`.
"""
con = sqlite3.connect(filename)

def dict_factory(cursor, row):
"""
Fetch SQL records as dictionaries, rather than the default tuples.
"""
d = {}
for idx, col in enumerate(cursor.description):
d[col[0]] = row[idx]
return d

atoms = None
with con:
# This will return dictionaries instead of tuples, when calling cur.fetch() or fetchall()
con.row_factory = dict_factory
cur = con.cursor()
cur.execute('SELECT * FROM particle')
particles = cur.fetchall()

# p["anum"] contains the atomic number
atoms = [ (p["id"], Atom(p["id"], p["name"].strip(), guess_atom_type(p["name"].strip()), p["resname"].strip(), p["resid"], p["segid"].strip(), p["mass"], p["charge"])) for p in particles]

atoms = collections.OrderedDict(atoms)

cur.execute('SELECT * FROM bond')
bonds = cur.fetchall()

bonds = [ Bond(atoms[b["p0"]],
atoms[b["p1"]],
b["order"] ) for b in bonds]

assert atoms
# All the records below besides donors and acceptors can be contained in a DMS file.
# In addition to the coordinates and bonds, DMS contains the entire force-field information (terms+parameters).
return {"_atoms": atoms.values(),
"_bonds": bonds,
"_angles": [],
"_dihe": [],
"_impr": [],
"_donors": [],
"_acceptors": [],
}
5 changes: 3 additions & 2 deletions package/MDAnalysis/topology/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
import core
import PSFParser, TOPParser, \
PDBParser, PrimitivePDBParser, PQRParser, GROParser, CRDParser, \
PDBQTParser
PDBQTParser, DMSParser

# dictionary of known file formats and the corresponding file parser
# (all parser should essentially do the same thing; the PSFParser is
Expand All @@ -91,7 +91,8 @@
'CRD': CRDParser.parse,
'TOP': TOPParser.parse,
'PRMTOP': TOPParser.parse,
'PDBQT': PDBQTParser.parse
'PDBQT': PDBQTParser.parse,
'DMS': DMSParser.parse,
}
_topology_parsers_permissive = _topology_parsers.copy()
_topology_parsers_permissive['PDB'] = PrimitivePDBParser.parse
5 changes: 4 additions & 1 deletion package/MDAnalysis/topology/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ def build_segments(atoms):

class Bond(object):
"""A bond between two :class:`~MDAnalysis.core.AtomGroup.Atom` instances."""
def __init__(self, a1, a2):
def __init__(self, a1, a2, order=None):
self.atom1 = a1
self.atom2 = a2
self.order = order
[a.bonds.append(self) for a in [a1, a2]]

def partner(self, atom):
if atom is self.atom1:
return self.atom2
Expand Down
Binary file added testsuite/MDAnalysisTests/data/adk_closed.dms
Binary file not shown.
3 changes: 3 additions & 0 deletions testsuite/MDAnalysisTests/datafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"PDBQT_querypdb",
"FASTA", # sequence alignment, Issue 112 + 113
"PDB_HOLE", # gramicidin A
"DMS", # DHFR
]

from pkg_resources import resource_filename
Expand Down Expand Up @@ -107,3 +108,5 @@
FASTA = resource_filename(__name__, 'data/test.fasta')

PDB_HOLE = resource_filename(__name__, 'data/1grm_single.pdb')

DMS = resource_filename(__name__, 'data/adk_closed.dms')
27 changes: 26 additions & 1 deletion testsuite/MDAnalysisTests/test_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from nose.plugins.attrib import attr

from MDAnalysis.tests.datafiles import PSF, DCD, DCD_empty, PDB_small, PDB_closed, PDB_multiframe, \
PDB, CRD, XTC, TRR, GRO, \
PDB, CRD, XTC, TRR, GRO, DMS, \
XYZ, XYZ_bz2, XYZ_psf, PRM, TRJ, TRJ_bz2, PRMpbc, TRJpbc_bz2, PRMncdf, NCDF, PQR

import os
Expand Down Expand Up @@ -834,6 +834,31 @@ def test_full_slice(self):
assert_equal(frames, np.arange(self.universe.trajectory.numframes))


class TestDMSReader(TestCase):
def setUp(self):
self.universe = mda.Universe(DMS)
self.ts = self.universe.trajectory.ts

def tearDown(self):
del self.universe
del self.ts

def test_global_cell(self):
assert_equal(self.ts.dimensions, [0., 0., 0., 0., 0., 0.])

def test_velocities(self):
assert_equal(hasattr(self.ts, "_velocities"), False)

def test_number_of_coords(self):
# Desired value taken from VMD
# Info) Atoms: 3341
assert_equal(len(self.universe.atoms),3341)

def test_coords_atom_0(self):
# Desired coordinates taken directly from the SQLite file. Check unit conversion
coords_0 = np.array([-11.0530004501343, 26.6800003051758, 12.7419996261597,], dtype=np.float32)
assert_array_equal(self.universe.atoms[0].pos, coords_0)

class TestGROReaderNoConversion(TestCase, RefAdK):
def setUp(self):
##mda.core.flags['convert_gromacs_lengths'] = False
Expand Down
Loading

0 comments on commit bebea4a

Please sign in to comment.