Skip to content

Commit

Permalink
Make pyedr also return a dictionary of units (#56)
Browse files Browse the repository at this point in the history
* Made pyedr also return a dictionary of units
  • Loading branch information
BFedder committed Aug 31, 2022
1 parent ccfdc89 commit 2c0efef
Show file tree
Hide file tree
Showing 12 changed files with 97 additions and 40 deletions.
18 changes: 15 additions & 3 deletions README.rst
Expand Up @@ -18,17 +18,21 @@ the following functions:
type from a given path to an EDR file.
- ``read_edr``: parses an EDR file and returns the energy terms
in a nested list
- ``get_unit_dictionary``: Returns a dictionary that holds the units of each
energy term found in the EDR file.


Panedr
------

Panedr uses the Pyedr library to read a `Gromacs EDR`_ binary energy XDR file
and returns its contents as a pandas_ dataframe. Panedr exposes the
following function:
following functions:

- ``edr_to_df``: which gets the path to an EDR file and returns a
pandas DataFrame,
pandas DataFrame.
- ``get_unit_dictionary``: Returns a dictionary that holds the units of each
energy term found in the EDR file.


Example
Expand All @@ -52,6 +56,10 @@ Using ``pyedr``:
# Get the average pressure after the first 10 ns
pressure_avg = dic['Pressure'][dic['Time'] > 10000].mean()
# Get the units of the EDR entries
unit_dict = pyedr.get_unit_dictionary(path)
unit_dict["Temperature"] # returns "K"
Using ``panedr``:

Expand All @@ -71,6 +79,10 @@ Using ``panedr``:
# Get the average pressure after the first 10 ns
pressure_avg = df['Pressure'][df['Time'] > 10000].mean()
# Get the units of the EDR entries
unit_dict = panedr.get_unit_dictionary(path)
unit_dict["Temperature"] # returns "K"
Install
-------
Expand All @@ -79,7 +91,7 @@ You can install ``pyedr`` and ``panedr`` using ``pip``:

.. code:: bash
pip install pyedr
pip install pyedr
# installing panedr automatically installs pyedr
pip install panedr
Expand Down
4 changes: 1 addition & 3 deletions panedr/panedr/__init__.py
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from .panedr import edr_to_df, get_unit_dictionary
import pbr.version
__version__ = pbr.version.VersionInfo('panedr').release_string()
del pbr

from .panedr import edr_to_df
4 changes: 2 additions & 2 deletions panedr/panedr/panedr.py
Expand Up @@ -37,11 +37,11 @@
.. autofunction:: edr_to_df
"""

from pyedr import edr_to_dict, read_edr
from pyedr import read_edr, get_unit_dictionary
import pandas as pd


__all__ = ['edr_to_df', ]
__all__ = ['edr_to_df', 'get_unit_dictionary']


def edr_to_df(path: str, verbose: bool = False) -> pd.DataFrame:
Expand Down
35 changes: 23 additions & 12 deletions panedr/panedr/tests/test_edr.py
Expand Up @@ -12,15 +12,17 @@
from io import StringIO
from collections import namedtuple
from pathlib import Path
import pickle

import numpy as np
from numpy.testing import assert_allclose
import pandas
import pyedr
from pyedr.tests.test_edr import read_xvg, redirect_stderr
from pyedr.tests.datafiles import (
EDR, EDR_XVG, EDR_IRREGULAR, EDR_IRREGULAR_XVG,
EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_BLOCKS, EDR_BLOCKS_XVG
EDR, EDR_XVG, EDR_UNITS, EDR_IRREG, EDR_IRREG_XVG,
EDR_IRREG_UNITS, EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_DOUBLE_UNITS,
EDR_BLOCKS, EDR_BLOCKS_XVG, EDR_BLOCKS_UNITS
)

import panedr
Expand All @@ -32,39 +34,48 @@
NDEC_PATTERN = re.compile(r'[\.eE]')

# Data constants
EDR_Data = namedtuple('EDR_Data', ['df', 'edr_dict', 'xvgdata', 'xvgtime',
'xvgnames', 'xvgcols', 'xvgprec', 'edrfile',
EDR_Data = namedtuple('EDR_Data', ['df', 'df_units', 'edr_dict', 'edr_units',
'xvgdata', 'xvgtime', 'xvgnames', 'xvgcols',
'xvgprec', 'true_units', 'edrfile',
'xvgfile'])


@pytest.fixture(scope='module',
params=[(EDR, EDR_XVG),
(EDR_IRREGULAR, EDR_IRREGULAR_XVG),
(EDR_DOUBLE, EDR_DOUBLE_XVG),
(EDR_BLOCKS, EDR_BLOCKS_XVG),
(Path(EDR), EDR_XVG),])
params=[(EDR, EDR_XVG, EDR_UNITS),
(EDR_IRREG, EDR_IRREG_XVG, EDR_IRREG_UNITS),
(EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_DOUBLE_UNITS),
(EDR_BLOCKS, EDR_BLOCKS_XVG, EDR_BLOCKS_UNITS),
(Path(EDR), EDR_XVG, EDR_UNITS), ])
def edr(request):
edrfile, xvgfile = request.param
edrfile, xvgfile, unitfile = request.param
df = panedr.edr_to_df(edrfile)
df_units = panedr.get_unit_dictionary(edrfile)
edr_dict = pyedr.edr_to_dict(edrfile)
edr_units = pyedr.get_unit_dictionary(edrfile)
with open(unitfile, "rb") as f:
true_units = pickle.load(f)
xvgdata, xvgnames, xvgprec = read_xvg(xvgfile)
xvgtime = xvgdata[:, 0]
xvgdata = xvgdata[:, 1:]
xvgcols = np.insert(xvgnames, 0, u'Time')
return EDR_Data(df, edr_dict, xvgdata, xvgtime, xvgnames,
xvgcols, xvgprec, edrfile, xvgfile)
return EDR_Data(df, df_units, edr_dict, edr_units, xvgdata, xvgtime,
xvgnames, xvgcols, xvgprec, true_units, edrfile, xvgfile)


class TestEdrToDf(object):
"""
Tests for :func:`panedr.edr_to_df`.
"""

def test_output_type(self, edr):
"""
Test that the function returns a pandas DataFrame.
"""
assert isinstance(edr.df, pandas.DataFrame)

def test_units(self, edr):
"""
Test that the unit dictionary obtained through panedr matches the
reference units loaded from the pickle file.
"""
assert edr.df_units == edr.true_units

def test_columns(self, edr):
"""
Test that the column names and order match.
Expand Down
4 changes: 1 addition & 3 deletions pyedr/pyedr/__init__.py
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from .pyedr import edr_to_dict, read_edr, get_unit_dictionary
import pbr.version
__version__ = pbr.version.VersionInfo('pyedr').release_string()
del pbr

from .pyedr import edr_to_dict, read_edr
28 changes: 26 additions & 2 deletions pyedr/pyedr/pyedr.py
Expand Up @@ -80,7 +80,7 @@
Enxnm = collections.namedtuple('Enxnm', 'name unit')
ENX_VERSION = 5

__all__ = ['edr_to_dict', 'read_edr']
__all__ = ['edr_to_dict', 'read_edr', 'get_unit_dictionary']

class EDRFile(object):
def __init__(self, path):
Expand Down Expand Up @@ -409,7 +409,9 @@ def is_frame_magic(data):
all_energies_type = List[List[float]]
all_names_type = List[str]
times_type = List[float]
read_edr_return_type = Tuple[all_energies_type, all_names_type, times_type]
read_edr_return_type = Tuple[all_energies_type,
all_names_type,
times_type]


def read_edr(path: str, verbose: bool = False) -> read_edr_return_type:
Expand Down Expand Up @@ -464,6 +466,28 @@ def read_edr(path: str, verbose: bool = False) -> read_edr_return_type:
return all_energies, all_names, times


def get_unit_dictionary(path: str) -> Dict[str, str]:
    """Return the unit of every energy term stored in an EDR file.

    An :class:`EDRFile` is built from ``path`` and its ``nms`` entries are
    walked to pair each term name with its unit. (Per the original
    documentation, constructing the object executes :func:`do_enxnms`,
    which reads the names and units of the EDR data.)

    Parameters
    ----------
    path : str
        Path to the EDR file to be read. The value is passed through
        ``str()`` before being handed to :class:`EDRFile`.

    Returns
    -------
    unit_dict : Dict[str, str]
        A dictionary mapping the term names to their units. A ``'Time'``
        entry with unit ``"ps"`` is seeded first, so an entry with the
        same name coming from the file would replace its value.
    """
    reader = EDRFile(str(path))
    # Start from the time axis, then layer the per-term units on top in the
    # order in which the file lists them.
    unit_dict = {'Time': "ps"}
    unit_dict.update((entry.name, entry.unit) for entry in reader.nms)
    return unit_dict


def edr_to_dict(path: str, verbose: bool = False) -> Dict[str, np.ndarray]:
"""Calls :func:`read_edr` and packs its return values into a dictionary
Expand Down
Binary file added pyedr/pyedr/tests/data/blocks_units.p
Binary file not shown.
Binary file added pyedr/pyedr/tests/data/cat_units.p
Binary file not shown.
Binary file added pyedr/pyedr/tests/data/double_units.p
Binary file not shown.
Binary file added pyedr/pyedr/tests/data/irregular_units.p
Binary file not shown.
8 changes: 6 additions & 2 deletions pyedr/pyedr/tests/datafiles.py
Expand Up @@ -30,12 +30,16 @@

EDR = resource_filename(__name__, 'data/cat.edr')
EDR_XVG = resource_filename(__name__, 'data/cat.xvg')
EDR_UNITS = resource_filename(__name__, 'data/cat_units.p')

EDR_IRREGULAR = resource_filename(__name__, 'data/irregular.edr')
EDR_IRREGULAR_XVG = resource_filename(__name__, 'data/irregular.xvg')
EDR_IRREG = resource_filename(__name__, 'data/irregular.edr')
EDR_IRREG_XVG = resource_filename(__name__, 'data/irregular.xvg')
EDR_IRREG_UNITS = resource_filename(__name__, 'data/irregular_units.p')

EDR_DOUBLE = resource_filename(__name__, 'data/double.edr')
EDR_DOUBLE_XVG = resource_filename(__name__, 'data/double.xvg')
EDR_DOUBLE_UNITS = resource_filename(__name__, 'data/double_units.p')

EDR_BLOCKS = resource_filename(__name__, 'data/blocks.edr')
EDR_BLOCKS_XVG = resource_filename(__name__, 'data/blocks.xvg')
EDR_BLOCKS_UNITS = resource_filename(__name__, 'data/blocks_units.p')
36 changes: 23 additions & 13 deletions pyedr/pyedr/tests/test_edr.py
Expand Up @@ -10,15 +10,17 @@
import re
import sys
import unittest
import pickle

import pytest
import numpy as np
from numpy.testing import assert_allclose

import pyedr
from pyedr.tests.datafiles import (
EDR, EDR_XVG, EDR_IRREGULAR, EDR_IRREGULAR_XVG,
EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_BLOCKS, EDR_BLOCKS_XVG
EDR, EDR_XVG, EDR_UNITS, EDR_IRREG, EDR_IRREG_XVG,
EDR_IRREG_UNITS, EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_DOUBLE_UNITS,
EDR_BLOCKS, EDR_BLOCKS_XVG, EDR_BLOCKS_UNITS
)


Expand All @@ -28,39 +30,47 @@
NDEC_PATTERN = re.compile(r'[\.eE]')

# Data constants
EDR_Data = namedtuple('EDR_Data',
['edr_dict', 'xvgdata', 'xvgtime', 'xvgnames',
'xvgcols', 'xvgprec', 'edrfile', 'xvgfile'])
EDR_Data = namedtuple('EDR_Data',
['edr_dict', 'edr_units', 'xvgdata', 'xvgtime',
'xvgnames', 'xvgcols', 'xvgprec', 'true_units',
'edrfile', 'xvgfile'])


@pytest.fixture(scope='module',
params=[(EDR, EDR_XVG),
(EDR_IRREGULAR, EDR_IRREGULAR_XVG),
(EDR_DOUBLE, EDR_DOUBLE_XVG),
(EDR_BLOCKS, EDR_BLOCKS_XVG),
(Path(EDR), EDR_XVG),])
params=[(EDR, EDR_XVG, EDR_UNITS),
(EDR_IRREG, EDR_IRREG_XVG, EDR_IRREG_UNITS),
(EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_DOUBLE_UNITS),
(EDR_BLOCKS, EDR_BLOCKS_XVG, EDR_BLOCKS_UNITS),
(Path(EDR), EDR_XVG, EDR_UNITS), ])
def edr(request):
edrfile, xvgfile = request.param
edrfile, xvgfile, unitfile = request.param
edr_dict = pyedr.edr_to_dict(edrfile)
edr_units = pyedr.get_unit_dictionary(edrfile)
xvgdata, xvgnames, xvgprec = read_xvg(xvgfile)
with open(unitfile, "rb") as f:
true_units = pickle.load(f)
xvgtime = xvgdata[:, 0]
xvgdata = xvgdata[:, 1:]
xvgcols = np.insert(xvgnames, 0, u'Time')
return EDR_Data(edr_dict, xvgdata, xvgtime, xvgnames,
xvgcols, xvgprec, edrfile, xvgfile)
return EDR_Data(edr_dict, edr_units, xvgdata, xvgtime, xvgnames,
xvgcols, xvgprec, true_units, edrfile, xvgfile)


class TestEdrToDict(object):
"""
Tests for :func:`pyedr.edr_to_dict`.
"""

def test_output_type(self, edr):
"""
Test that the function returns a dictionary of ndarrays
"""
assert isinstance(edr.edr_dict, dict)
assert isinstance(edr.edr_dict['Time'], np.ndarray)

def test_units(self, edr):
"""
Test that the unit dictionary returned by pyedr matches the reference
units loaded from the pickle file.
"""
assert edr.edr_units == edr.true_units

def test_columns(self, edr):
"""
Test that the dictionary names match
Expand Down

0 comments on commit 2c0efef

Please sign in to comment.