Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make pyedr also return a dictionary of units #56

Merged
merged 10 commits into from Aug 31, 2022
18 changes: 15 additions & 3 deletions README.rst
Expand Up @@ -18,17 +18,21 @@ the following functions:
type from a given path to an EDR file.
- ``read_edr``: parses an EDR file and returns the energy terms
in a nested list
- ``get_unit_dictionary``: Returns a dictionary that holds the units of each
energy term found in the EDR file.


Panedr
------

Panedr uses the Pyedr library to read a `Gromacs EDR`_ binary energy XDR file
and returns its contents as a pandas_ dataframe. Panedr exposes the
following function:
following functions:

- ``edr_to_df``: which gets the path to an EDR file and returns a
pandas DataFrame,
pandas DataFrame.
- ``get_unit_dictionary``: Returns a dictionary that holds the units of each
energy term found in the EDR file.


Example
Expand All @@ -52,6 +56,10 @@ Using ``pyedr``:
# Get the average pressure after the first 10 ns
pressure_avg = dic['Pressure'][dic['Time'] > 10000].mean()

# Get the units of the EDR entries
unit_dict = pyedr.get_unit_dictionary(path)
unit_dict["Temperature"] # returns "K"


Using ``panedr``:

Expand All @@ -71,6 +79,10 @@ Using ``panedr``:
# Get the average pressure after the first 10 ns
pressure_avg = df['Pressure'][df['Time'] > 10000].mean()

# Get the units of the EDR entries
unit_dict = panedr.get_unit_dictionary(path)
unit_dict["Temperature"] # returns "K"


Install
-------
Expand All @@ -79,7 +91,7 @@ You can install ``pyedr`` and ``panedr`` using ``pip``:

.. code:: bash

pip install pyedr
pip install pyedr

# installing panedr automatically installs pyedr
pip install panedr
Expand Down
4 changes: 1 addition & 3 deletions panedr/panedr/__init__.py
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from .panedr import edr_to_df, get_unit_dictionary
import pbr.version
__version__ = pbr.version.VersionInfo('panedr').release_string()
del pbr

from .panedr import edr_to_df
4 changes: 2 additions & 2 deletions panedr/panedr/panedr.py
Expand Up @@ -37,11 +37,11 @@
.. autofunction:: edr_to_df
"""

from pyedr import edr_to_dict, read_edr
from pyedr import read_edr, get_unit_dictionary
import pandas as pd


__all__ = ['edr_to_df', ]
__all__ = ['edr_to_df', 'get_unit_dictionary']


def edr_to_df(path: str, verbose: bool = False) -> pd.DataFrame:
Expand Down
35 changes: 23 additions & 12 deletions panedr/panedr/tests/test_edr.py
Expand Up @@ -12,15 +12,17 @@
from io import StringIO
from collections import namedtuple
from pathlib import Path
import pickle

import numpy as np
from numpy.testing import assert_allclose
import pandas
import pyedr
from pyedr.tests.test_edr import read_xvg, redirect_stderr
from pyedr.tests.datafiles import (
EDR, EDR_XVG, EDR_IRREGULAR, EDR_IRREGULAR_XVG,
EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_BLOCKS, EDR_BLOCKS_XVG
EDR, EDR_XVG, EDR_UNITS, EDR_IRREG, EDR_IRREG_XVG,
EDR_IRREG_UNITS, EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_DOUBLE_UNITS,
EDR_BLOCKS, EDR_BLOCKS_XVG, EDR_BLOCKS_UNITS
)

import panedr
Expand All @@ -32,39 +34,48 @@
NDEC_PATTERN = re.compile(r'[\.eE]')

# Data constants
EDR_Data = namedtuple('EDR_Data', ['df', 'edr_dict', 'xvgdata', 'xvgtime',
'xvgnames', 'xvgcols', 'xvgprec', 'edrfile',
EDR_Data = namedtuple('EDR_Data', ['df', 'df_units', 'edr_dict', 'edr_units',
'xvgdata', 'xvgtime', 'xvgnames', 'xvgcols',
'xvgprec', 'true_units', 'edrfile',
'xvgfile'])


@pytest.fixture(scope='module',
params=[(EDR, EDR_XVG),
(EDR_IRREGULAR, EDR_IRREGULAR_XVG),
(EDR_DOUBLE, EDR_DOUBLE_XVG),
(EDR_BLOCKS, EDR_BLOCKS_XVG),
(Path(EDR), EDR_XVG),])
params=[(EDR, EDR_XVG, EDR_UNITS),
(EDR_IRREG, EDR_IRREG_XVG, EDR_IRREG_UNITS),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll let it go here because the likelihood is that no one was using these downstream, but arbitrarily renaming file object names like this is a breaking change; please avoid doing this.

(EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_DOUBLE_UNITS),
(EDR_BLOCKS, EDR_BLOCKS_XVG, EDR_BLOCKS_UNITS),
(Path(EDR), EDR_XVG, EDR_UNITS), ])
def edr(request):
edrfile, xvgfile = request.param
edrfile, xvgfile, unitfile = request.param
df = panedr.edr_to_df(edrfile)
df_units = panedr.get_unit_dictionary(edrfile)
edr_dict = pyedr.edr_to_dict(edrfile)
edr_units = pyedr.get_unit_dictionary(edrfile)
with open(unitfile, "rb") as f:
true_units = pickle.load(f)
xvgdata, xvgnames, xvgprec = read_xvg(xvgfile)
xvgtime = xvgdata[:, 0]
xvgdata = xvgdata[:, 1:]
xvgcols = np.insert(xvgnames, 0, u'Time')
return EDR_Data(df, edr_dict, xvgdata, xvgtime, xvgnames,
xvgcols, xvgprec, edrfile, xvgfile)
return EDR_Data(df, df_units, edr_dict, edr_units, xvgdata, xvgtime,
xvgnames, xvgcols, xvgprec, true_units, edrfile, xvgfile)


class TestEdrToDf(object):
"""
Tests for :fun:`panedr.edr_to_df`.
"""

def test_output_type(self, edr):
"""
Test that the function returns a pandas DataFrame.
"""
assert isinstance(edr.df, pandas.DataFrame)

def test_units(self, edr):
assert edr.df_units == edr.true_units

def test_columns(self, edr):
"""
Test that the column names and order match.
Expand Down
4 changes: 1 addition & 3 deletions pyedr/pyedr/__init__.py
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from .pyedr import edr_to_dict, read_edr, get_unit_dictionary
import pbr.version
__version__ = pbr.version.VersionInfo('pyedr').release_string()
del pbr

from .pyedr import edr_to_dict, read_edr
31 changes: 29 additions & 2 deletions pyedr/pyedr/pyedr.py
Expand Up @@ -80,7 +80,7 @@
Enxnm = collections.namedtuple('Enxnm', 'name unit')
ENX_VERSION = 5

__all__ = ['edr_to_dict', 'read_edr']
__all__ = ['edr_to_dict', 'read_edr', 'get_unit_dictionary']

class EDRFile(object):
def __init__(self, path):
Expand Down Expand Up @@ -409,7 +409,10 @@ def is_frame_magic(data):
all_energies_type = List[List[float]]
all_names_type = List[str]
times_type = List[float]
read_edr_return_type = Tuple[all_energies_type, all_names_type, times_type]
read_edr_return_type = Tuple[all_energies_type,
all_names_type,
times_type,
Dict[str, str]]


def read_edr(path: str, verbose: bool = False) -> read_edr_return_type:
Expand All @@ -436,6 +439,8 @@ def read_edr(path: str, verbose: bool = False) -> read_edr_return_type:
A list containing the names of the energy terms found in the file
times: list[float]
A list containing the time of each step/frame.
unit_dict: Dict[str, str]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not in the return signature?

A dictionary mapping the term names to their units.
"""
begin = time.time()
edr_file = EDRFile(str(path))
Expand Down Expand Up @@ -464,6 +469,28 @@ def read_edr(path: str, verbose: bool = False) -> read_edr_return_type:
return all_energies, all_names, times


def get_unit_dictionary(path: str) -> Dict[str, str]:
    """Return the unit of every energy term stored in an EDR file.

    An :class:`EDRFile` is built from ``path`` and the name/unit pairs it
    exposes through its ``nms`` attribute are collected into a dictionary.
    A ``'Time'`` entry is always seeded with the unit ``"ps"``.

    Parameters
    ----------
    path : str
        Path to the EDR file to be read; it is passed through ``str()``
        before being handed to :class:`EDRFile`.

    Returns
    -------
    unit_dict: Dict[str, str]
        A dictionary mapping the term names to their units.
    """
    entries = EDRFile(str(path)).nms
    # Seed with the time column first; the terms read from the file are
    # added afterwards (a term that is itself called 'Time' would replace
    # the seeded unit).
    unit_dict = {'Time': "ps"}
    unit_dict.update((entry.name, entry.unit) for entry in entries)
    return unit_dict


def edr_to_dict(path: str, verbose: bool = False) -> Dict[str, np.ndarray]:
"""Calls :func:`read_edr` and packs its return values into a dictionary

Expand Down
Binary file added pyedr/pyedr/tests/data/blocks_units.p
Binary file not shown.
Binary file added pyedr/pyedr/tests/data/cat_units.p
Binary file not shown.
Binary file added pyedr/pyedr/tests/data/double_units.p
Binary file not shown.
Binary file added pyedr/pyedr/tests/data/irregular_units.p
Binary file not shown.
8 changes: 6 additions & 2 deletions pyedr/pyedr/tests/datafiles.py
Expand Up @@ -30,12 +30,16 @@

EDR = resource_filename(__name__, 'data/cat.edr')
EDR_XVG = resource_filename(__name__, 'data/cat.xvg')
EDR_UNITS = resource_filename(__name__, 'data/cat_units.p')

EDR_IRREGULAR = resource_filename(__name__, 'data/irregular.edr')
EDR_IRREGULAR_XVG = resource_filename(__name__, 'data/irregular.xvg')
EDR_IRREG = resource_filename(__name__, 'data/irregular.edr')
EDR_IRREG_XVG = resource_filename(__name__, 'data/irregular.xvg')
EDR_IRREG_UNITS = resource_filename(__name__, 'data/irregular_units.p')

EDR_DOUBLE = resource_filename(__name__, 'data/double.edr')
EDR_DOUBLE_XVG = resource_filename(__name__, 'data/double.xvg')
EDR_DOUBLE_UNITS = resource_filename(__name__, 'data/double_units.p')

EDR_BLOCKS = resource_filename(__name__, 'data/blocks.edr')
EDR_BLOCKS_XVG = resource_filename(__name__, 'data/blocks.xvg')
EDR_BLOCKS_UNITS = resource_filename(__name__, 'data/blocks_units.p')
36 changes: 23 additions & 13 deletions pyedr/pyedr/tests/test_edr.py
Expand Up @@ -10,15 +10,17 @@
import re
import sys
import unittest
import pickle

import pytest
import numpy as np
from numpy.testing import assert_allclose

import pyedr
from pyedr.tests.datafiles import (
EDR, EDR_XVG, EDR_IRREGULAR, EDR_IRREGULAR_XVG,
EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_BLOCKS, EDR_BLOCKS_XVG
EDR, EDR_XVG, EDR_UNITS, EDR_IRREG, EDR_IRREG_XVG,
EDR_IRREG_UNITS, EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_DOUBLE_UNITS,
EDR_BLOCKS, EDR_BLOCKS_XVG, EDR_BLOCKS_UNITS
)


Expand All @@ -28,39 +30,47 @@
NDEC_PATTERN = re.compile(r'[\.eE]')

# Data constants
EDR_Data = namedtuple('EDR_Data',
['edr_dict', 'xvgdata', 'xvgtime', 'xvgnames',
'xvgcols', 'xvgprec', 'edrfile', 'xvgfile'])
EDR_Data = namedtuple('EDR_Data',
['edr_dict', 'edr_units', 'xvgdata', 'xvgtime',
'xvgnames', 'xvgcols', 'xvgprec', 'true_units',
'edrfile', 'xvgfile'])


@pytest.fixture(scope='module',
params=[(EDR, EDR_XVG),
(EDR_IRREGULAR, EDR_IRREGULAR_XVG),
(EDR_DOUBLE, EDR_DOUBLE_XVG),
(EDR_BLOCKS, EDR_BLOCKS_XVG),
(Path(EDR), EDR_XVG),])
params=[(EDR, EDR_XVG, EDR_UNITS),
(EDR_IRREG, EDR_IRREG_XVG, EDR_IRREG_UNITS),
(EDR_DOUBLE, EDR_DOUBLE_XVG, EDR_DOUBLE_UNITS),
(EDR_BLOCKS, EDR_BLOCKS_XVG, EDR_BLOCKS_UNITS),
(Path(EDR), EDR_XVG, EDR_UNITS), ])
def edr(request):
edrfile, xvgfile = request.param
edrfile, xvgfile, unitfile = request.param
edr_dict = pyedr.edr_to_dict(edrfile)
edr_units = pyedr.get_unit_dictionary(edrfile)
xvgdata, xvgnames, xvgprec = read_xvg(xvgfile)
with open(unitfile, "rb") as f:
true_units = pickle.load(f)
xvgtime = xvgdata[:, 0]
xvgdata = xvgdata[:, 1:]
xvgcols = np.insert(xvgnames, 0, u'Time')
return EDR_Data(edr_dict, xvgdata, xvgtime, xvgnames,
xvgcols, xvgprec, edrfile, xvgfile)
return EDR_Data(edr_dict, edr_units, xvgdata, xvgtime, xvgnames,
xvgcols, xvgprec, true_units, edrfile, xvgfile)


class TestEdrToDict(object):
"""
Tests for :fun:`pyedr.edr_to_dict`.
"""

def test_output_type(self, edr):
"""
Test that the function returns a dictionary of ndarrays
"""
assert isinstance(edr.edr_dict, dict)
assert isinstance(edr.edr_dict['Time'], np.ndarray)

def test_units(self, edr):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The class name doesn't really match what this is doing, but I don't think it's worth being pedantic about it.

assert edr.edr_units == edr.true_units

def test_columns(self, edr):
"""
Test that the dictionary names match
Expand Down