Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset: don't scale non-default units #470

Merged
merged 2 commits into from Nov 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
32 changes: 25 additions & 7 deletions qcfractal/interface/collections/dataset.py
Expand Up @@ -11,11 +11,10 @@
import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

from pydantic import Field, validator
from qcelemental import constants
from qcelemental.models.types import Array
from tqdm import tqdm

from ..models import Citation, ComputeResponse, ObjectId, ProtoModel
from ..statistics import wrap_statistics
Expand All @@ -39,12 +38,15 @@ class MoleculeEntry(ProtoModel):
class ContributedValues(ProtoModel):
name: str = Field(..., description="The name of the contributed values.")
values: Any = Field(..., description="The values in the contributed values.")
index: Array[str] = Field(..., description="The entry index for the contributed values, matches the order of the `values` array.")
values_structure: Dict[str, Any] = Field({}, description="A machine readable description of the values structure. Typically not needed.")
index: Array[str] = Field(
..., description="The entry index for the contributed values, matches the order of the `values` array.")
values_structure: Dict[str, Any] = Field(
{}, description="A machine readable description of the values structure. Typically not needed.")

theory_level: Union[str, Dict[str, str]] = Field(..., description="A string representation of the theory level.")
units: str = Field(..., description="The units of the values, can be any valid QCElemental unit.")
theory_level_details: Optional[Union[str, Dict[str, Optional[str]]]] = Field(None, description="A detailed reprsentation of the theory level.")
theory_level_details: Optional[Union[str, Dict[str, Optional[str]]]] = Field(
None, description="A detailed reprsentation of the theory level.")

citations: Optional[List[Citation]] = Field(None, description="Citations associated with the contributed values.")
external_url: Optional[str] = Field(None, description="An external URL to the raw contributed values data.")
Expand All @@ -59,6 +61,7 @@ def _make_array(cls, v):

return v


class Dataset(Collection):
"""
The Dataset class for homogeneous computations on many molecules.
Expand Down Expand Up @@ -1171,8 +1174,23 @@ def units(self):

@units.setter
def units(self, value):
    """Convert the cached data to ``value`` and record it as the dataset units.

    Each column of ``self.df`` is rescaled individually from the units stored
    in its own ``self._column_metadata`` entry. Columns whose units cannot be
    converted to ``value`` (a dimensionality mismatch) are left unscaled and
    keep their recorded units.

    Parameters
    ----------
    value : str
        The target units, passed to ``constants.conversion_factor``.

    Raises
    ------
    ValueError, TypeError
        Re-raised from the conversion when the error is anything other than a
        ``DimensionalityError``.
    """
    # NOTE: the frame must not also be scaled as a whole (``self.df *= ...``)
    # before this loop; that converts every column twice and rescales columns
    # that are stored in other units.
    for column in self.df.columns:
        column_units = self._column_metadata[column]["units"]
        try:
            self.df[column] *= constants.conversion_factor(column_units, value)

            # Cast units to quantities so that `kcal / mol` == `kilocalorie / mole`
            if constants.Quantity(column_units) != constants.Quantity(self._units):
                warnings.warn(f"Data column '{column}' did not have the same units as the dataset. "
                              f"This has been corrected.")
            self._column_metadata[column]["units"] = value
        except (ValueError, TypeError) as e:
            # This is meant to catch pint.errors.DimensionalityError without importing pint,
            # which is too slow. The exception is matched by class name; both base classes
            # are listed because older pint releases derive it from ValueError and newer
            # ones (believed to be >= 0.10) from TypeError.
            if e.__class__.__name__ != "DimensionalityError":
                raise
    self._units = value

def set_default_program(self, program: str) -> bool:
Expand Down
15 changes: 15 additions & 0 deletions qcfractal/tests/test_collections.py
Expand Up @@ -439,6 +439,17 @@ def test_dataset_contributed_units(contributed_dataset_fixture):
assert qcel.constants.ureg(
ds._column_metadata[ds.get_values(name="Fake Dipole").columns[0]]["units"]) == qcel.constants.ureg("e * bohr")

old_units = ds.units
assert old_units == "kcal / mol"
ds.units = "kcal/mol"
before = ds.get_values()
ds.units = "hartree"
after = ds.get_values()
assert before["Fake Energy"][0] == after["Fake Energy"][0] / qcel.constants.conversion_factor(
"kcal/mol", "hartree")
assert before["Fake Gradient"][0][0, 0] == after["Fake Gradient"][0][0, 0]
ds.units = old_units


def test_dataset_contributed_mixed_values(contributed_dataset_fixture):
_, ds = contributed_dataset_fixture
Expand Down Expand Up @@ -531,6 +542,7 @@ def test_reactiondataset_check_state(fractal_compute_server):
def test_contributed_dataset_values_subset(contributed_dataset_fixture, use_cache):
client, ds = contributed_dataset_fixture

ds._clear_cache()
allvals = ds.get_values()
ds._clear_cache()

Expand Down Expand Up @@ -1061,6 +1073,7 @@ def test_s22_view_identical(s22_fixture):
assert_view_identical(ds)


@pytest.mark.slow
def test_view_download_remote(s22_fixture):
client, ds = s22_fixture

Expand Down Expand Up @@ -1118,11 +1131,13 @@ def test_contributed_dataset_plaintextview_write(contributed_dataset_fixture, tm
ds.to_file(tmpdir / "test.tar.gz", "plaintext")


@pytest.mark.slow
def test_s22_dataset_plaintextview_write(s22_fixture, tmpdir):
    """Write the dataset from ``s22_fixture`` to a plaintext archive under ``tmpdir``."""
    _client, dataset = s22_fixture
    archive_path = tmpdir / "test.tar.gz"
    dataset.to_file(archive_path, "plaintext")


@pytest.mark.slow
def test_qm3_dataset_plaintextview_write(qm3_fixture, tmpdir):
_, ds = qm3_fixture
ds.to_file(tmpdir / "test.tar.gz", "plaintext")
Expand Down