Skip to content

Commit

Permalink
Support units in binary operations (#541)
Browse files Browse the repository at this point in the history
Co-authored-by: pjuergens <74722312+pjuergens@users.noreply.github.com>
  • Loading branch information
danielhuppmann and pjuergens committed Jun 8, 2021
1 parent a4c6c71 commit 8d8aa6b
Show file tree
Hide file tree
Showing 5 changed files with 338 additions and 95 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Next Release

- [#541](https://github.com/IAMconsortium/pyam/pull/541) Support units in binary operations
- [#538](https://github.com/IAMconsortium/pyam/pull/538) Add option to set defaults in binary operations
- [#537](https://github.com/IAMconsortium/pyam/pull/537) Enhance binary ops to support numerical arguments
- [#533](https://github.com/IAMconsortium/pyam/pull/533) Add an `apply()` function for custom mathematical operations
Expand Down
128 changes: 107 additions & 21 deletions pyam/_ops.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,39 @@
import operator
import pandas as pd
from pyam.index import append_index_level, get_index_levels
from pyam.index import append_index_level, get_index_levels, replace_index_values
from pyam.utils import to_list
from iam_units import registry
from pint import Quantity


# these functions have to be defined explicitly to allow calling them with keyword args
def add(a, b):
return operator.add(a, b)
return operator.add(*_make_series(a, b))


def subtract(a, b):
return operator.sub(a, b)
return operator.sub(*_make_series(a, b))


def multiply(a, b):
return operator.mul(a, b)
return operator.mul(*_make_series(a, b))


def divide(a, b):
return operator.truediv(a, b)
return operator.truediv(*_make_series(a, b))


def _make_series(a, b):
"""Cast instances of a pint.Quantity to a pd.Series
Calling an operation on a pd.Series and a pint.Quantity removes the units,
therefore a pint.Quantity is transformed to a pd.Series of quantities.
"""
if isinstance(a, Quantity):
return pd.Series([a] * len(b), index=b.index), b
if isinstance(b, Quantity):
return a, pd.Series([b] * len(a), index=a.index)
return a, b


KNOWN_OPS = {
Expand All @@ -29,7 +44,7 @@ def divide(a, b):
}


def _op_data(df, name, method, axis, fillna=None, args=(), **kwds):
def _op_data(df, name, method, axis, fillna=None, args=(), ignore_units=False, **kwds):
"""Internal implementation of numerical operations on timeseries"""

if axis not in df._data.index.names:
Expand All @@ -46,13 +61,39 @@ def _op_data(df, name, method, axis, fillna=None, args=(), **kwds):

# replace args and and kwds with values of `df._data` if applicable
# _data_args and _data_kwds track if an argument was replaced by `df._data` values
_args, _data_args = [None] * len(args), [False] * len(args)
n = len(args)
_args, _data_args, _units_args = [None] * n, [False] * n, [None] * n
for i, value in enumerate(args):
_args[i], _data_args[i] = _get_values(df, axis, value, cols, f"_arg{i}")
_args[i], _units_args[i], _data_args[i] = _get_values(
df, axis, value, cols, f"_arg{i}"
)

_data_kwds = {}
_data_kwds, _unit_kwds = {}, {}
for i, (key, value) in enumerate(kwds.items()):
kwds[key], _data_kwds[key] = _get_values(df, axis, value, cols, key)
kwds[key], _unit_kwds[key], _data_kwds[key] = _get_values(
df, axis, value, cols, key
)

# fast-pass on units: override pint for some methods if all kwds have the same unit
if (
method in [add, subtract, divide]
and ignore_units is False
and fillna is None
and len(_unit_kwds["a"]) == 1
and len(_unit_kwds["b"]) == 1
and registry.Unit(_unit_kwds["a"][0]) == registry.Unit(_unit_kwds["b"][0])
):
# activate ignore-units feature
ignore_units = _unit_kwds["a"][0] if method in [add, subtract] else ""
# downcast `pint.Quantity` to numerical value
kwds["a"], kwds["b"] = _to_value(kwds["a"]), _to_value(kwds["b"])

# cast args and kwds to pd.Series of pint.Quantity
if ignore_units is False:
for i, is_data in enumerate(_data_args):
_args[i] = _to_quantity(_args[i]) if is_data else _args[i]
for key, value in kwds.items():
kwds[key] = _to_quantity(value) if _data_kwds[key] else value

# merge all args and kwds that are based on `df._data` to apply fillna
if fillna:
Expand All @@ -70,14 +111,33 @@ def _op_data(df, name, method, axis, fillna=None, args=(), **kwds):
kwds[key] = _data[key]

# apply method and check that returned object is valid
_value = method(*_args, **kwds)
if not isinstance(_value, pd.Series):
result = method(*_args, **kwds)
if not isinstance(result, pd.Series):
msg = f"Value returned by `{method.__name__}` cannot be cast to an IamDataFrame"
raise ValueError(f"{msg}: {_value}")

# insert the index level and reset the series name
raise ValueError(f"{msg}: {result}")

# separate pint quantities into numerical value and unit (as index)
if ignore_units is False:
_value = pd.DataFrame(
[[i.magnitude, "{:~}".format(i.units)] for i in result.values],
columns=["value", "unit"],
index=result.index,
).set_index("unit", append=True)
_value.index = replace_index_values(_value, "unit", {"dimensionless": ""})

# otherwise, set unit (as index) to "unknown" or the value given by "ignore_units"
else:
index = append_index_level(
result.reset_index("unit", drop=True).index,
codes=0,
level="unknown" if ignore_units is True else ignore_units,
name="unit",
)
_value = pd.Series(result.values, index=index, name="value")

# append the `name` to the index on the `axis`
_value.index = append_index_level(_value.index, codes=0, level=name, name=axis)
return _value.rename(index=None)
return _value


def _get_values(df, axis, value, cols, name):
Expand All @@ -98,11 +158,37 @@ def _get_values(df, axis, value, cols, name):
Returns
-------
Either filtered timeseries from `df` or `value`
Tuple of the following:
- Either `df.data` downselected by `{axis: value}` or `value`
- List of units of the timeseries data or `value`
- Bool whether first item was derived from `df.data`
"""
# check if `value` is a `pint.Quantity` and return unit specifically
if isinstance(value, Quantity):
return value, [value.units], False
# try selecting from `df.data`
if any(v in get_index_levels(df._data, axis) for v in to_list(value)):
_data = df.filter(**{axis: value})._data.groupby(cols).sum().rename(index=name)
return _data, True
else:
return value, False
_df = df.filter(**{axis: value})
return _df._data.groupby(cols).sum().rename(index=name), _df.unit, True
# else, return value
return value, [], False


def _to_quantity(data):
"""Convert the values of an indexed pd.Series into pint.Quantity instances"""
return pd.Series(
[
registry.Quantity(v, u)
for v, u in zip(data.values, data.index.get_level_values("unit"))
],
index=data.reset_index("unit", drop=True).index,
name=data.name,
)


def _to_value(x):
"""Return the value of a pint.Quantity"""
if isinstance(x, Quantity):
return x.magnitude
return x
Loading

0 comments on commit 8d8aa6b

Please sign in to comment.