Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added feature drop negative weights #534

Merged
3 changes: 2 additions & 1 deletion AUTHORS.rst
Expand Up @@ -18,4 +18,5 @@ The following persons contributed to the development of the |pyam| framework:
- Jarmo Kikstra `@jkikstra <https://github.com/jkikstra>`_
- Michael Pimmer `@fonfon <https://github.com/fonfon>`_
- Patrick Jürgens `@pjuergens <https://github.com/pjuergens>`_
- Florian Maczek `@macflo8 <https://github.com/macflo8>`_
- Florian Maczek `@macflo8 <https://github.com/macflo8>`_
- Laura Wienpahl `@LauWien <https://github.com/LauWien>`_
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Expand Up @@ -4,6 +4,7 @@
- [#541](https://github.com/IAMconsortium/pyam/pull/541) Support units in binary operations
- [#538](https://github.com/IAMconsortium/pyam/pull/538) Add option to set defaults in binary operations
- [#537](https://github.com/IAMconsortium/pyam/pull/537) Enhance binary ops to support numerical arguments
- [#534](https://github.com/IAMconsortium/pyam/pull/534) Add feature to drop negative weights
- [#532](https://github.com/IAMconsortium/pyam/pull/532) Add an option to skip existing intermediate variables when aggregating recursively
- [#533](https://github.com/IAMconsortium/pyam/pull/533) Add an `apply()` function for custom mathematical operations
- [#527](https://github.com/IAMconsortium/pyam/pull/527) Add an in-dataframe basic mathematical operations `subtract`, `add`, `multiply`, `divide`
Expand Down
37 changes: 33 additions & 4 deletions pyam/_aggregate.py
Expand Up @@ -95,7 +95,14 @@ def _aggregate_recursive(df, variable, recursive):


def _aggregate_region(
df, variable, region, subregions=None, components=False, method="sum", weight=None
df,
variable,
region,
subregions=None,
components=False,
method="sum",
weight=None,
drop_negative_weights=True,
):
"""Internal implementation for aggregating data over subregions"""
if not isstr(variable) and components is not False:
Expand All @@ -120,11 +127,20 @@ def _aggregate_region(
subregion_df = df.filter(region=subregions)
rows = subregion_df._apply_filters(variable=variable)
if weight is None:

if drop_negative_weights is False:
raise ValueError(
"Dropping negative weights can only be used with `weights`!"
LauWien marked this conversation as resolved.
Show resolved Hide resolved
)

_data = _group_and_agg(subregion_df._data[rows], "region", method=method)
else:
danielhuppmann marked this conversation as resolved.
Show resolved Hide resolved
weight_rows = subregion_df._apply_filters(variable=weight)
_data = _agg_weight(
subregion_df._data[rows], subregion_df._data[weight_rows], method
subregion_df._data[rows],
subregion_df._data[weight_rows],
method,
drop_negative_weights,
)

# if not `components=False`, add components at the `region` level
Expand Down Expand Up @@ -186,7 +202,7 @@ def _group_and_agg(df, by, method=np.sum):
return df.groupby(cols).agg(_get_method_func(method))


def _agg_weight(data, weight, method):
def _agg_weight(data, weight, method, drop_negative_weights):
"""Aggregate `data` by regions with weights, return indexed `pd.Series`"""

# only summation allowed with weights
Expand All @@ -198,9 +214,22 @@ def _agg_weight(data, weight, method):
if not data.droplevel(["variable", "unit"]).index.equals(weight.index):
raise ValueError("Inconsistent index between variable and weight!")

if drop_negative_weights is True:
if any(weight < 0):
logger.warning(
"Some of the weights are negative. "
"All data weighted by negative values will be dropped. "
"To apply both positive and negative weights to the data, "
"please use the keyword argument `drop_negative_weights=False`."
)
# Drop negative weights
weight[weight < 0] = None

col1 = data.index.names.difference(["region"])
col2 = data.index.names.difference(["region", "variable", "unit"])
return (data * weight).groupby(col1).sum() / weight.groupby(col2).sum()
return (data * weight).groupby(col1).apply(
pd.Series.sum, skipna=False
) / weight.groupby(col2).sum()


def _get_method_func(method):
Expand Down
31 changes: 23 additions & 8 deletions pyam/core.py
Expand Up @@ -1339,6 +1339,7 @@ def aggregate_region(
method="sum",
weight=None,
append=False,
drop_negative_weights=True,
):
"""Aggregate a timeseries over a number of subregions

Expand All @@ -1349,9 +1350,9 @@ def aggregate_region(
----------
variable : str or list of str
variable(s) to be aggregated
region : str, default 'World'
region : str, optional
region to which data will be aggregated
subregions : list of str
subregions : list of str, optional
list of subregions, defaults to all regions other than `region`
components : bool or list of str, optional
variables at the `region` level to be included in the aggregation
Expand All @@ -1361,12 +1362,14 @@ def aggregate_region(
method : func or str, optional
method to use for aggregation,
e.g. :func:`numpy.mean`, :func:`numpy.sum`, 'min', 'max'
weight : str, default None
weight : str, optional
variable to use as weight for the aggregation
(currently only supported with `method='sum'`)
append : bool, default False
append : bool, optional
append the aggregate timeseries to `self` and return None,
else return aggregate timeseries as new :class:`IamDataFrame`
drop_negative_weights : bool, optional
removes any aggregated values that are computed using negative weights

Returns
-------
Expand All @@ -1377,6 +1380,7 @@ def aggregate_region(
--------
add : Add timeseries data items `a` and `b` along an `axis`
aggregate : Aggregate timeseries data along the `variable` hierarchy.

"""
_df = _aggregate_region(
self,
Expand All @@ -1386,6 +1390,7 @@ def aggregate_region(
components=components,
method=method,
weight=weight,
drop_negative_weights=drop_negative_weights,
)

# else, append to `self` or return as `IamDataFrame`
Expand All @@ -1405,6 +1410,7 @@ def check_aggregate_region(
method="sum",
weight=None,
exclude_on_fail=False,
drop_negative_weights=True,
**kwargs,
):
"""Check whether a timeseries matches the aggregation across subregions
Expand All @@ -1413,11 +1419,11 @@ def check_aggregate_region(
----------
variable : str or list of str
variable(s) to be checked for matching aggregation of subregions
region : str, default 'World'
region : str, optional
region to be checked for matching aggregation of subregions
subregions : list of str
subregions : list of str, optional
list of subregions, defaults to all regions other than `region`
components : bool or list of str, default False
components : bool or list of str, optional
variables at the `region` level to be included in the aggregation
(ignored if False); if `True`, use all sub-categories of `variable`
included in `region` but not in any of the `subregions`;
Expand All @@ -1430,12 +1436,21 @@ def check_aggregate_region(
(currently only supported with `method='sum'`)
exclude_on_fail : boolean, optional
flag scenarios failing validation as `exclude: True`
drop_negative_weights : bool, optional
removes any aggregated values that are computed using negative weights
kwargs : arguments for comparison of values
passed to :func:`numpy.isclose`
"""
# compute aggregate from subregions, return None if no subregions
df_subregions = _aggregate_region(
self, variable, region, subregions, components, method, weight
self,
variable,
region,
subregions,
components,
method,
weight,
drop_negative_weights,
)

if df_subregions is None:
Expand Down
43 changes: 42 additions & 1 deletion tests/test_feature_aggregate.py
Expand Up @@ -5,7 +5,6 @@
import pandas as pd
from pyam import check_aggregate, IamDataFrame, IAMC_IDX
from pyam.testing import assert_iamframe_equal

from conftest import TEST_YEARS, DTS_MAPPING

LONG_IDX = IAMC_IDX + ["year"]
Expand Down Expand Up @@ -42,6 +41,20 @@
columns=LONG_IDX + ["value"],
)

# Fixture data for weighted-aggregation tests: carbon prices weighted by
# CO2 emissions, where `reg_a` has a *negative* emissions weight in 2005
# so that the drop-negative-weights behaviour can be exercised.
# (Removed GitHub review-thread markers that were pasted into the literal
# and broke the module's syntax.)
NEG_WEIGHTS_DF = pd.DataFrame(
    [
        ["model_a", "scen_a", "reg_a", "Emissions|CO2", "EJ/yr", 2005, -4.0],
        ["model_a", "scen_a", "reg_a", "Emissions|CO2", "EJ/yr", 2010, 5.0],
        ["model_a", "scen_a", "reg_b", "Emissions|CO2", "EJ/yr", 2005, 2.0],
        ["model_a", "scen_a", "reg_b", "Emissions|CO2", "EJ/yr", 2010, 3.0],
        ["model_a", "scen_a", "reg_a", "Price|Carbon", "USD/tCO2", 2005, 6.0],
        ["model_a", "scen_a", "reg_a", "Price|Carbon", "USD/tCO2", 2010, 6.0],
        ["model_a", "scen_a", "reg_b", "Price|Carbon", "USD/tCO2", 2005, 3.0],
        ["model_a", "scen_a", "reg_b", "Price|Carbon", "USD/tCO2", 2010, 4.0],
    ],
    columns=LONG_IDX + ["value"],
)


@pytest.mark.parametrize(
"variable,data",
Expand Down Expand Up @@ -300,6 +313,34 @@ def test_aggregate_region_with_components(simple_df):
assert _df.check_aggregate_region(v, components=["foo"]) is None


def test_agg_weight():
    """Negative weights are dropped by default and kept on request.

    Uses `NEG_WEIGHTS_DF`, where `reg_a` has a negative `Emissions|CO2`
    weight in 2005.  (Removed GitHub review-thread markers that were
    pasted into the function body and broke the module's syntax.)
    """
    variable = "Price|Carbon"
    weight = "Emissions|CO2"

    # negative weights should be dropped by default: the 2005 value is
    # removed, leaving only the weighted 2010 aggregate
    obs_1 = IamDataFrame(NEG_WEIGHTS_DF).aggregate_region(variable, weight=weight)._data
    exp_1 = np.array([5.25])
    np.testing.assert_array_equal(obs_1.values, exp_1)

    # negative weights shouldn't be dropped if drop_negative_weights=False
    obs_2 = (
        IamDataFrame(NEG_WEIGHTS_DF)
        .aggregate_region(variable, weight=weight, drop_negative_weights=False)
        ._data
    )
    exp_2 = np.array([9, 5.25])
    np.testing.assert_array_equal(obs_2.values, exp_2)


def test_aggregate_region_with_no_weights_drop_negative_weights_raises(simple_df):
    """`drop_negative_weights=False` without `weight` raises a ValueError."""
    with pytest.raises(ValueError):
        simple_df.aggregate_region("Price|Carbon", drop_negative_weights=False)


def test_aggregate_region_with_weights(simple_df):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
Expand Down