Skip to content

Commit

Permalink
Added feature: drop negative weights
Browse files Browse the repository at this point in the history
  • Loading branch information
LauWien committed May 17, 2021
1 parent d25cfb7 commit c3bd282
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 5 deletions.
15 changes: 12 additions & 3 deletions pyam/_aggregate.py
Expand Up @@ -78,7 +78,7 @@ def _aggregate_recursive(df, variable):


def _aggregate_region(
df, variable, region, subregions=None, components=False, method="sum", weight=None
df, variable, region, subregions=None, components=False, method="sum", weight=None, drop_negative=True
):
"""Internal implementation for aggregating data over subregions"""
if not isstr(variable) and components is not False:
Expand Down Expand Up @@ -107,7 +107,7 @@ def _aggregate_region(
else:
weight_rows = subregion_df._apply_filters(variable=weight)
_data = _agg_weight(
subregion_df._data[rows], subregion_df._data[weight_rows], method
subregion_df._data[rows], subregion_df._data[weight_rows], method, drop_negative
)

# if not `components=False`, add components at the `region` level
Expand Down Expand Up @@ -169,7 +169,7 @@ def _group_and_agg(df, by, method=np.sum):
return df.groupby(cols).agg(_get_method_func(method))


def _agg_weight(data, weight, method):
def _agg_weight(data, weight, method, drop_negative):
"""Aggregate `data` by regions with weights, return indexed `pd.Series`"""

# only summation allowed with weights
Expand All @@ -181,6 +181,15 @@ def _agg_weight(data, weight, method):
if not data.droplevel(["variable", "unit"]).index.equals(weight.index):
raise ValueError("Inconsistent index between variable and weight!")

if drop_negative is True:
if any(i < 0 for i in weight):
logger.warning("Some of the weights are negative. All data with negative values will be dropped. "
"If you don't want the values to be dropped please use the keyword drop_negative=False")
# Drop negative weights
for index, value in weight.items():
if value < 0:
weight = weight.drop(labels=[index])

col1 = data.index.names.difference(["region"])
col2 = data.index.names.difference(["region", "variable", "unit"])
return (data * weight).groupby(col1).sum() / weight.groupby(col2).sum()
Expand Down
9 changes: 8 additions & 1 deletion pyam/core.py
Expand Up @@ -1323,6 +1323,7 @@ def aggregate_region(
method="sum",
weight=None,
append=False,
drop_negative=True
):
"""Aggregate a timeseries over a number of subregions
Expand Down Expand Up @@ -1351,6 +1352,8 @@ def aggregate_region(
append : bool, default False
append the aggregate timeseries to `self` and return None,
else return aggregate timeseries as new :class:`IamDataFrame`
drop_negative : bool, default True
    drop negative weights from the aggregation (e.g., when emissions are negative)
"""
_df = _aggregate_region(
self,
Expand All @@ -1360,6 +1363,7 @@ def aggregate_region(
components=components,
method=method,
weight=weight,
drop_negative=drop_negative,
)

# else, append to `self` or return as `IamDataFrame`
Expand All @@ -1379,6 +1383,7 @@ def check_aggregate_region(
method="sum",
weight=None,
exclude_on_fail=False,
drop_negative=True,
**kwargs,
):
"""Check whether a timeseries matches the aggregation across subregions
Expand All @@ -1404,12 +1409,14 @@ def check_aggregate_region(
(currently only supported with `method='sum'`)
exclude_on_fail : boolean, optional
flag scenarios failing validation as `exclude: True`
drop_negative : bool, default True
    drop negative weights from the aggregation (e.g., when emissions are negative)
kwargs : arguments for comparison of values
passed to :func:`numpy.isclose`
"""
# compute aggregate from subregions, return None if no subregions
df_subregions = _aggregate_region(
self, variable, region, subregions, components, method, weight
self, variable, region, subregions, components, method, weight, drop_negative
)

if df_subregions is None:
Expand Down
29 changes: 28 additions & 1 deletion tests/test_feature_aggregate.py
Expand Up @@ -5,7 +5,6 @@
import pandas as pd
from pyam import check_aggregate, IamDataFrame, IAMC_IDX
from pyam.testing import assert_iamframe_equal

from conftest import TEST_YEARS, DTS_MAPPING

LONG_IDX = IAMC_IDX + ["year"]
Expand Down Expand Up @@ -53,6 +52,20 @@
columns=["variable", "unit"] + TEST_YEARS,
)

# Fixture for weighted region-aggregation: carbon price weighted by CO2
# emissions, where one weight (reg_a, 2005) is negative so that the
# `drop_negative` option has an observable effect.
_NEG_WEIGHTS_VALUES = {
    ("Emissions|CO2", "EJ/yr"): {
        "reg_a": {2005: -4.0, 2010: 5.0},
        "reg_b": {2005: 2.0, 2010: 3.0},
    },
    ("Price|Carbon", "USD/tCO2"): {
        "reg_a": {2005: 6.0, 2010: 6.0},
        "reg_b": {2005: 3.0, 2010: 4.0},
    },
}
NEG_WEIGHTS_DF = pd.DataFrame(
    [
        ["model_a", "scen_a", region, variable, unit, year, value]
        for (variable, unit), regions in _NEG_WEIGHTS_VALUES.items()
        for region, years in regions.items()
        for year, value in years.items()
    ],
    columns=LONG_IDX + ["value"],
)


@pytest.mark.parametrize(
"variable,data",
Expand Down Expand Up @@ -299,6 +312,20 @@ def test_aggregate_region_with_components(simple_df):
assert _df.check_aggregate_region(v, components=["foo"]) is None


@pytest.mark.parametrize(
    "data, variable, weight",
    [(NEG_WEIGHTS_DF, "Price|Carbon", "Emissions|CO2")],
)
def test_agg_weight(data, variable, weight):
    """Dropping negative weights must change the weighted aggregate.

    The fixture has a negative weight (reg_a, 2005), so aggregating with
    `drop_negative=True` must yield a different result than keeping it.
    """
    # build the IamDataFrame once and aggregate with both settings
    df = IamDataFrame(data)
    kept = df.aggregate_region(variable, weight=weight, drop_negative=False)._data
    dropped = df.aggregate_region(variable, weight=weight, drop_negative=True)._data

    assert not kept.equals(dropped)


def test_aggregate_region_with_weights(simple_df):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
Expand Down

0 comments on commit c3bd282

Please sign in to comment.