Skip to content

Commit

Permalink
Added feature: drop negative weights
Browse files Browse the repository at this point in the history
  • Loading branch information
LauWien committed May 17, 2021
1 parent d25cfb7 commit c3bd282
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 5 deletions.
15 changes: 12 additions & 3 deletions pyam/_aggregate.py
Expand Up @@ -78,7 +78,7 @@ def _aggregate_recursive(df, variable):


def _aggregate_region(
df, variable, region, subregions=None, components=False, method="sum", weight=None
df, variable, region, subregions=None, components=False, method="sum", weight=None, drop_negative=True
):
"""Internal implementation for aggregating data over subregions"""
if not isstr(variable) and components is not False:
Expand Down Expand Up @@ -107,7 +107,7 @@ def _aggregate_region(
else:
weight_rows = subregion_df._apply_filters(variable=weight)
_data = _agg_weight(
subregion_df._data[rows], subregion_df._data[weight_rows], method
subregion_df._data[rows], subregion_df._data[weight_rows], method, drop_negative
)

# if not `components=False`, add components at the `region` level
Expand Down Expand Up @@ -169,7 +169,7 @@ def _group_and_agg(df, by, method=np.sum):
return df.groupby(cols).agg(_get_method_func(method))


def _agg_weight(data, weight, method):
def _agg_weight(data, weight, method, drop_negative):
"""Aggregate `data` by regions with weights, return indexed `pd.Series`"""

# only summation allowed with weights
Expand All @@ -181,6 +181,15 @@ def _agg_weight(data, weight, method):
if not data.droplevel(["variable", "unit"]).index.equals(weight.index):
raise ValueError("Inconsistent index between variable and weight!")

if drop_negative is True:
if any(i < 0 for i in weight):
logger.warning("Some of the weights are negative. All data with negative values will be dropped. "
"If you don't want the values to be dropped please use the keyword drop_negative=False")
# Drop negative weights
for index, value in weight.items():
if value < 0:
weight = weight.drop(labels=[index])

col1 = data.index.names.difference(["region"])
col2 = data.index.names.difference(["region", "variable", "unit"])
return (data * weight).groupby(col1).sum() / weight.groupby(col2).sum()
Expand Down
9 changes: 8 additions & 1 deletion pyam/core.py
Expand Up @@ -1323,6 +1323,7 @@ def aggregate_region(
method="sum",
weight=None,
append=False,
drop_negative=True
):
"""Aggregate a timeseries over a number of subregions
Expand Down Expand Up @@ -1351,6 +1352,8 @@ def aggregate_region(
append : bool, default False
append the aggregate timeseries to `self` and return None,
else return aggregate timeseries as new :class:`IamDataFrame`
drop_negative : bool, default True
    drop negative weights from the aggregation (e.g., when emissions are negative)
"""
_df = _aggregate_region(
self,
Expand All @@ -1360,6 +1363,7 @@ def aggregate_region(
components=components,
method=method,
weight=weight,
drop_negative=drop_negative,
)

# else, append to `self` or return as `IamDataFrame`
Expand All @@ -1379,6 +1383,7 @@ def check_aggregate_region(
method="sum",
weight=None,
exclude_on_fail=False,
drop_negative=True,
**kwargs,
):
"""Check whether a timeseries matches the aggregation across subregions
Expand All @@ -1404,12 +1409,14 @@ def check_aggregate_region(
(currently only supported with `method='sum'`)
exclude_on_fail : boolean, optional
flag scenarios failing validation as `exclude: True`
drop_negative : bool, default True
    drop negative weights from the aggregation (e.g., when emissions are negative)
kwargs : arguments for comparison of values
passed to :func:`numpy.isclose`
"""
# compute aggregate from subregions, return None if no subregions
df_subregions = _aggregate_region(
self, variable, region, subregions, components, method, weight
self, variable, region, subregions, components, method, weight, drop_negative
)

if df_subregions is None:
Expand Down
29 changes: 28 additions & 1 deletion tests/test_feature_aggregate.py
Expand Up @@ -5,7 +5,6 @@
import pandas as pd
from pyam import check_aggregate, IamDataFrame, IAMC_IDX
from pyam.testing import assert_iamframe_equal

from conftest import TEST_YEARS, DTS_MAPPING

LONG_IDX = IAMC_IDX + ["year"]
Expand Down Expand Up @@ -53,6 +52,20 @@
columns=["variable", "unit"] + TEST_YEARS,
)

# Fixture for weighted region-aggregation: carbon price weighted by CO2
# emissions, where one weight (reg_a, 2005) is negative so that the
# `drop_negative` option has an observable effect.
_NEG_WEIGHTS_VALUES = {
    ("Emissions|CO2", "EJ/yr"): {
        "reg_a": {2005: -4.0, 2010: 5.0},
        "reg_b": {2005: 2.0, 2010: 3.0},
    },
    ("Price|Carbon", "USD/tCO2"): {
        "reg_a": {2005: 6.0, 2010: 6.0},
        "reg_b": {2005: 3.0, 2010: 4.0},
    },
}
NEG_WEIGHTS_DF = pd.DataFrame(
    [
        ["model_a", "scen_a", region, variable, unit, year, value]
        for (variable, unit), regions in _NEG_WEIGHTS_VALUES.items()
        for region, years in regions.items()
        for year, value in years.items()
    ],
    columns=LONG_IDX + ["value"],
)


@pytest.mark.parametrize(
"variable,data",
Expand Down Expand Up @@ -299,6 +312,20 @@ def test_aggregate_region_with_components(simple_df):
assert _df.check_aggregate_region(v, components=["foo"]) is None


@pytest.mark.parametrize(
    "data, variable, weight",
    [(NEG_WEIGHTS_DF, "Price|Carbon", "Emissions|CO2")],
)
def test_agg_weight(data, variable, weight):
    """Dropping negative weights must change the weighted aggregate.

    The fixture has a negative weight (reg_a, 2005), so aggregating with
    `drop_negative=True` must yield a different result than keeping it.
    """
    # build the IamDataFrame once and aggregate with both settings
    df = IamDataFrame(data)
    kept = df.aggregate_region(variable, weight=weight, drop_negative=False)._data
    dropped = df.aggregate_region(variable, weight=weight, drop_negative=True)._data

    assert not kept.equals(dropped)


def test_aggregate_region_with_weights(simple_df):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
Expand Down

0 comments on commit c3bd282

Please sign in to comment.