Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added feature drop negative weights #534

Merged
3 changes: 2 additions & 1 deletion AUTHORS.rst
Expand Up @@ -18,4 +18,5 @@ The following persons contributed to the development of the |pyam| framework:
- Jarmo Kikstra `@jkikstra <https://github.com/jkikstra>`_
- Michael Pimmer `@fonfon <https://github.com/fonfon>`_
- Patrick Jürgens `@pjuergens <https://github.com/pjuergens>`_
- Florian Maczek `@macflo8 <https://github.com/macflo8>`_
- Florian Maczek `@macflo8 <https://github.com/macflo8>`_
- Laura Wienpahl `@LauWien <https://github.com/LauWien>`_
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Expand Up @@ -4,6 +4,7 @@
- [#541](https://github.com/IAMconsortium/pyam/pull/541) Support units in binary operations
- [#538](https://github.com/IAMconsortium/pyam/pull/538) Add option to set defaults in binary operations
- [#537](https://github.com/IAMconsortium/pyam/pull/537) Enhance binary ops to support numerical arguments
- [#534](https://github.com/IAMconsortium/pyam/pull/534) Add feature to drop negative weights
- [#532](https://github.com/IAMconsortium/pyam/pull/532) Add an option to skip existing intermediate variables when aggregating recursively
- [#533](https://github.com/IAMconsortium/pyam/pull/533) Add an `apply()` function for custom mathematical operations
- [#527](https://github.com/IAMconsortium/pyam/pull/527) Add an in-dataframe basic mathematical operations `subtract`, `add`, `multiply`, `divide`
Expand Down
37 changes: 33 additions & 4 deletions pyam/_aggregate.py
Expand Up @@ -95,7 +95,14 @@ def _aggregate_recursive(df, variable, recursive):


def _aggregate_region(
df, variable, region, subregions=None, components=False, method="sum", weight=None
df,
variable,
region,
subregions=None,
components=False,
method="sum",
weight=None,
drop_negative_weights=True,
):
"""Internal implementation for aggregating data over subregions"""
if not isstr(variable) and components is not False:
Expand All @@ -120,11 +127,20 @@ def _aggregate_region(
subregion_df = df.filter(region=subregions)
rows = subregion_df._apply_filters(variable=variable)
if weight is None:

if drop_negative_weights is False:
raise ValueError(
"Dropping negative weights can only be used with `weights`!"
LauWien marked this conversation as resolved.
Show resolved Hide resolved
)

_data = _group_and_agg(subregion_df._data[rows], "region", method=method)
else:
danielhuppmann marked this conversation as resolved.
Show resolved Hide resolved
weight_rows = subregion_df._apply_filters(variable=weight)
_data = _agg_weight(
subregion_df._data[rows], subregion_df._data[weight_rows], method
subregion_df._data[rows],
subregion_df._data[weight_rows],
method,
drop_negative_weights,
)

# if not `components=False`, add components at the `region` level
Expand Down Expand Up @@ -186,7 +202,7 @@ def _group_and_agg(df, by, method=np.sum):
return df.groupby(cols).agg(_get_method_func(method))


def _agg_weight(data, weight, method):
def _agg_weight(data, weight, method, drop_negative_weights):
"""Aggregate `data` by regions with weights, return indexed `pd.Series`"""

# only summation allowed with weights
Expand All @@ -198,9 +214,22 @@ def _agg_weight(data, weight, method):
if not data.droplevel(["variable", "unit"]).index.equals(weight.index):
raise ValueError("Inconsistent index between variable and weight!")

if drop_negative_weights is True:
if any(weight < 0):
logger.warning(
"Some of the weights are negative. "
"All data weighted by negative values will be dropped. "
"To apply both positive and negative weights to the data, "
"please use the keyword argument `drop_negative_weights=False`."
)
# Drop negative weights
weight[weight < 0] = None

col1 = data.index.names.difference(["region"])
col2 = data.index.names.difference(["region", "variable", "unit"])
return (data * weight).groupby(col1).sum() / weight.groupby(col2).sum()
return (data * weight).groupby(col1).apply(
pd.Series.sum, skipna=False
) / weight.groupby(col2).sum()


def _get_method_func(method):
Expand Down
31 changes: 23 additions & 8 deletions pyam/core.py
Expand Up @@ -1339,6 +1339,7 @@ def aggregate_region(
method="sum",
weight=None,
append=False,
drop_negative_weights=True,
):
"""Aggregate a timeseries over a number of subregions

Expand All @@ -1349,9 +1350,9 @@ def aggregate_region(
----------
variable : str or list of str
variable(s) to be aggregated
region : str, default 'World'
region : str, optional
region to which data will be aggregated
subregions : list of str
subregions : list of str, optional
list of subregions, defaults to all regions other than `region`
components : bool or list of str, optional
variables at the `region` level to be included in the aggregation
Expand All @@ -1361,12 +1362,14 @@ def aggregate_region(
method : func or str, optional
method to use for aggregation,
e.g. :func:`numpy.mean`, :func:`numpy.sum`, 'min', 'max'
weight : str, default None
weight : str, optional
variable to use as weight for the aggregation
(currently only supported with `method='sum'`)
append : bool, default False
append : bool, optional
append the aggregate timeseries to `self` and return None,
else return aggregate timeseries as new :class:`IamDataFrame`
drop_negative_weights : bool, optional
removes any aggregated values that are computed using negative weights

Returns
-------
Expand All @@ -1377,6 +1380,7 @@ def aggregate_region(
--------
add : Add timeseries data items `a` and `b` along an `axis`
aggregate : Aggregate timeseries data along the `variable` hierarchy.

"""
_df = _aggregate_region(
self,
Expand All @@ -1386,6 +1390,7 @@ def aggregate_region(
components=components,
method=method,
weight=weight,
drop_negative_weights=drop_negative_weights,
)

# else, append to `self` or return as `IamDataFrame`
Expand All @@ -1405,6 +1410,7 @@ def check_aggregate_region(
method="sum",
weight=None,
exclude_on_fail=False,
drop_negative_weights=True,
**kwargs,
):
"""Check whether a timeseries matches the aggregation across subregions
Expand All @@ -1413,11 +1419,11 @@ def check_aggregate_region(
----------
variable : str or list of str
variable(s) to be checked for matching aggregation of subregions
region : str, default 'World'
region : str, optional
region to be checked for matching aggregation of subregions
subregions : list of str
subregions : list of str, optional
list of subregions, defaults to all regions other than `region`
components : bool or list of str, default False
components : bool or list of str, optional
variables at the `region` level to be included in the aggregation
(ignored if False); if `True`, use all sub-categories of `variable`
included in `region` but not in any of the `subregions`;
Expand All @@ -1430,12 +1436,21 @@ def check_aggregate_region(
(currently only supported with `method='sum'`)
exclude_on_fail : boolean, optional
flag scenarios failing validation as `exclude: True`
drop_negative_weights : bool, optional
removes any aggregated values that are computed using negative weights
kwargs : arguments for comparison of values
passed to :func:`numpy.isclose`
"""
# compute aggregate from subregions, return None if no subregions
df_subregions = _aggregate_region(
self, variable, region, subregions, components, method, weight
self,
variable,
region,
subregions,
components,
method,
weight,
drop_negative_weights,
)

if df_subregions is None:
Expand Down
43 changes: 42 additions & 1 deletion tests/test_feature_aggregate.py
Expand Up @@ -5,7 +5,6 @@
import pandas as pd
from pyam import check_aggregate, IamDataFrame, IAMC_IDX
from pyam.testing import assert_iamframe_equal

from conftest import TEST_YEARS, DTS_MAPPING

LONG_IDX = IAMC_IDX + ["year"]
Expand Down Expand Up @@ -42,6 +41,20 @@
columns=LONG_IDX + ["value"],
)

# Fixture data for weighted-aggregation tests: carbon prices weighted by
# CO2 emissions, where `reg_a` has a *negative* emissions weight in 2005
# so that the drop-negative-weights behaviour can be exercised.
# (Removed GitHub review-thread markers that were pasted into the literal
# and broke the module's syntax.)
NEG_WEIGHTS_DF = pd.DataFrame(
    [
        ["model_a", "scen_a", "reg_a", "Emissions|CO2", "EJ/yr", 2005, -4.0],
        ["model_a", "scen_a", "reg_a", "Emissions|CO2", "EJ/yr", 2010, 5.0],
        ["model_a", "scen_a", "reg_b", "Emissions|CO2", "EJ/yr", 2005, 2.0],
        ["model_a", "scen_a", "reg_b", "Emissions|CO2", "EJ/yr", 2010, 3.0],
        ["model_a", "scen_a", "reg_a", "Price|Carbon", "USD/tCO2", 2005, 6.0],
        ["model_a", "scen_a", "reg_a", "Price|Carbon", "USD/tCO2", 2010, 6.0],
        ["model_a", "scen_a", "reg_b", "Price|Carbon", "USD/tCO2", 2005, 3.0],
        ["model_a", "scen_a", "reg_b", "Price|Carbon", "USD/tCO2", 2010, 4.0],
    ],
    columns=LONG_IDX + ["value"],
)


@pytest.mark.parametrize(
"variable,data",
Expand Down Expand Up @@ -300,6 +313,34 @@ def test_aggregate_region_with_components(simple_df):
assert _df.check_aggregate_region(v, components=["foo"]) is None


def test_agg_weight():
    """Negative weights are dropped by default and kept on request.

    Uses `NEG_WEIGHTS_DF`, where `reg_a` has a negative `Emissions|CO2`
    weight in 2005.  (Removed GitHub review-thread markers that were
    pasted into the function body and broke the module's syntax.)
    """
    variable = "Price|Carbon"
    weight = "Emissions|CO2"

    # negative weights should be dropped by default: the 2005 value is
    # removed, leaving only the weighted 2010 aggregate
    obs_1 = IamDataFrame(NEG_WEIGHTS_DF).aggregate_region(variable, weight=weight)._data
    exp_1 = np.array([5.25])
    np.testing.assert_array_equal(obs_1.values, exp_1)

    # negative weights shouldn't be dropped if drop_negative_weights=False
    obs_2 = (
        IamDataFrame(NEG_WEIGHTS_DF)
        .aggregate_region(variable, weight=weight, drop_negative_weights=False)
        ._data
    )
    exp_2 = np.array([9, 5.25])
    np.testing.assert_array_equal(obs_2.values, exp_2)


def test_aggregate_region_with_no_weights_drop_negative_weights_raises(simple_df):
    """`drop_negative_weights=False` without `weight` raises a ValueError."""
    with pytest.raises(ValueError):
        simple_df.aggregate_region("Price|Carbon", drop_negative_weights=False)


def test_aggregate_region_with_weights(simple_df):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
Expand Down