From 510851d06f263496e3c1a65bd3499129c28b3831 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 23 Dec 2019 16:32:39 +0100 Subject: [PATCH] add feature to "downscale" timeseries data to subregions (#313) --- RELEASE_NOTES.md | 3 +- pyam/core.py | 55 ++++++++++++++++++++++++++++++--- tests/conftest.py | 17 +++++----- tests/test_feature_aggregate.py | 10 +++--- tests/test_feature_downscale.py | 26 ++++++++++++++++ 5 files changed, 94 insertions(+), 17 deletions(-) create mode 100644 tests/test_feature_downscale.py diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index cc9fbbe24..2b55b5946 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -8,7 +8,8 @@ region-level. To keep the previous behaviour, add `components=True`. ## Individual Updates -- [#312](https://github.com/IAMconsortium/pyam/pull/312) allow passing list of variables to `aggregate` functions +- [#313](https://github.com/IAMconsortium/pyam/pull/313) Add feature to `downscale` timeseries data to subregions using another variable as proxy +- [#312](https://github.com/IAMconsortium/pyam/pull/312) Allow passing list of variables to `aggregate` functions - [#305](https://github.com/IAMconsortium/pyam/pull/305) Add `method` and `weight` options to the (region) aggregation functions - [#302](https://github.com/IAMconsortium/pyam/pull/302) Rework the tutorials - [#301](https://github.com/IAMconsortium/pyam/pull/301) Bugfix when using `to_excel()` with a `pd.ExcelWriter` diff --git a/pyam/core.py b/pyam/core.py index 042559c27..664a671c2 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -198,6 +198,7 @@ def variables(self, include_units=False): def append(self, other, ignore_meta_conflict=False, inplace=False, **kwargs): """Append any castable object to this IamDataFrame. + Columns in `other.meta` that are not in `self.meta` are always merged, duplicate region-variable-unit-year rows raise a ValueError. 
@@ -651,7 +652,7 @@ def rename(self, mapping=None, inplace=False, append=False, inplace: bool, default False if True, do operation inplace and return None append: bool, default False - if True, append renamed timeseries to IamDataFrame + append renamed timeseries to self; else, return new `IamDataFrame` check_duplicates: bool, default True check whether conflict between existing and renamed data exists. If True, raise ValueError; if False, rename and merge @@ -779,7 +780,7 @@ def aggregate(self, variable, components=None, method='sum', append=False): method: func or str, default 'sum' method to use for aggregation, e.g. np.mean, np.sum, 'min', 'max' append: bool, default False - append the aggregate timeseries to `data` and return None, + append the aggregate timeseries to `self` and return None, else return aggregate timeseries """ # list of variables require default components (no manual list) @@ -892,7 +893,7 @@ def aggregate_region(self, variable, region='World', subregions=None, variable to use as weight for the aggregation (currently only supported with `method='sum'`) append: bool, default False - append the aggregate timeseries to `data` and return None, + append the aggregate timeseries to `self` and return None, else return aggregate timeseries """ if not isstr(variable) and components is not False: @@ -1009,11 +1010,53 @@ def check_aggregate_region(self, variable, region='World', subregions=None, return IamDataFrame(diff, region=region).timeseries() - def _all_other_regions(self, region, variable): + def downscale_region(self, variable, proxy, region='World', + subregions=None, append=False): + """Downscale a timeseries to a number of subregions + + Parameters + ---------- + variable: str or list of str + variable(s) to be downscaled + proxy: str + variable to be used as proxy (i.e., weight) for the downscaling + region: str, default 'World' + region from which data will be downscaled + subregions: list of str + list of subregions, defaults to all regions other than `region` + append:
bool, default False + append the downscaled timeseries to `self` and return None, + else return downscaled data as new `IamDataFrame` + """ + # get default subregions if not specified + subregions = subregions or self._all_other_regions(region) + + # filter relevant data, transform to `pd.Series` with appropriate index + _df = self.data[self._apply_filters(variable=proxy, region=subregions)] + _proxy = _df.set_index(self._get_cols(['region', 'year'])).value + _total = _df.groupby(self._get_cols(['year'])).value.sum() + + _value = ( + self.data[self._apply_filters(variable=variable, region=region)] + .set_index(self._get_cols(['variable', 'unit', 'year'])).value + ) + + # compute downscaled data + _data = _value * _proxy / _total + + if append is True: + self.append(_data, inplace=True) + else: + df = IamDataFrame(_data) + df.meta = self.meta.loc[_make_index(df.data)] + return df + + def _all_other_regions(self, region, variable=None): """Return list of regions other than `region` containing `variable`""" rows = self._apply_filters(variable=variable) return set(self.data[rows].region) - set([region]) + def _variable_components(self, variable, level=0): """Get all components (sub-categories) of a variable for a given level @@ -1024,6 +1067,10 @@ def _variable_components(self, variable, level=0): return var_list[pattern_match(var_list, '{}|*'.format(variable), level=level)] + def _get_cols(self, cols): + """Return a list of columns of `self.data`""" + return META_IDX + cols + self.extra_cols + def check_internal_consistency(self, **kwargs): """Check whether a scenario ensemble is internally consistent diff --git a/tests/conftest.py b/tests/conftest.py index a09212433..9aba01ad4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,15 +26,15 @@ FULL_FEATURE_DF = pd.DataFrame([ - ['World', 'Primary Energy', 'EJ/y', 10, 15], - ['reg_a', 'Primary Energy', 'EJ/y', 6, 9], + ['World', 'Primary Energy', 'EJ/y', 12, 15], + ['reg_a', 'Primary Energy', 'EJ/y', 8, 9], 
['reg_b', 'Primary Energy', 'EJ/y', 4, 6], - ['World', 'Primary Energy|Coal', 'EJ/y', 7, 10], - ['reg_a', 'Primary Energy|Coal', 'EJ/y', 5, 7], - ['reg_b', 'Primary Energy|Coal', 'EJ/y', 2, 3], + ['World', 'Primary Energy|Coal', 'EJ/y', 9, 10], + ['reg_a', 'Primary Energy|Coal', 'EJ/y', 6, 6], + ['reg_b', 'Primary Energy|Coal', 'EJ/y', 3, 4], ['World', 'Primary Energy|Wind', 'EJ/y', 3, 5], - ['reg_a', 'Primary Energy|Wind', 'EJ/y', 1, 2], - ['reg_b', 'Primary Energy|Wind', 'EJ/y', 2, 3], + ['reg_a', 'Primary Energy|Wind', 'EJ/y', 2, 3], + ['reg_b', 'Primary Energy|Wind', 'EJ/y', 1, 2], ['World', 'Emissions|CO2', 'EJ/y', 10, 14], ['World', 'Emissions|CO2|Energy', 'EJ/y', 6, 8], ['World', 'Emissions|CO2|AFOLU', 'EJ/y', 3, 4], @@ -48,6 +48,9 @@ ['World', 'Price|Carbon', 'USD/tCO2', 4, 27], ['reg_a', 'Price|Carbon', 'USD/tCO2', 1, 30], ['reg_b', 'Price|Carbon', 'USD/tCO2', 10, 21], + ['World', 'Population', 'm', 3, 5], + ['reg_a', 'Population', 'm', 2, 3], + ['reg_b', 'Population', 'm', 1, 2], ], columns=['region', 'variable', 'unit', 2005, 2010], ) diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py index 6382490d3..46214e18d 100644 --- a/tests/test_feature_aggregate.py +++ b/tests/test_feature_aggregate.py @@ -11,12 +11,12 @@ LONG_IDX = IAMC_IDX + ['year'] PE_MAX_DF = pd.DataFrame([ - ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2005, 7.0], + ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2005, 9.0], ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2010, 10.0], - ['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2005, 5.0], - ['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2010, 7.0], - ['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2005, 2.0], - ['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2010, 3.0], + ['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2005, 6.0], + ['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2010, 6.0], + ['model_a', 'scen_a', 'reg_b', 
'Primary Energy', 'EJ/y', 2005, 3.0], + ['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2010, 4.0], ], columns=LONG_IDX + ['value'] diff --git a/tests/test_feature_downscale.py b/tests/test_feature_downscale.py new file mode 100644 index 000000000..1c1f599ce --- /dev/null +++ b/tests/test_feature_downscale.py @@ -0,0 +1,26 @@ +import pytest +import pandas as pd +import pyam + + +@pytest.mark.parametrize("variable", ( + ('Primary Energy'), + (['Primary Energy', 'Primary Energy|Coal']), +)) +def test_downscale_region(aggregate_df, variable): + df = aggregate_df + df.set_meta([1], name='test') + + regions = ['reg_a', 'reg_b'] + + # return as new IamDataFrame + obs_df = df.downscale_region(variable, proxy='Population') + exp_df = df.filter(variable=variable, region=regions) + assert pyam.compare(obs_df, exp_df).empty + pd.testing.assert_frame_equal(obs_df.meta, exp_df.meta) + + # append to `self` (after removing to-be-downscaled timeseries) + inplace_df = df.filter(variable=variable, region=regions, keep=False) + inplace_df.downscale_region(variable, proxy='Population', append=True) + assert pyam.compare(inplace_df, df).empty + pd.testing.assert_frame_equal(inplace_df.meta, df.meta)