Skip to content

Commit

Permalink
add feature to "downscale" timeseries data to subregions (#313)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann committed Dec 23, 2019
1 parent 67dd7e9 commit 510851d
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 17 deletions.
3 changes: 2 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ region-level. To keep the previous behaviour, add `components=True`.

## Individual Updates

- [#312](https://github.com/IAMconsortium/pyam/pull/312) allow passing list of variables to `aggregate` functions
- [#313](https://github.com/IAMconsortium/pyam/pull/313) Add feature to `downscale` timeseries data to subregions using another variable as proxy
- [#312](https://github.com/IAMconsortium/pyam/pull/312) Allow passing list of variables to `aggregate` functions
- [#305](https://github.com/IAMconsortium/pyam/pull/305) Add `method` and `weight` options to the (region) aggregation functions
- [#302](https://github.com/IAMconsortium/pyam/pull/302) Rework the tutorials
- [#301](https://github.com/IAMconsortium/pyam/pull/301) Bugfix when using `to_excel()` with a `pd.ExcelWriter`
Expand Down
55 changes: 51 additions & 4 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def variables(self, include_units=False):
def append(self, other, ignore_meta_conflict=False, inplace=False,
**kwargs):
"""Append any castable object to this IamDataFrame.
Columns in `other.meta` that are not in `self.meta` are always merged,
duplicate region-variable-unit-year rows raise a ValueError.
Expand Down Expand Up @@ -651,7 +652,7 @@ def rename(self, mapping=None, inplace=False, append=False,
inplace: bool, default False
if True, do operation inplace and return None
append: bool, default False
if True, append renamed timeseries to IamDataFrame
append renamed timeseries to self; else, return new `IamDataFrame`
check_duplicates: bool, default True
check whether conflict between existing and renamed data exists.
If True, raise ValueError; if False, rename and merge
Expand Down Expand Up @@ -779,7 +780,7 @@ def aggregate(self, variable, components=None, method='sum', append=False):
method: func or str, default 'sum'
method to use for aggregation, e.g. np.mean, np.sum, 'min', 'max'
append: bool, default False
append the aggregate timeseries to `data` and return None,
append the aggregate timeseries to `self` and return None,
else return aggregate timeseries
"""
# list of variables require default components (no manual list)
Expand Down Expand Up @@ -892,7 +893,7 @@ def aggregate_region(self, variable, region='World', subregions=None,
variable to use as weight for the aggregation
(currently only supported with `method='sum'`)
append: bool, default False
append the aggregate timeseries to `data` and return None,
append the aggregate timeseries to `self` and return None,
else return aggregate timeseries
"""
if not isstr(variable) and components is not False:
Expand Down Expand Up @@ -1009,11 +1010,53 @@ def check_aggregate_region(self, variable, region='World', subregions=None,

return IamDataFrame(diff, region=region).timeseries()

def _all_other_regions(self, region, variable):
def downscale_region(self, variable, proxy, region='World',
                     subregions=None, append=False):
    """Downscale a timeseries to a number of subregions

    The regional total of `variable` is distributed across `subregions`
    proportionally to each subregion's share of the `proxy` variable.

    Parameters
    ----------
    variable: str or list of str
        variable(s) to be downscaled
    proxy: str
        variable to be used as proxy (i.e., weight) for the downscaling
    region: str, default 'World'
        region whose data is downscaled to the subregions
    subregions: list of str
        list of subregions, defaults to all regions other than `region`
    append: bool, default False
        append the downscaled timeseries to `self` and return None,
        else return downscaled data as new `IamDataFrame`
    """
    # default to all regions other than `region` if none are given
    subregions = subregions or self._all_other_regions(region)

    # proxy data per subregion and its total, as `pd.Series`
    proxy_rows = self._apply_filters(variable=proxy, region=subregions)
    proxy_df = self.data[proxy_rows]
    proxy_value = proxy_df.set_index(
        self._get_cols(['region', 'year'])).value
    proxy_total = proxy_df.groupby(self._get_cols(['year'])).value.sum()

    # timeseries to be downscaled, indexed without the region dimension
    # so that the division below broadcasts across subregions
    value_rows = self._apply_filters(variable=variable, region=region)
    region_value = self.data[value_rows].set_index(
        self._get_cols(['variable', 'unit', 'year'])).value

    # distribute the regional total by each subregion's proxy share
    downscaled = region_value * proxy_value / proxy_total

    if append is True:
        self.append(downscaled, inplace=True)
    else:
        df = IamDataFrame(downscaled)
        # carry over `meta` for the model-scenario index of the new data
        df.meta = self.meta.loc[_make_index(df.data)]
        return df

def _all_other_regions(self, region, variable=None):
    """Return the set of regions other than `region` containing `variable`"""
    matches = self._apply_filters(variable=variable)
    regions = set(self.data[matches].region)
    regions.discard(region)
    return regions


def _variable_components(self, variable, level=0):
"""Get all components (sub-categories) of a variable for a given level
Expand All @@ -1024,6 +1067,10 @@ def _variable_components(self, variable, level=0):
return var_list[pattern_match(var_list, '{}|*'.format(variable),
level=level)]

def _get_cols(self, cols):
    """Return a list of columns of `self.data`"""
    # standard index columns, then `cols`, then any extra data columns
    return [*META_IDX, *cols, *self.extra_cols]

def check_internal_consistency(self, **kwargs):
"""Check whether a scenario ensemble is internally consistent
Expand Down
17 changes: 10 additions & 7 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@


FULL_FEATURE_DF = pd.DataFrame([
['World', 'Primary Energy', 'EJ/y', 10, 15],
['reg_a', 'Primary Energy', 'EJ/y', 6, 9],
['World', 'Primary Energy', 'EJ/y', 12, 15],
['reg_a', 'Primary Energy', 'EJ/y', 8, 9],
['reg_b', 'Primary Energy', 'EJ/y', 4, 6],
['World', 'Primary Energy|Coal', 'EJ/y', 7, 10],
['reg_a', 'Primary Energy|Coal', 'EJ/y', 5, 7],
['reg_b', 'Primary Energy|Coal', 'EJ/y', 2, 3],
['World', 'Primary Energy|Coal', 'EJ/y', 9, 10],
['reg_a', 'Primary Energy|Coal', 'EJ/y', 6, 6],
['reg_b', 'Primary Energy|Coal', 'EJ/y', 3, 4],
['World', 'Primary Energy|Wind', 'EJ/y', 3, 5],
['reg_a', 'Primary Energy|Wind', 'EJ/y', 1, 2],
['reg_b', 'Primary Energy|Wind', 'EJ/y', 2, 3],
['reg_a', 'Primary Energy|Wind', 'EJ/y', 2, 3],
['reg_b', 'Primary Energy|Wind', 'EJ/y', 1, 2],
['World', 'Emissions|CO2', 'EJ/y', 10, 14],
['World', 'Emissions|CO2|Energy', 'EJ/y', 6, 8],
['World', 'Emissions|CO2|AFOLU', 'EJ/y', 3, 4],
Expand All @@ -48,6 +48,9 @@
['World', 'Price|Carbon', 'USD/tCO2', 4, 27],
['reg_a', 'Price|Carbon', 'USD/tCO2', 1, 30],
['reg_b', 'Price|Carbon', 'USD/tCO2', 10, 21],
['World', 'Population', 'm', 3, 5],
['reg_a', 'Population', 'm', 2, 3],
['reg_b', 'Population', 'm', 1, 2],
],
columns=['region', 'variable', 'unit', 2005, 2010],
)
Expand Down
10 changes: 5 additions & 5 deletions tests/test_feature_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
LONG_IDX = IAMC_IDX + ['year']

PE_MAX_DF = pd.DataFrame([
['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2005, 7.0],
['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2005, 9.0],
['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2010, 10.0],
['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2005, 5.0],
['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2010, 7.0],
['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2005, 2.0],
['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2010, 3.0],
['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2005, 6.0],
['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2010, 6.0],
['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2005, 3.0],
['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2010, 4.0],

],
columns=LONG_IDX + ['value']
Expand Down
26 changes: 26 additions & 0 deletions tests/test_feature_downscale.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest
import pandas as pd
import pyam


@pytest.mark.parametrize("variable", (
    ('Primary Energy'),
    (['Primary Energy', 'Primary Energy|Coal']),
))
def test_downscale_region(aggregate_df, variable):
    """Downscaling with a proxy must reproduce the original subregion data"""
    df = aggregate_df
    df.set_meta([1], name='test')

    subregions = ['reg_a', 'reg_b']

    # downscaled data must equal the subregion data present in the fixture
    observed = df.downscale_region(variable, proxy='Population')
    expected = df.filter(variable=variable, region=subregions)
    assert pyam.compare(observed, expected).empty
    pd.testing.assert_frame_equal(observed.meta, expected.meta)

    # appending after removing the subregion timeseries must restore `df`
    stripped = df.filter(variable=variable, region=subregions, keep=False)
    stripped.downscale_region(variable, proxy='Population', append=True)
    assert pyam.compare(stripped, df).empty
    pd.testing.assert_frame_equal(stripped.meta, df.meta)

0 comments on commit 510851d

Please sign in to comment.