Skip to content

Commit

Permalink
Add attributes to access index dimensions (#432)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann committed Oct 15, 2020
1 parent 6a0200c commit 97f6c76
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 13 deletions.
10 changes: 10 additions & 0 deletions RELEASE_NOTES.md
@@ -1,5 +1,14 @@
# Next release

## API changes

PR [#432](https://github.com/IAMconsortium/pyam/pull/432) added attributes to
access the list of (unique) items of each index dimension
(`model`, `scenario`, ...).
The PR also deprecated the equivalent functions
(`models()`, `scenarios()`, ...). The new behaviour is closer
(though still different) to what a pandas user would expect.

## Notes

PR [#420](https://github.com/IAMconsortium/pyam/pull/420) added
Expand All @@ -13,6 +22,7 @@ via getter and setter functions.

- [#437](https://github.com/IAMconsortium/pyam/pull/437) Improved test for appending mismatched timeseries
- [#436](https://github.com/IAMconsortium/pyam/pull/436) Raise an error with appending mismatching timeseries index dimensions
- [#432](https://github.com/IAMconsortium/pyam/pull/432) Add attributes to access index dimensions
- [#429](https://github.com/IAMconsortium/pyam/pull/429) Fix return type of `validate()` after data refactoring
- [#427](https://github.com/IAMconsortium/pyam/pull/427) Add an `info()` function and use in `print(IamDataFrame)`
- [#424](https://github.com/IAMconsortium/pyam/pull/424) Add a tutorial reading results from a GAMS model (via a gdx file).
Expand Down
10 changes: 5 additions & 5 deletions doc/source/tutorials/pyam_first_steps.ipynb
Expand Up @@ -148,7 +148,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.models()"
"df.model"
]
},
{
Expand All @@ -157,7 +157,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.scenarios()"
"df.scenario"
]
},
{
Expand All @@ -166,7 +166,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.regions()"
"df.region"
]
},
{
Expand Down Expand Up @@ -205,7 +205,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.filter(model='MESSAGE').scenarios()"
"df.filter(model='MESSAGE').scenario"
]
},
{
Expand All @@ -221,7 +221,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.filter(model='MESSAGE*').scenarios()"
"df.filter(model='MESSAGE*').scenario"
]
},
{
Expand Down
43 changes: 42 additions & 1 deletion pyam/core.py
Expand Up @@ -154,6 +154,15 @@ def _init(self, data, **kwargs):
and meta_sheet in pd.ExcelFile(data).sheet_names:
self.load_meta(data, sheet_name=meta_sheet)

# add time domain and extra-cols as attributes
if self.time_col == 'year':
setattr(self, 'year', get_index_levels(self._data, 'year'))
else:
setattr(self, 'time', pd.Index(
get_index_levels(self._data, 'time')))
for c in self.extra_cols:
setattr(self, c, get_index_levels(self._data, c))

# execute user-defined code
if 'exec' in run_control():
self._execute_run_control()
Expand Down Expand Up @@ -236,9 +245,34 @@ def _execute_run_control(self):
f = getattr(mod, func)
f(self)

@property
def model(self):
    """List of (unique) model names in this object"""
    # unique items of the 'model' dimension, taken from the meta index
    items = get_index_levels(self.meta, 'model')
    return items

@property
def scenario(self):
    """List of (unique) scenario names in this object"""
    # unique items of the 'scenario' dimension, taken from the meta index
    items = get_index_levels(self.meta, 'scenario')
    return items

@property
def region(self):
    """List of (unique) regions in the timeseries data"""
    # unique items of the 'region' dimension of the internal data index
    items = get_index_levels(self._data, 'region')
    return items

@property
def variable(self):
    """List of (unique) variables in the timeseries data"""
    # unique items of the 'variable' dimension of the internal data index
    items = get_index_levels(self._data, 'variable')
    return items

@property
def unit(self):
    """List of (unique) units in the timeseries data"""
    # unique items of the 'unit' dimension of the internal data index
    items = get_index_levels(self._data, 'unit')
    return items

@property
def data(self):
    """Return the timeseries data as a long :class:`pandas.DataFrame`

    Returns an empty frame with the expected columns if there is no data.
    """
    # NOTE: the original body carried a second, stale docstring line
    # ("as long ...") left over from an edit; only one docstring is kept here.
    if self.empty:  # reset_index fails on empty with `datetime` column
        return pd.DataFrame([], columns=self._LONG_IDX + ['value'])
    return self._data.reset_index()
Expand Down Expand Up @@ -292,14 +326,20 @@ def equals(self, other):

def models(self):
    """Return a :class:`pandas.Series` of model names (deprecated)"""
    # deprecated, scheduled for removal in release >=0.9
    deprecation_warning('Use the attribute `model` instead.')
    model_level = self.meta.index.levels[0]
    return pd.Series(model_level)

def scenarios(self):
    """Return a :class:`pandas.Series` of scenario names (deprecated)"""
    # deprecated, scheduled for removal in release >=0.9
    deprecation_warning('Use the attribute `scenario` instead.')
    scenario_level = self.meta.index.levels[1]
    return pd.Series(scenario_level)

def regions(self):
    """Return a :class:`pandas.Series` of regions (deprecated)"""
    # deprecated, scheduled for removal in release >=0.9
    deprecation_warning('Use the attribute `region` instead.')
    region_items = get_index_levels(self._data, 'region')
    return pd.Series(region_items, name='region')

def variables(self, include_units=False):
Expand All @@ -312,6 +352,7 @@ def variables(self, include_units=False):
"""
if not include_units:
_var = 'variable'
deprecation_warning('Use the attribute `variable` instead.')
return pd.Series(get_index_levels(self._data, _var), name=_var)

# else construct dataframe from variable and unit levels
Expand Down
31 changes: 25 additions & 6 deletions tests/test_core.py
Expand Up @@ -50,12 +50,12 @@ def test_init_from_iamdf(test_df_year):

# inplace-operations on the new object have effects on the original object
df.rename(scenario={'scen_a': 'scen_foo'}, inplace=True)
assert all(test_df_year.scenarios().values == ['scen_b', 'scen_foo'])
assert test_df_year.scenario == ['scen_b', 'scen_foo']

# overwrites on the new object do not have effects on the original object
df = df.rename(scenario={'scen_foo': 'scen_bar'})
assert all(df.scenarios().values == ['scen_b', 'scen_bar'])
assert all(test_df_year.scenarios().values == ['scen_b', 'scen_foo'])
assert df.scenario == ['scen_b', 'scen_bar']
assert test_df_year.scenario == ['scen_b', 'scen_foo']


def test_init_from_iamdf_raises(test_df_year):
Expand Down Expand Up @@ -108,7 +108,7 @@ def test_init_df_with_extra_col(test_pd_df):
tdf, check_like=True)


def test_init_empty_message(test_pd_df, caplog):
def test_init_empty_message(caplog):
IamDataFrame(data=df_empty)
drop_message = (
"Formatted data is empty!"
Expand Down Expand Up @@ -199,6 +199,25 @@ def test_get_item(test_df):
assert test_df['model'].unique() == ['model_a']


def test_index_attributes(test_df):
    # assert that each index dimension is exposed as an attribute
    # returning the list of (unique) items of that dimension
    assert test_df.model == ['model_a']
    assert test_df.scenario == ['scen_a', 'scen_b']
    assert test_df.region == ['World']
    assert test_df.variable == ['Primary Energy', 'Primary Energy|Coal']
    assert test_df.unit == ['EJ/yr']
    if test_df.time_col == 'year':
        assert test_df.year == [2005, 2010]
    else:
        assert test_df.time.equals(pd.Index(test_df.data.time.unique()))


def test_index_attributes_extra_col(test_pd_df):
    # an extra-column of the data is exposed as an attribute of unique items
    test_pd_df['subannual'] = ['summer', 'summer', 'winter']
    assert IamDataFrame(test_pd_df).subannual == ['summer', 'winter']


def test_model(test_df):
exp = pd.Series(data=['model_a'], name='model')
pd.testing.assert_series_equal(test_df.models(), exp)
Expand Down Expand Up @@ -503,8 +522,8 @@ def test_filter_year_with_time_col(test_pd_df):


def test_filter_as_kwarg(test_df):
    # filtering with a keyword argument narrows the scenario dimension
    filtered = test_df.filter(variable='Primary Energy|Coal')
    assert filtered.scenario == ['scen_a']


def test_filter_keep_false(test_df):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_feature_append_rename.py
Expand Up @@ -66,7 +66,7 @@ def test_append_reconstructed_time(test_df):
.rename({'scenario': {'scen_b': 'scen_c'}})
other.time_col = other.time_col[0:1] + other.time_col[1:]
test_df.append(other, inplace=True)
assert "scen_c" in test_df.scenarios().values
assert "scen_c" in test_df.scenario


def test_append_same_scenario(test_df):
Expand Down

0 comments on commit 97f6c76

Please sign in to comment.