From 97f6c76551b44348f5d3e31569859d220559a767 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Thu, 15 Oct 2020 13:42:11 +0200 Subject: [PATCH] Add attributes to access index dimensions (#432) --- RELEASE_NOTES.md | 10 +++++ doc/source/tutorials/pyam_first_steps.ipynb | 10 ++--- pyam/core.py | 43 ++++++++++++++++++++- tests/test_core.py | 31 ++++++++++++--- tests/test_feature_append_rename.py | 2 +- 5 files changed, 83 insertions(+), 13 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 5ba519095..13a8db866 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,14 @@ # Next release +## API changes + +PR [#432](https://github.com/IAMconsortium/pyam/pull/432) added attributes to +access the list of (unique) items of each index dimension +(`model`, `scenario`, ...). +The PR also marked as deprecated the equivalent functions +(`models()`, `scenarios()`, ...). The new behaviour is closer +(though still different) to what a pandas user would expect. + ## Notes PR [#420](https://github.com/IAMconsortium/pyam/pull/420) added @@ -13,6 +22,7 @@ via getter and setter functions. - [#437](https://github.com/IAMconsortium/pyam/pull/437) Improved test for appending mismatched timeseries - [#436](https://github.com/IAMconsortium/pyam/pull/436) Raise an error with appending mismatching timeseries index dimensions +- [#432](https://github.com/IAMconsortium/pyam/pull/432) Add attributes to access index dimensions - [#429](https://github.com/IAMconsortium/pyam/pull/429) Fix return type of `validate()` after data refactoring - [#427](https://github.com/IAMconsortium/pyam/pull/427) Add an `info()` function and use in `print(IamDataFrame)` - [#424](https://github.com/IAMconsortium/pyam/pull/424) Add a tutorial reading results from a GAMS model (via a gdx file). 
diff --git a/doc/source/tutorials/pyam_first_steps.ipynb b/doc/source/tutorials/pyam_first_steps.ipynb index 972fba0a4..2844f7a3c 100644 --- a/doc/source/tutorials/pyam_first_steps.ipynb +++ b/doc/source/tutorials/pyam_first_steps.ipynb @@ -148,7 +148,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.models()" + "df.model" ] }, { @@ -157,7 +157,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.scenarios()" + "df.scenario" ] }, { @@ -166,7 +166,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.regions()" + "df.region" ] }, { @@ -205,7 +205,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.filter(model='MESSAGE').scenarios()" + "df.filter(model='MESSAGE').scenario" ] }, { @@ -221,7 +221,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.filter(model='MESSAGE*').scenarios()" + "df.filter(model='MESSAGE*').scenario" ] }, { diff --git a/pyam/core.py b/pyam/core.py index eac286275..c569c5001 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -154,6 +154,15 @@ def _init(self, data, **kwargs): and meta_sheet in pd.ExcelFile(data).sheet_names: self.load_meta(data, sheet_name=meta_sheet) + # add time domain and extra-cols as attributes + if self.time_col == 'year': + setattr(self, 'year', get_index_levels(self._data, 'year')) + else: + setattr(self, 'time', pd.Index( + get_index_levels(self._data, 'time'))) + for c in self.extra_cols: + setattr(self, c, get_index_levels(self._data, c)) + # execute user-defined code if 'exec' in run_control(): self._execute_run_control() @@ -236,9 +245,34 @@ def _execute_run_control(self): f = getattr(mod, func) f(self) + @property + def model(self): + """Return the list of (unique) model names""" + return get_index_levels(self.meta, 'model') + + @property + def scenario(self): + """Return the list of (unique) scenario names""" + return get_index_levels(self.meta, 'scenario') + + @property + def region(self): + """Return the list of (unique) regions""" + return get_index_levels(self._data, 'region') + + @property + def 
variable(self): + """Return the list of (unique) variables""" + return get_index_levels(self._data, 'variable') + + @property + def unit(self): + """Return the list of (unique) units""" + return get_index_levels(self._data, 'unit') + @property def data(self): - """Return the timeseries data as long :class:`pandas.DataFrame`""" + """Return the timeseries data as a long :class:`pandas.DataFrame`""" if self.empty: # reset_index fails on empty with `datetime` column return pd.DataFrame([], columns=self._LONG_IDX + ['value']) return self._data.reset_index() @@ -292,14 +326,20 @@ def equals(self, other): def models(self): """Get a list of models""" + # TODO: deprecate in release >=0.9 + deprecation_warning('Use the attribute `model` instead.') return pd.Series(self.meta.index.levels[0]) def scenarios(self): """Get a list of scenarios""" + # TODO: deprecate in release >=0.9 + deprecation_warning('Use the attribute `scenario` instead.') return pd.Series(self.meta.index.levels[1]) def regions(self): """Get a list of regions""" + # TODO: deprecate in release >=0.9 + deprecation_warning('Use the attribute `region` instead.') return pd.Series(get_index_levels(self._data, 'region'), name='region') def variables(self, include_units=False): @@ -312,6 +352,7 @@ def variables(self, include_units=False): """ if not include_units: _var = 'variable' + deprecation_warning('Use the attribute `variable` instead.') return pd.Series(get_index_levels(self._data, _var), name=_var) # else construct dataframe from variable and unit levels diff --git a/tests/test_core.py b/tests/test_core.py index 023e1810d..109830ba1 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -50,12 +50,12 @@ def test_init_from_iamdf(test_df_year): # inplace-operations on the new object have effects on the original object df.rename(scenario={'scen_a': 'scen_foo'}, inplace=True) - assert all(test_df_year.scenarios().values == ['scen_b', 'scen_foo']) + assert test_df_year.scenario == ['scen_b', 'scen_foo'] # 
overwrites on the new object do not have effects on the original object df = df.rename(scenario={'scen_foo': 'scen_bar'}) - assert all(df.scenarios().values == ['scen_b', 'scen_bar']) - assert all(test_df_year.scenarios().values == ['scen_b', 'scen_foo']) + assert df.scenario == ['scen_b', 'scen_bar'] + assert test_df_year.scenario == ['scen_b', 'scen_foo'] def test_init_from_iamdf_raises(test_df_year): @@ -108,7 +108,7 @@ def test_init_df_with_extra_col(test_pd_df): tdf, check_like=True) -def test_init_empty_message(test_pd_df, caplog): +def test_init_empty_message(caplog): IamDataFrame(data=df_empty) drop_message = ( "Formatted data is empty!" @@ -199,6 +199,25 @@ def test_get_item(test_df): assert test_df['model'].unique() == ['model_a'] +def test_index_attributes(test_df): + # assert that the index attributes return the expected unique items + assert test_df.model == ['model_a'] + assert test_df.scenario == ['scen_a', 'scen_b'] + assert test_df.region == ['World'] + assert test_df.variable == ['Primary Energy', 'Primary Energy|Coal'] + assert test_df.unit == ['EJ/yr'] + if test_df.time_col == 'year': + assert test_df.year == [2005, 2010] + else: + assert test_df.time.equals(pd.Index(test_df.data.time.unique())) + + +def test_index_attributes_extra_col(test_pd_df): + test_pd_df['subannual'] = ['summer', 'summer', 'winter'] + df = IamDataFrame(test_pd_df) + assert df.subannual == ['summer', 'winter'] + + def test_model(test_df): exp = pd.Series(data=['model_a'], name='model') pd.testing.assert_series_equal(test_df.models(), exp) @@ -503,8 +522,8 @@ def test_filter_year_with_time_col(test_pd_df): def test_filter_as_kwarg(test_df): - obs = list(test_df.filter(variable='Primary Energy|Coal').scenarios()) - assert obs == ['scen_a'] + _df = test_df.filter(variable='Primary Energy|Coal') + assert _df.scenario == ['scen_a'] def test_filter_keep_false(test_df): diff --git a/tests/test_feature_append_rename.py b/tests/test_feature_append_rename.py index 4d6fb5e05..61c459ec9 100644 ---
a/tests/test_feature_append_rename.py +++ b/tests/test_feature_append_rename.py @@ -66,7 +66,7 @@ def test_append_reconstructed_time(test_df): .rename({'scenario': {'scen_b': 'scen_c'}}) other.time_col = other.time_col[0:1] + other.time_col[1:] test_df.append(other, inplace=True) - assert "scen_c" in test_df.scenarios().values + assert "scen_c" in test_df.scenario def test_append_same_scenario(test_df):