Skip to content

Commit

Permalink
Add attributes to access index dimensions (#432)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann committed Oct 15, 2020
1 parent 6a0200c commit 97f6c76
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 13 deletions.
10 changes: 10 additions & 0 deletions RELEASE_NOTES.md
@@ -1,5 +1,14 @@
# Next release

## API changes

PR [#432](https://github.com/IAMconsortium/pyam/pull/432) added attributes to
access the list of (unique) items of each index dimension
(`model`, `scenario`, ...).
The PR also deprecated the equivalent functions
(`models()`, `scenarios()`, ...). The new behaviour is closer
(though still different) to what a pandas user would expect.

## Notes

PR [#420](https://github.com/IAMconsortium/pyam/pull/420) added
Expand All @@ -13,6 +22,7 @@ via getter and setter functions.

- [#437](https://github.com/IAMconsortium/pyam/pull/437) Improved test for appending mismatched timeseries
- [#436](https://github.com/IAMconsortium/pyam/pull/436) Raise an error with appending mismatching timeseries index dimensions
- [#432](https://github.com/IAMconsortium/pyam/pull/432) Add attributes to access index dimensions
- [#429](https://github.com/IAMconsortium/pyam/pull/429) Fix return type of `validate()` after data refactoring
- [#427](https://github.com/IAMconsortium/pyam/pull/427) Add an `info()` function and use in `print(IamDataFrame)`
- [#424](https://github.com/IAMconsortium/pyam/pull/424) Add a tutorial reading results from a GAMS model (via a gdx file).
Expand Down
10 changes: 5 additions & 5 deletions doc/source/tutorials/pyam_first_steps.ipynb
Expand Up @@ -148,7 +148,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.models()"
"df.model"
]
},
{
Expand All @@ -157,7 +157,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.scenarios()"
"df.scenario"
]
},
{
Expand All @@ -166,7 +166,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.regions()"
"df.region"
]
},
{
Expand Down Expand Up @@ -205,7 +205,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.filter(model='MESSAGE').scenarios()"
"df.filter(model='MESSAGE').scenario"
]
},
{
Expand All @@ -221,7 +221,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.filter(model='MESSAGE*').scenarios()"
"df.filter(model='MESSAGE*').scenario"
]
},
{
Expand Down
43 changes: 42 additions & 1 deletion pyam/core.py
Expand Up @@ -154,6 +154,15 @@ def _init(self, data, **kwargs):
and meta_sheet in pd.ExcelFile(data).sheet_names:
self.load_meta(data, sheet_name=meta_sheet)

# add time domain and extra-cols as attributes
if self.time_col == 'year':
setattr(self, 'year', get_index_levels(self._data, 'year'))
else:
setattr(self, 'time', pd.Index(
get_index_levels(self._data, 'time')))
for c in self.extra_cols:
setattr(self, c, get_index_levels(self._data, c))

# execute user-defined code
if 'exec' in run_control():
self._execute_run_control()
Expand Down Expand Up @@ -236,9 +245,34 @@ def _execute_run_control(self):
f = getattr(mod, func)
f(self)

@property
def model(self):
    """List of (unique) model names in this object"""
    # unique items of the 'model' dimension, taken from the meta index
    items = get_index_levels(self.meta, 'model')
    return items

@property
def scenario(self):
    """List of (unique) scenario names in this object"""
    # unique items of the 'scenario' dimension, taken from the meta index
    items = get_index_levels(self.meta, 'scenario')
    return items

@property
def region(self):
    """List of (unique) regions in the timeseries data"""
    # unique items of the 'region' dimension of the internal data index
    items = get_index_levels(self._data, 'region')
    return items

@property
def variable(self):
    """List of (unique) variables in the timeseries data"""
    # unique items of the 'variable' dimension of the internal data index
    items = get_index_levels(self._data, 'variable')
    return items

@property
def unit(self):
    """List of (unique) units in the timeseries data"""
    # unique items of the 'unit' dimension of the internal data index
    items = get_index_levels(self._data, 'unit')
    return items

@property
def data(self):
    """Return the timeseries data as a long :class:`pandas.DataFrame`

    Returns an empty frame with the expected columns if there is no data.
    """
    # NOTE: the original body carried a second, stale docstring line
    # ("as long ...") left over from an edit; only one docstring is kept here.
    if self.empty:  # reset_index fails on empty with `datetime` column
        return pd.DataFrame([], columns=self._LONG_IDX + ['value'])
    return self._data.reset_index()
Expand Down Expand Up @@ -292,14 +326,20 @@ def equals(self, other):

def models(self):
    """Return a :class:`pandas.Series` of model names (deprecated)"""
    # deprecated, scheduled for removal in release >=0.9
    deprecation_warning('Use the attribute `model` instead.')
    model_level = self.meta.index.levels[0]
    return pd.Series(model_level)

def scenarios(self):
    """Return a :class:`pandas.Series` of scenario names (deprecated)"""
    # deprecated, scheduled for removal in release >=0.9
    deprecation_warning('Use the attribute `scenario` instead.')
    scenario_level = self.meta.index.levels[1]
    return pd.Series(scenario_level)

def regions(self):
    """Return a :class:`pandas.Series` of regions (deprecated)"""
    # deprecated, scheduled for removal in release >=0.9
    deprecation_warning('Use the attribute `region` instead.')
    region_items = get_index_levels(self._data, 'region')
    return pd.Series(region_items, name='region')

def variables(self, include_units=False):
Expand All @@ -312,6 +352,7 @@ def variables(self, include_units=False):
"""
if not include_units:
_var = 'variable'
deprecation_warning('Use the attribute `variable` instead.')
return pd.Series(get_index_levels(self._data, _var), name=_var)

# else construct dataframe from variable and unit levels
Expand Down
31 changes: 25 additions & 6 deletions tests/test_core.py
Expand Up @@ -50,12 +50,12 @@ def test_init_from_iamdf(test_df_year):

# inplace-operations on the new object have effects on the original object
df.rename(scenario={'scen_a': 'scen_foo'}, inplace=True)
assert all(test_df_year.scenarios().values == ['scen_b', 'scen_foo'])
assert test_df_year.scenario == ['scen_b', 'scen_foo']

# overwrites on the new object do not have effects on the original object
df = df.rename(scenario={'scen_foo': 'scen_bar'})
assert all(df.scenarios().values == ['scen_b', 'scen_bar'])
assert all(test_df_year.scenarios().values == ['scen_b', 'scen_foo'])
assert df.scenario == ['scen_b', 'scen_bar']
assert test_df_year.scenario == ['scen_b', 'scen_foo']


def test_init_from_iamdf_raises(test_df_year):
Expand Down Expand Up @@ -108,7 +108,7 @@ def test_init_df_with_extra_col(test_pd_df):
tdf, check_like=True)


def test_init_empty_message(test_pd_df, caplog):
def test_init_empty_message(caplog):
IamDataFrame(data=df_empty)
drop_message = (
"Formatted data is empty!"
Expand Down Expand Up @@ -199,6 +199,25 @@ def test_get_item(test_df):
assert test_df['model'].unique() == ['model_a']


def test_index_attributes(test_df):
    # assert that each index dimension is exposed as an attribute
    # returning the list of (unique) items of that dimension
    assert test_df.model == ['model_a']
    assert test_df.scenario == ['scen_a', 'scen_b']
    assert test_df.region == ['World']
    assert test_df.variable == ['Primary Energy', 'Primary Energy|Coal']
    assert test_df.unit == ['EJ/yr']
    if test_df.time_col == 'year':
        assert test_df.year == [2005, 2010]
    else:
        assert test_df.time.equals(pd.Index(test_df.data.time.unique()))


def test_index_attributes_extra_col(test_pd_df):
    # an extra-column of the data is exposed as an attribute of unique items
    test_pd_df['subannual'] = ['summer', 'summer', 'winter']
    assert IamDataFrame(test_pd_df).subannual == ['summer', 'winter']


def test_model(test_df):
exp = pd.Series(data=['model_a'], name='model')
pd.testing.assert_series_equal(test_df.models(), exp)
Expand Down Expand Up @@ -503,8 +522,8 @@ def test_filter_year_with_time_col(test_pd_df):


def test_filter_as_kwarg(test_df):
    # filtering with a keyword argument narrows the scenario dimension
    filtered = test_df.filter(variable='Primary Energy|Coal')
    assert filtered.scenario == ['scen_a']


def test_filter_keep_false(test_df):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_feature_append_rename.py
Expand Up @@ -66,7 +66,7 @@ def test_append_reconstructed_time(test_df):
.rename({'scenario': {'scen_b': 'scen_c'}})
other.time_col = other.time_col[0:1] + other.time_col[1:]
test_df.append(other, inplace=True)
assert "scen_c" in test_df.scenarios().values
assert "scen_c" in test_df.scenario


def test_append_same_scenario(test_df):
Expand Down

0 comments on commit 97f6c76

Please sign in to comment.