Support initializing with meta dataframe in long format (#801)

IAMconsortium · Nov 24, 2023 · 6583339 · 6583339
1 parent 95ebfce
commit 6583339
Show file tree

Hide file tree

Showing 4 changed files with 51 additions and 14 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,7 +1,8 @@
 # Next Release
 
-- [#796](https://github.com/IAMconsortium/pyam/pull/796] Raise explicit error message if no connection to IIASA manager service
-- [#794](https://github.com/IAMconsortium/pyam/pull/794] Fixed wrong color codes for AR6 Illustrative Pathways
+- [#801](https://github.com/IAMconsortium/pyam/pull/801) Support initializing with `meta` dataframe in long format
+- [#796](https://github.com/IAMconsortium/pyam/pull/796) Raise explicit error message if no connection to IIASA manager service
+- [#794](https://github.com/IAMconsortium/pyam/pull/794) Fixed wrong color codes for AR6 Illustrative Pathways
 
 # Release v2.0.0
 

diff --git a/pyam/core.py b/pyam/core.py
@@ -84,12 +84,13 @@ class IamDataFrame(object):
 
     Parameters
     ----------
-    data : :class:`pandas.DataFrame` or file-like object as str or :class:`pathlib.Path`
+    data : :class:`pandas.DataFrame`, :class:`pathlib.Path` or file-like object
         Scenario timeseries data following the IAMC data format or
         a supported variation as pandas object or a path to a file.
     meta : :class:`pandas.DataFrame`, optional
-        A dataframe with suitable 'meta' indicators for the new instance.
-        The index will be downselected to scenarios present in `data`.
+        A dataframe with suitable 'meta' indicators in wide (indicator as column name)
+        or long (columns named 'key' and 'value') format.
+        The dataframe will be downselected to scenarios present in `data`.
     index : list, optional
         Columns to use for resulting IamDataFrame index.
     kwargs
@@ -147,10 +148,16 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):
 
         # if meta is given explicitly, verify that index and column names are valid
         if meta is not None:
+            if meta.index.names == [None]:
+                meta.set_index(index, inplace=True)
             if not meta.index.names == index:
                 raise ValueError(
                     f"Incompatible `index={index}` with `meta.index={meta.index.names}`"
                 )
+            # if meta is in "long" format as key-value columns, cast to wide format
+            if len(meta.columns) == 2 and all(meta.columns == ["key", "value"]):
+                meta = meta.pivot(values="value", columns="key")
+                meta.columns.name = None
 
         # try casting to Path if file-like is string or LocalPath or pytest.LocalPath
         try:

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -57,7 +57,7 @@
         ["model_a", "scen_b", 2, np.nan],
     ],
     columns=META_IDX + META_COLS,
-).set_index(META_IDX)
+)
 
 
 FULL_FEATURE_DF = pd.DataFrame(

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -33,13 +33,14 @@
     columns=["model", "scenario", "region", 2010, 2020],
 ).set_index(["model", "region"])
 
+
 META_DF = pd.DataFrame(
     [
-        ["model_a", "scen_a", 1],
-        ["model_a", "scen_b", np.nan],
-        ["model_a", "scen_c", 2],
+        ["model_a", "scen_a", 1, "foo"],
+        ["model_a", "scen_b", np.nan, "bar"],
+        ["model_a", "scen_c", 2, "baz"],
     ],
-    columns=META_IDX + ["foo"],
+    columns=META_IDX + ["number", "string"],
 ).set_index(META_IDX)
 
 
@@ -157,17 +158,45 @@ def test_init_df_with_extra_col(test_pd_df):
     pd.testing.assert_frame_equal(obs, exp)
 
 
-def test_init_df_with_meta(test_pd_df):
-    # pass explicit meta dataframe with a scenario that doesn't exist in data
-    df = IamDataFrame(test_pd_df, meta=META_DF[["foo"]])
+def test_init_df_with_meta_with_index(test_pd_df):
+    # pass indexed meta dataframe with a scenario that doesn't exist in data
+    df = IamDataFrame(test_pd_df, meta=META_DF)
+
+    # check that scenario not existing in data is removed during initialization
+    pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]])
+    assert df.scenario == ["scen_a", "scen_b"]
+
+
+def test_init_df_with_meta_no_index(test_pd_df):
+    # pass meta without index with a scenario that doesn't exist in data
+    df = IamDataFrame(test_pd_df, meta=META_DF.reset_index())
 
     # check that scenario not existing in data is removed during initialization
     pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]])
     assert df.scenario == ["scen_a", "scen_b"]
 
 
+def test_init_df_with_meta_key_value(test_pd_df):
+    # pass meta with key-value columns with a scenario that doesn't exist in data
+
+    meta_df = pd.DataFrame(
+        [
+            ["model_a", "scen_a", "number", 1],
+            ["model_a", "scen_a", "string", "foo"],
+            ["model_a", "scen_b", "string", "bar"],
+            ["model_a", "scen_c", "number", 2],
+        ],
+        columns=META_IDX + ["key", "value"],
+    )
+    df = IamDataFrame(test_pd_df, meta=meta_df)
+
+    # check that scenario not existing in data is removed during initialization
+    pd.testing.assert_frame_equal(df.meta, META_DF.iloc[[0, 1]], check_dtype=False)
+    assert df.scenario == ["scen_a", "scen_b"]
+
+
 def test_init_df_with_meta_exclude_raises(test_pd_df):
-    # pass explicit meta dataframe with a scenario that
+    # pass explicit meta dataframe with a legacy "exclude" column
     meta = META_DF.copy()
     meta["exclude"] = False
     with pytest.raises(ValueError, match="Illegal columns in `meta`: 'exclude'"):