Skip to content

Commit

Permalink
Support initializing with meta dataframe in long format (#801)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann committed Nov 24, 2023
1 parent 95ebfce commit 6583339
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 14 deletions.
5 changes: 3 additions & 2 deletions RELEASE_NOTES.md
@@ -1,7 +1,8 @@
# Next Release

- [#796](https://github.com/IAMconsortium/pyam/pull/796] Raise explicit error message if no connection to IIASA manager service
- [#794](https://github.com/IAMconsortium/pyam/pull/794] Fixed wrong color codes for AR6 Illustrative Pathways
- [#801](https://github.com/IAMconsortium/pyam/pull/801) Support initializing with `meta` dataframe in long format
- [#796](https://github.com/IAMconsortium/pyam/pull/796) Raise explicit error message if no connection to IIASA manager service
- [#794](https://github.com/IAMconsortium/pyam/pull/794) Fixed wrong color codes for AR6 Illustrative Pathways

# Release v2.0.0

Expand Down
13 changes: 10 additions & 3 deletions pyam/core.py
Expand Up @@ -84,12 +84,13 @@ class IamDataFrame(object):
Parameters
----------
data : :class:`pandas.DataFrame` or file-like object as str or :class:`pathlib.Path`
data : :class:`pandas.DataFrame`, :class:`pathlib.Path` or file-like object
Scenario timeseries data following the IAMC data format or
a supported variation as pandas object or a path to a file.
meta : :class:`pandas.DataFrame`, optional
A dataframe with suitable 'meta' indicators for the new instance.
The index will be downselected to scenarios present in `data`.
A dataframe with suitable 'meta' indicators in wide (indicator as column name)
or long (key/value columns) format.
The dataframe will be downselected to scenarios present in `data`.
index : list, optional
Columns to use for resulting IamDataFrame index.
kwargs
Expand Down Expand Up @@ -147,10 +148,16 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):

# if meta is given explicitly, verify that index and column names are valid
if meta is not None:
if meta.index.names == [None]:
meta.set_index(index, inplace=True)
if not meta.index.names == index:
raise ValueError(
f"Incompatible `index={index}` with `meta.index={meta.index.names}`"
)
# if meta is in "long" format as key-value columns, cast to wide format
if len(meta.columns) == 2 and all(meta.columns == ["key", "value"]):
meta = meta.pivot(values="value", columns="key")
meta.columns.name = None

# try casting to Path if file-like is string or LocalPath or pytest.LocalPath
try:
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Expand Up @@ -57,7 +57,7 @@
["model_a", "scen_b", 2, np.nan],
],
columns=META_IDX + META_COLS,
).set_index(META_IDX)
)


FULL_FEATURE_DF = pd.DataFrame(
Expand Down
45 changes: 37 additions & 8 deletions tests/test_core.py
Expand Up @@ -33,13 +33,14 @@
columns=["model", "scenario", "region", 2010, 2020],
).set_index(["model", "region"])


META_DF = pd.DataFrame(
[
["model_a", "scen_a", 1],
["model_a", "scen_b", np.nan],
["model_a", "scen_c", 2],
["model_a", "scen_a", 1, "foo"],
["model_a", "scen_b", np.nan, "bar"],
["model_a", "scen_c", 2, "baz"],
],
columns=META_IDX + ["foo"],
columns=META_IDX + ["number", "string"],
).set_index(META_IDX)


Expand Down Expand Up @@ -157,17 +158,45 @@ def test_init_df_with_extra_col(test_pd_df):
pd.testing.assert_frame_equal(obs, exp)


def test_init_df_with_meta(test_pd_df):
# pass explicit meta dataframe with a scenario that doesn't exist in data
df = IamDataFrame(test_pd_df, meta=META_DF[["foo"]])
def test_init_df_with_meta_with_index(test_pd_df):
    """An indexed `meta` frame is downselected to scenarios present in the data."""
    # META_DF includes "scen_c", a scenario that doesn't exist in the data
    obs = IamDataFrame(test_pd_df, meta=META_DF)

    # only the first two rows of META_DF should remain after initialization
    exp_meta = META_DF.iloc[[0, 1]]
    pd.testing.assert_frame_equal(obs.meta, exp_meta)
    assert obs.scenario == ["scen_a", "scen_b"]


def test_init_df_with_meta_no_index(test_pd_df):
    """A `meta` frame without an index is accepted and downselected to the data."""
    # move the index levels of META_DF back into ordinary columns;
    # the frame still includes "scen_c", which doesn't exist in the data
    flat_meta = META_DF.reset_index()
    obs = IamDataFrame(test_pd_df, meta=flat_meta)

    # the resulting meta should equal the first two rows of the indexed META_DF
    exp_meta = META_DF.iloc[[0, 1]]
    pd.testing.assert_frame_equal(obs.meta, exp_meta)
    assert obs.scenario == ["scen_a", "scen_b"]


def test_init_df_with_meta_key_value(test_pd_df):
    """A `meta` frame given as key/value columns yields the wide-format META_DF."""
    # long-format rows: one (model, scenario, key, value) entry per indicator;
    # "scen_b" has no "number" entry and "scen_c" doesn't exist in the data
    long_rows = [
        ["model_a", "scen_a", "number", 1],
        ["model_a", "scen_a", "string", "foo"],
        ["model_a", "scen_b", "string", "bar"],
        ["model_a", "scen_c", "number", 2],
    ]
    meta_df = pd.DataFrame(long_rows, columns=META_IDX + ["key", "value"])
    obs = IamDataFrame(test_pd_df, meta=meta_df)

    # compare against the first two rows of META_DF, ignoring column dtypes
    exp_meta = META_DF.iloc[[0, 1]]
    pd.testing.assert_frame_equal(obs.meta, exp_meta, check_dtype=False)
    assert obs.scenario == ["scen_a", "scen_b"]


def test_init_df_with_meta_exclude_raises(test_pd_df):
# pass explicit meta dataframe with a scenario that
# pass explicit meta dataframe with a legacy "exclude" column
meta = META_DF.copy()
meta["exclude"] = False
with pytest.raises(ValueError, match="Illegal columns in `meta`: 'exclude'"):
Expand Down

0 comments on commit 6583339

Please sign in to comment.