From b176b6edbbc741038c894e0c2d5543a4c2f522fa Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 16 Nov 2020 22:00:02 +0100 Subject: [PATCH 1/9] use tmpdir fixture and Path in io-tests --- tests/conftest.py | 7 +++--- tests/test_io.py | 56 ++++++++++++++++++----------------------------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index ae919821d..c02eb8255 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,7 @@ import matplotlib matplotlib.use('agg') +from pathlib import Path import os from requests.exceptions import ConnectionError import pytest @@ -23,9 +24,9 @@ TEST_API_NAME = 'IXSE_INTEGRATION_TEST' -here = os.path.dirname(os.path.realpath(__file__)) -IMAGE_BASELINE_DIR = os.path.join(here, 'expected_figs') -TEST_DATA_DIR = os.path.join(here, 'data') +here = Path(__file__).parent +IMAGE_BASELINE_DIR = here / 'expected_figs' +TEST_DATA_DIR = here / 'data' TEST_YEARS = [2005, 2010] diff --git a/tests/test_io.py b/tests/test_io.py index c3453a9bc..043891412 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path import pandas as pd import numpy as np import pytest @@ -11,29 +11,26 @@ FILTER_ARGS = dict(scenario='scen_a') -def test_io_csv(test_df): +def test_io_csv(test_df, tmpdir): # write to csv - file = 'testing_io_write_read.csv' + file = tmpdir / 'testing_io_write_read.csv' test_df.to_csv(file) - # read from csv + # read from csv and assert that `data` tables are equal import_df = IamDataFrame(file) - - # assert that `data` tables are equal and delete file pd.testing.assert_frame_equal(test_df.data, import_df.data) - os.remove(file) @pytest.mark.parametrize("meta_args", [ [{}, {}], [dict(include_meta='foo'), dict(meta_sheet_name='foo')] ]) -def test_io_xlsx(test_df, meta_args): +def test_io_xlsx(test_df, meta_args, tmpdir): # add column to `meta` test_df.set_meta(['a', 'b'], 'string') - # write to xlsx (direct file name and 
ExcelWriter, see bug report #300) - file = 'testing_io_write_read.xlsx' + # write to xlsx (direct file name and ExcelWriter, see #300) + file = tmpdir / 'testing_io_write_read.xlsx' for f in [file, pd.ExcelWriter(file)]: test_df.to_excel(f, **meta_args[0]) if isinstance(f, pd.ExcelWriter): @@ -44,34 +41,31 @@ def test_io_xlsx(test_df, meta_args): # assert that IamDataFrame instances are equal and delete file assert_iamframe_equal(test_df, import_df) - os.remove(file) -def test_init_df_with_na_unit(test_pd_df): +def test_init_df_with_na_unit(test_pd_df, tmpdir): # missing values in the unit column are replaced by an empty string test_pd_df.loc[1, 'unit'] = np.nan df = IamDataFrame(test_pd_df) assert df.unit == ['', 'EJ/yr'] # writing to file and importing as pandas returns `nan`, not empty string - file = 'na_unit.csv' + file = tmpdir / 'na_unit.csv' df.to_csv(file) df_csv = pd.read_csv(file) assert np.isnan(df_csv.loc[1, 'Unit']) - IamDataFrame('na_unit.csv') # reading from file as IamDataFrame works - os.remove(file) + IamDataFrame(file) # reading from file as IamDataFrame works - file = 'na_unit.xlsx' + file = tmpdir / 'na_unit.xlsx' df.to_excel(file) df_excel = pd.read_excel(file) assert np.isnan(df_excel.loc[1, 'Unit']) - IamDataFrame('na_unit.xlsx') # reading from file as IamDataFrame works - os.remove(file) + IamDataFrame(file) # reading from file as IamDataFrame works @pytest.mark.parametrize("args", [{}, dict(sheet_name='meta')]) def test_load_meta(test_df, args): - file = os.path.join(TEST_DATA_DIR, 'testing_metadata.xlsx') + file = TEST_DATA_DIR / 'testing_metadata.xlsx' test_df.load_meta(file, **args) obs = test_df.meta @@ -84,32 +78,24 @@ def test_load_meta(test_df, args): def test_load_ssp_database_downloaded_file(test_pd_df): exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas() - obs_df = IamDataFrame(os.path.join( - TEST_DATA_DIR, 'test_SSP_database_raw_download.xlsx') - ) + file = TEST_DATA_DIR / 'test_SSP_database_raw_download.xlsx' + 
obs_df = IamDataFrame(file) pd.testing.assert_frame_equal(obs_df.as_pandas(), exp) def test_load_rcp_database_downloaded_file(test_pd_df): exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas() - obs_df = IamDataFrame(os.path.join( - TEST_DATA_DIR, 'test_RCP_database_raw_download.xlsx') - ) + file = TEST_DATA_DIR / 'test_RCP_database_raw_download.xlsx' + obs_df = IamDataFrame(file) pd.testing.assert_frame_equal(obs_df.as_pandas(), exp) -def test_io_datapackage(test_df): - file = 'foo.zip' - - # add column to `meta` +def test_io_datapackage(test_df, tmpdir): + # add column to `meta` and write to datapackage + file = Path(tmpdir) / 'foo.zip' test_df.set_meta(['a', 'b'], 'string') - - # write to datapackage test_df.to_datapackage(file) - # read from csv + # read from datapackage and assert that IamDataFrame instances are equal import_df = read_datapackage(file) - - # assert that IamDataFrame instances are equal and delete file assert_iamframe_equal(test_df, import_df) - os.remove(file) From b8e3103e8b285bd207a38f1235945ecab97f7fd6 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 16 Nov 2020 22:01:08 +0100 Subject: [PATCH 2/9] move error message for initializing from list to `core.py` --- pyam/core.py | 3 +++ pyam/utils.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index e886bf27e..7892acdde 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -131,6 +131,9 @@ def _init(self, data, **kwargs): # TODO read meta indicators from ixmp meta = None _data = read_ix(data, **kwargs) + elif islistable(data): + raise ValueError('Initializing from list is not supported, ' + 'use `IamDataFrame.append()` or `pyam.concat()`') else: meta = None logger.info('Reading file `{}`'.format(data)) diff --git a/pyam/utils.py b/pyam/utils.py index cb98067d5..f1e9c4707 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -120,9 +120,6 @@ def read_pandas(path, default_sheet='data', *args, **kwargs): def read_file(path, *args, **kwargs): """Read
data from a file""" - if not isstr(path): - raise ValueError('Reading multiple files not supported, ' - 'use `IamDataFrame.append()` or `pyam.concat()`') format_kwargs = {} # extract kwargs that are intended for `format_data` for c in [i for i in IAMC_IDX + ['year', 'time', 'value'] if i in kwargs]: From b3e2861245c36b0beb30007779c1b9cdac4ed9a8 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Nov 2020 00:23:52 +0100 Subject: [PATCH 3/9] rework the casting workflow --- pyam/core.py | 35 +++++++++++++++++++++++++---------- pyam/utils.py | 5 ++++- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index 7892acdde..b4380b882 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -123,21 +123,35 @@ def _init(self, data, **kwargs): # pop kwarg for meta_sheet_name (prior to reading data from file) meta_sheet = kwargs.pop('meta_sheet_name', 'meta') - # import data from pd.DataFrame or read from source + if islistable(data): + raise ValueError('Initializing from list is not supported, ' + 'use `IamDataFrame.append()` or `pyam.concat()`') + + # cast data from pandas if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series): meta = kwargs.pop('meta') if 'meta' in kwargs else None _data = format_data(data.copy(), **kwargs) + # read data from ixmp Platform instance elif has_ix and isinstance(data, ixmp.TimeSeries): # TODO read meta indicators from ixmp meta = None _data = read_ix(data, **kwargs) - elif islistable(data): - raise ValueError('Initializing from list is not supported, ' - 'use `IamDataFrame.append()` or `pyam.concat()`') + # read from file else: - meta = None - logger.info('Reading file `{}`'.format(data)) - _data = read_file(data, **kwargs) + try: + data = Path(data) # casting str or LocalPath to Path + is_file = data.is_file() + except TypeError: # `data` cannot be cast to Path + is_file = False + + if is_file: + meta = None + logger.info('Reading file `{}`'.format(data)) + _data = read_file(data, **kwargs) + # 
if not a readable file... + else: + msg = 'IamDataFrame constructor not properly called!' + raise ValueError(msg) _df, self.time_col, self.extra_cols = _data self._LONG_IDX = IAMC_IDX + [self.time_col] + self.extra_cols @@ -154,9 +168,10 @@ def _init(self, data, **kwargs): self.meta, ignore_meta_conflict=True) # if initializing from xlsx, try to load `meta` table from file - if isstr(data) and data.endswith('.xlsx') and meta_sheet is not False\ - and meta_sheet in pd.ExcelFile(data).sheet_names: - self.load_meta(data, sheet_name=meta_sheet) + if meta_sheet and isinstance(data, Path) and data.suffix == '.xlsx': + excel_file = pd.ExcelFile(data) + if meta_sheet in excel_file.sheet_names: + self.load_meta(excel_file, sheet_name=meta_sheet) # add time domain and extra-cols as attributes if self.time_col == 'year': diff --git a/pyam/utils.py b/pyam/utils.py index f1e9c4707..41f62effb 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -1,3 +1,4 @@ +from pathlib import Path import itertools import logging import string @@ -108,7 +109,9 @@ def write_sheet(writer, name, df, index=False): def read_pandas(path, default_sheet='data', *args, **kwargs): """Read a file and return a pandas.DataFrame""" - if path.endswith('csv'): + if isinstance(path, Path) and path.suffix == '.csv': + df = pd.read_csv(path, *args, **kwargs) + elif isstr(path) and path.endswith('csv'): df = pd.read_csv(path, *args, **kwargs) else: xl = pd.ExcelFile(path) From ea6f089354f72bb008327817e32b58a00bfcb7cc Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Nov 2020 16:49:40 +0100 Subject: [PATCH 4/9] put listable-error into correct place --- pyam/core.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index b4380b882..7944e38ed 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -123,10 +123,6 @@ def _init(self, data, **kwargs): # pop kwarg for meta_sheet_name (prior to reading data from file) meta_sheet = kwargs.pop('meta_sheet_name', 'meta') 
- if islistable(data): - raise ValueError('Initializing from list is not supported, ' - 'use `IamDataFrame.append()` or `pyam.concat()`') - # cast data from pandas if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series): meta = kwargs.pop('meta') if 'meta' in kwargs else None @@ -138,6 +134,12 @@ def _init(self, data, **kwargs): _data = read_ix(data, **kwargs) # read from file else: + if islistable(data): + raise ValueError( + 'Initializing from list is not supported, ' + 'use `IamDataFrame.append()` or `pyam.concat()`' + ) + try: data = Path(data) # casting str or LocalPath to Path is_file = data.is_file() From e4554b6de52f9e538ae38f101b227a53313afe01 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Nov 2020 16:50:14 +0100 Subject: [PATCH 5/9] simplify read_pandas (make path as str illegal) --- pyam/core.py | 2 +- pyam/utils.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index 7944e38ed..ce9d03c9b 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -1657,7 +1657,7 @@ def load_meta(self, path, *args, **kwargs): any valid string path or :class:`pathlib.Path` """ # load from file - df = read_pandas(path, default_sheet='meta', *args, **kwargs) + df = read_pandas(Path(path), default_sheet='meta', *args, **kwargs) # cast model-scenario column headers to lower-case (if necessary) df = df.rename(columns=dict([(i.capitalize(), i) for i in META_IDX])) diff --git a/pyam/utils.py b/pyam/utils.py index 41f62effb..edcbafad8 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -111,8 +111,6 @@ def read_pandas(path, default_sheet='data', *args, **kwargs): """Read a file and return a pandas.DataFrame""" if isinstance(path, Path) and path.suffix == '.csv': df = pd.read_csv(path, *args, **kwargs) - elif isstr(path) and path.endswith('csv'): - df = pd.read_csv(path, *args, **kwargs) else: xl = pd.ExcelFile(path) if len(xl.sheet_names) > 1 and 'sheet_name' not in kwargs: From 4448433f20fdb13be935cc28e89cd024481ad618 Mon 
Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Nov 2020 19:01:33 +0100 Subject: [PATCH 6/9] update `map_regions()` function --- pyam/core.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index ce9d03c9b..2d453c948 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -132,14 +132,13 @@ def _init(self, data, **kwargs): # TODO read meta indicators from ixmp meta = None _data = read_ix(data, **kwargs) - # read from file else: if islistable(data): raise ValueError( 'Initializing from list is not supported, ' 'use `IamDataFrame.append()` or `pyam.concat()`' ) - + # read from file try: data = Path(data) # casting str or LocalPath to Path is_file = data.is_file() @@ -1831,9 +1830,8 @@ def map_regions(self, map_col, agg=None, copy_col=None, fname=None, inplace : bool, optional if True, do operation inplace and return None """ - models = self.meta.index.get_level_values('model').unique() fname = fname or run_control()['region_mapping']['default'] - mapping = read_pandas(fname).rename(str.lower, axis='columns') + mapping = read_pandas(Path(fname)).rename(str.lower, axis='columns') map_col = map_col.lower() ret = self.copy() if not inplace else self @@ -1842,7 +1840,7 @@ def map_regions(self, map_col, agg=None, copy_col=None, fname=None, # merge data dfs = [] - for model in models: + for model in self.model: df = _df[_df['model'] == model] _col = region_col or '{}.REGION'.format(model) _map = mapping.rename(columns={_col.lower(): 'region'}) From 4b1b7b416ebd8b45468684bb5ef4408ee528f92d Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Wed, 18 Nov 2020 06:45:34 +0100 Subject: [PATCH 7/9] make `meta` an explicit kwarg of the IamDataFrame initialization --- pyam/core.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index 2d453c948..e5b038614 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -77,7 +77,9 @@ class IamDataFrame(object): or data file with 
the required data columns. A pandas.DataFrame can have the required data as columns or index. Support is provided additionally for R-style data columns for years, - like "X2015", etc. + meta : :class:`pandas.DataFrame`, optional + A dataframe with suitable 'meta' indicators for the new instance. + The index will be downselected to scenarios present in `data`. kwargs If `value=<col>`, melt column `<col>` to 'value' and use `<col>` name as 'variable'; or mapping of required columns (:code:`IAMC_IDX`) to @@ -87,10 +89,6 @@ class IamDataFrame(object): - multiple columns, to be concatenated by :code:`|` - a string to be used as value for this column - A :class:`pandas.DataFrame` with suitable `meta` indicators can be - passed as `meta=<df>`. The index will be downselected to those - scenarios that have timeseries data. - Notes ----- When initializing an :class:`IamDataFrame` from an xlsx file, @@ -107,7 +105,7 @@ class IamDataFrame(object): This is intended behaviour and consistent with pandas but may be confusing for those who are not used to the pandas/Python universe.
""" - def __init__(self, data, **kwargs): + def __init__(self, data, meta=None, **kwargs): """Initialize an instance of an IamDataFrame""" if isinstance(data, IamDataFrame): if kwargs: @@ -116,21 +114,19 @@ def __init__(self, data, **kwargs): for attr, value in data.__dict__.items(): setattr(self, attr, value) else: - self._init(data, **kwargs) + self._init(data, meta, **kwargs) - def _init(self, data, **kwargs): + def _init(self, data, meta=None, **kwargs): """Process data and set attributes for new instance""" # pop kwarg for meta_sheet_name (prior to reading data from file) meta_sheet = kwargs.pop('meta_sheet_name', 'meta') # cast data from pandas if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series): - meta = kwargs.pop('meta') if 'meta' in kwargs else None _data = format_data(data.copy(), **kwargs) # read data from ixmp Platform instance elif has_ix and isinstance(data, ixmp.TimeSeries): # TODO read meta indicators from ixmp - meta = None _data = read_ix(data, **kwargs) else: if islistable(data): @@ -146,7 +142,6 @@ def _init(self, data, **kwargs): is_file = False if is_file: - meta = None logger.info('Reading file `{}`'.format(data)) _data = read_file(data, **kwargs) # if not a readable file... 
From 7d3402ff97d5befbe6fcd4e534bbdf1e88ad96ef Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Wed, 18 Nov 2020 06:46:04 +0100 Subject: [PATCH 8/9] rewrite the IamDataFrame initialization docs --- pyam/core.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index e5b038614..5fb4b261f 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -64,7 +64,7 @@ class IamDataFrame(object): - """Scenario timeseries data following the IAMC-structure + """Scenario timeseries data following the IAMC data format The class provides a number of diagnostic features (including validation of data, completeness of variables provided), processing tools (e.g., @@ -72,11 +72,11 @@ class IamDataFrame(object): Parameters ---------- - data : ixmp.Scenario, pd.DataFrame or data file - an instance of an :class:`ixmp.Scenario`, :class:`pandas.DataFrame`, - or data file with the required data columns. - A pandas.DataFrame can have the required data as columns or index. - Support is provided additionally for R-style data columns for years, + data : :class:`pandas.DataFrame`, :class:`ixmp.Scenario`, + or file-like object as str or :class:`pathlib.Path` + Scenario timeseries data following the IAMC data format or + a supported variation as pandas object, a path to a file, + or a scenario of an ixmp instance. meta : :class:`pandas.DataFrame`, optional A dataframe with suitable 'meta' indicators for the new instance. The index will be downselected to scenarios present in `data`. @@ -91,10 +91,14 @@ class IamDataFrame(object): Notes ----- + A :class:`pandas.DataFrame` can have the required dimensions + as columns or index. + R-style integer column headers (i.e., `X2015`) are acceptable. + When initializing an :class:`IamDataFrame` from an xlsx file, |pyam| will per default look for the sheets 'data' and 'meta' to populate the respective tables. 
Custom sheet names can be specified with - kwargs :code:`sheet_name` ('data') and :code:`meta_sheet_name` ('meta') + kwargs :code:`sheet_name` ('data') and :code:`meta_sheet_name` ('meta'). Calling the class with :code:`meta_sheet_name=False` will skip the import of the 'meta' table. From 4c9f6ed86f711625f3e6fe68ae38875939e89636 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Wed, 18 Nov 2020 06:55:40 +0100 Subject: [PATCH 9/9] Add to release notes --- RELEASE_NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 8324afc3c..47aa1a138 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -2,6 +2,7 @@ ## Individual updates +- [#458](https://github.com/IAMconsortium/pyam/pull/458) Enable `Path` for IamDataFrame initialization - [#454](https://github.com/IAMconsortium/pyam/pull/454) Enable dimensionless units and fix `info()` if IamDataFrame is empty - [#451](https://github.com/IAMconsortium/pyam/pull/451) Fix unit conversions from C to CO2eq - [#450](https://github.com/IAMconsortium/pyam/pull/450) Defer logging set-up to when the first logging message is generated