From b176b6edbbc741038c894e0c2d5543a4c2f522fa Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 16 Nov 2020 22:00:02 +0100
Subject: [PATCH 1/9] use tmpdir fixture and Path in io-tests

---
 tests/conftest.py |  7 +++---
 tests/test_io.py  | 56 ++++++++++++++++++-----------------------------
 2 files changed, 25 insertions(+), 38 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index ae919821d..c02eb8255 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,6 +2,7 @@
 import matplotlib
 matplotlib.use('agg')
 
+from pathlib import Path
 import os
 from requests.exceptions import ConnectionError
 import pytest
@@ -23,9 +24,9 @@
 TEST_API_NAME = 'IXSE_INTEGRATION_TEST'
 
 
-here = os.path.dirname(os.path.realpath(__file__))
-IMAGE_BASELINE_DIR = os.path.join(here, 'expected_figs')
-TEST_DATA_DIR = os.path.join(here, 'data')
+here = Path(__file__).parent
+IMAGE_BASELINE_DIR = here / 'expected_figs'
+TEST_DATA_DIR = here / 'data'
 
 
 TEST_YEARS = [2005, 2010]
diff --git a/tests/test_io.py b/tests/test_io.py
index c3453a9bc..043891412 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -1,4 +1,4 @@
-import os
+from pathlib import Path
 import pandas as pd
 import numpy as np
 import pytest
@@ -11,29 +11,26 @@
 FILTER_ARGS = dict(scenario='scen_a')
 
 
-def test_io_csv(test_df):
+def test_io_csv(test_df, tmpdir):
     # write to csv
-    file = 'testing_io_write_read.csv'
+    file = tmpdir / 'testing_io_write_read.csv'
     test_df.to_csv(file)
 
-    # read from csv
+    # read from csv and assert that `data` tables are equal
     import_df = IamDataFrame(file)
-
-    # assert that `data` tables are equal and delete file
     pd.testing.assert_frame_equal(test_df.data, import_df.data)
-    os.remove(file)
 
 
 @pytest.mark.parametrize("meta_args", [
     [{}, {}],
     [dict(include_meta='foo'), dict(meta_sheet_name='foo')]
 ])
-def test_io_xlsx(test_df, meta_args):
+def test_io_xlsx(test_df, meta_args, tmpdir):
     # add column to `meta`
     test_df.set_meta(['a', 'b'], 'string')
 
-    # write to xlsx (direct file name and ExcelWriter, see bug report #300)
-    file = 'testing_io_write_read.xlsx'
+    # write to xlsx (direct file name and ExcelWriter, see #300)
+    file = tmpdir / 'testing_io_write_read.xlsx'
     for f in [file, pd.ExcelWriter(file)]:
         test_df.to_excel(f, **meta_args[0])
         if isinstance(f, pd.ExcelWriter):
@@ -44,34 +41,31 @@ def test_io_xlsx(test_df, meta_args):
 
         # assert that IamDataFrame instances are equal and delete file
         assert_iamframe_equal(test_df, import_df)
-        os.remove(file)
 
 
-def test_init_df_with_na_unit(test_pd_df):
+def test_init_df_with_na_unit(test_pd_df, tmpdir):
     # missing values in the unit column are replaced by an empty string
     test_pd_df.loc[1, 'unit'] = np.nan
     df = IamDataFrame(test_pd_df)
     assert df.unit == ['', 'EJ/yr']
 
     # writing to file and importing as pandas returns `nan`, not empty string
-    file = 'na_unit.csv'
+    file = tmpdir / 'na_unit.csv'
     df.to_csv(file)
     df_csv = pd.read_csv(file)
     assert np.isnan(df_csv.loc[1, 'Unit'])
-    IamDataFrame('na_unit.csv')  # reading from file as IamDataFrame works
-    os.remove(file)
+    IamDataFrame(file)  # reading from file as IamDataFrame works
 
-    file = 'na_unit.xlsx'
+    file = tmpdir / 'na_unit.xlsx'
     df.to_excel(file)
     df_excel = pd.read_excel(file)
     assert np.isnan(df_excel.loc[1, 'Unit'])
-    IamDataFrame('na_unit.xlsx')  # reading from file as IamDataFrame works
-    os.remove(file)
+    IamDataFrame(file)  # reading from file as IamDataFrame works
 
 
 @pytest.mark.parametrize("args", [{}, dict(sheet_name='meta')])
 def test_load_meta(test_df, args):
-    file = os.path.join(TEST_DATA_DIR, 'testing_metadata.xlsx')
+    file = TEST_DATA_DIR / 'testing_metadata.xlsx'
     test_df.load_meta(file, **args)
     obs = test_df.meta
 
@@ -84,32 +78,24 @@ def test_load_meta(test_df, args):
 
 def test_load_ssp_database_downloaded_file(test_pd_df):
     exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas()
-    obs_df = IamDataFrame(os.path.join(
-        TEST_DATA_DIR, 'test_SSP_database_raw_download.xlsx')
-    )
+    file = TEST_DATA_DIR / 'test_SSP_database_raw_download.xlsx'
+    obs_df = IamDataFrame(file)
     pd.testing.assert_frame_equal(obs_df.as_pandas(), exp)
 
 
 def test_load_rcp_database_downloaded_file(test_pd_df):
     exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas()
-    obs_df = IamDataFrame(os.path.join(
-        TEST_DATA_DIR, 'test_RCP_database_raw_download.xlsx')
-    )
+    file = TEST_DATA_DIR / 'test_RCP_database_raw_download.xlsx'
+    obs_df = IamDataFrame(file)
     pd.testing.assert_frame_equal(obs_df.as_pandas(), exp)
 
 
-def test_io_datapackage(test_df):
-    file = 'foo.zip'
-
-    # add column to `meta`
+def test_io_datapackage(test_df, tmpdir):
+    # add column to `meta` and write to datapackage
+    file = Path(tmpdir) / 'foo.zip'
     test_df.set_meta(['a', 'b'], 'string')
-
-    # write to datapackage
     test_df.to_datapackage(file)
 
-    # read from csv
+    # read from datapackage and assert that IamDataFrame instances are equal
     import_df = read_datapackage(file)
-
-    # assert that IamDataFrame instances are equal and delete file
     assert_iamframe_equal(test_df, import_df)
-    os.remove(file)

From b8e3103e8b285bd207a38f1235945ecab97f7fd6 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 16 Nov 2020 22:01:08 +0100
Subject: [PATCH 2/9] move error message for initializing from list to
 `core.py`

---
 pyam/core.py  | 3 +++
 pyam/utils.py | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index e886bf27e..7892acdde 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -131,6 +131,9 @@ def _init(self, data, **kwargs):
             # TODO read meta indicators from ixmp
             meta = None
             _data = read_ix(data, **kwargs)
+        elif islistable(data):
+            raise ValueError('Initializing from list is not supported, '
+                             'use `IamDataFrame.append()` or `pyam.concat()`')
         else:
             meta = None
             logger.info('Reading file `{}`'.format(data))
diff --git a/pyam/utils.py b/pyam/utils.py
index cb98067d5..f1e9c4707 100644
--- a/pyam/utils.py
+++ b/pyam/utils.py
@@ -120,9 +120,6 @@ def read_pandas(path, default_sheet='data', *args, **kwargs):
 
 def read_file(path, *args, **kwargs):
     """Read data from a file"""
-    if not isstr(path):
-        raise ValueError('Reading multiple files not supported, '
-                         'use `IamDataFrame.append()` or `pyam.concat()`')
     format_kwargs = {}
     # extract kwargs that are intended for `format_data`
     for c in [i for i in IAMC_IDX + ['year', 'time', 'value'] if i in kwargs]:

From b3e2861245c36b0beb30007779c1b9cdac4ed9a8 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Tue, 17 Nov 2020 00:23:52 +0100
Subject: [PATCH 3/9] rework the casting workflow

---
 pyam/core.py  | 35 +++++++++++++++++++++++++----------
 pyam/utils.py |  5 ++++-
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index 7892acdde..b4380b882 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -123,21 +123,35 @@ def _init(self, data, **kwargs):
         # pop kwarg for meta_sheet_name (prior to reading data from file)
         meta_sheet = kwargs.pop('meta_sheet_name', 'meta')
 
-        # import data from pd.DataFrame or read from source
+        if islistable(data):
+            raise ValueError('Initializing from list is not supported, '
+                             'use `IamDataFrame.append()` or `pyam.concat()`')
+
+        # cast data from pandas
         if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
             meta = kwargs.pop('meta') if 'meta' in kwargs else None
             _data = format_data(data.copy(), **kwargs)
+        # read data from ixmp Platform instance
         elif has_ix and isinstance(data, ixmp.TimeSeries):
             # TODO read meta indicators from ixmp
             meta = None
             _data = read_ix(data, **kwargs)
-        elif islistable(data):
-            raise ValueError('Initializing from list is not supported, '
-                             'use `IamDataFrame.append()` or `pyam.concat()`')
+        # read from file
         else:
-            meta = None
-            logger.info('Reading file `{}`'.format(data))
-            _data = read_file(data, **kwargs)
+            try:
+                data = Path(data)  # casting str or LocalPath to Path
+                is_file = data.is_file()
+            except TypeError:  # `data` cannot be cast to Path
+                is_file = False
+
+            if is_file:
+                meta = None
+                logger.info('Reading file `{}`'.format(data))
+                _data = read_file(data, **kwargs)
+            # if not a readable file...
+            else:
+                msg = 'IamDataFrame constructor not properly called!'
+                raise ValueError(msg)
 
         _df, self.time_col, self.extra_cols = _data
         self._LONG_IDX = IAMC_IDX + [self.time_col] + self.extra_cols
@@ -154,9 +168,10 @@ def _init(self, data, **kwargs):
                                    self.meta, ignore_meta_conflict=True)
 
         # if initializing from xlsx, try to load `meta` table from file
-        if isstr(data) and data.endswith('.xlsx') and meta_sheet is not False\
-                and meta_sheet in pd.ExcelFile(data).sheet_names:
-            self.load_meta(data, sheet_name=meta_sheet)
+        if meta_sheet and isinstance(data, Path) and data.suffix == '.xlsx':
+            excel_file = pd.ExcelFile(data)
+            if meta_sheet in excel_file.sheet_names:
+                self.load_meta(excel_file, sheet_name=meta_sheet)
 
         # add time domain and extra-cols as attributes
         if self.time_col == 'year':
diff --git a/pyam/utils.py b/pyam/utils.py
index f1e9c4707..41f62effb 100644
--- a/pyam/utils.py
+++ b/pyam/utils.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 import itertools
 import logging
 import string
@@ -108,7 +109,9 @@ def write_sheet(writer, name, df, index=False):
 
 def read_pandas(path, default_sheet='data', *args, **kwargs):
     """Read a file and return a pandas.DataFrame"""
-    if path.endswith('csv'):
+    if isinstance(path, Path) and path.suffix == '.csv':
+        df = pd.read_csv(path, *args, **kwargs)
+    elif isstr(path) and path.endswith('csv'):
         df = pd.read_csv(path, *args, **kwargs)
     else:
         xl = pd.ExcelFile(path)

From ea6f089354f72bb008327817e32b58a00bfcb7cc Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Tue, 17 Nov 2020 16:49:40 +0100
Subject: [PATCH 4/9] put listable-error into correct place

---
 pyam/core.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index b4380b882..7944e38ed 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -123,10 +123,6 @@ def _init(self, data, **kwargs):
         # pop kwarg for meta_sheet_name (prior to reading data from file)
         meta_sheet = kwargs.pop('meta_sheet_name', 'meta')
 
-        if islistable(data):
-            raise ValueError('Initializing from list is not supported, '
-                             'use `IamDataFrame.append()` or `pyam.concat()`')
-
         # cast data from pandas
         if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
             meta = kwargs.pop('meta') if 'meta' in kwargs else None
@@ -138,6 +134,12 @@ def _init(self, data, **kwargs):
             _data = read_ix(data, **kwargs)
         # read from file
         else:
+            if islistable(data):
+                raise ValueError(
+                    'Initializing from list is not supported, '
+                    'use `IamDataFrame.append()` or `pyam.concat()`'
+                )
+
             try:
                 data = Path(data)  # casting str or LocalPath to Path
                 is_file = data.is_file()

From e4554b6de52f9e538ae38f101b227a53313afe01 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Tue, 17 Nov 2020 16:50:14 +0100
Subject: [PATCH 5/9] simplify read_pandas (make path as str illegal)

---
 pyam/core.py  | 2 +-
 pyam/utils.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index 7944e38ed..ce9d03c9b 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -1657,7 +1657,7 @@ def load_meta(self, path, *args, **kwargs):
             any valid string path or :class:`pathlib.Path`
         """
         # load from file
-        df = read_pandas(path, default_sheet='meta', *args, **kwargs)
+        df = read_pandas(Path(path), default_sheet='meta', *args, **kwargs)
 
         # cast model-scenario column headers to lower-case (if necessary)
         df = df.rename(columns=dict([(i.capitalize(), i) for i in META_IDX]))
diff --git a/pyam/utils.py b/pyam/utils.py
index 41f62effb..edcbafad8 100644
--- a/pyam/utils.py
+++ b/pyam/utils.py
@@ -111,8 +111,6 @@ def read_pandas(path, default_sheet='data', *args, **kwargs):
     """Read a file and return a pandas.DataFrame"""
     if isinstance(path, Path) and path.suffix == '.csv':
         df = pd.read_csv(path, *args, **kwargs)
-    elif isstr(path) and path.endswith('csv'):
-        df = pd.read_csv(path, *args, **kwargs)
     else:
         xl = pd.ExcelFile(path)
         if len(xl.sheet_names) > 1 and 'sheet_name' not in kwargs:

From 4448433f20fdb13be935cc28e89cd024481ad618 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Tue, 17 Nov 2020 19:01:33 +0100
Subject: [PATCH 6/9] update `map_regions()` function

---
 pyam/core.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index ce9d03c9b..2d453c948 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -132,14 +132,13 @@ def _init(self, data, **kwargs):
             # TODO read meta indicators from ixmp
             meta = None
             _data = read_ix(data, **kwargs)
-        # read from file
         else:
             if islistable(data):
                 raise ValueError(
                     'Initializing from list is not supported, '
                     'use `IamDataFrame.append()` or `pyam.concat()`'
                 )
-
+            # read from file
             try:
                 data = Path(data)  # casting str or LocalPath to Path
                 is_file = data.is_file()
@@ -1831,9 +1830,8 @@ def map_regions(self, map_col, agg=None, copy_col=None, fname=None,
         inplace : bool, optional
             if True, do operation inplace and return None
         """
-        models = self.meta.index.get_level_values('model').unique()
         fname = fname or run_control()['region_mapping']['default']
-        mapping = read_pandas(fname).rename(str.lower, axis='columns')
+        mapping = read_pandas(Path(fname)).rename(str.lower, axis='columns')
         map_col = map_col.lower()
 
         ret = self.copy() if not inplace else self
@@ -1842,7 +1840,7 @@ def map_regions(self, map_col, agg=None, copy_col=None, fname=None,
 
         # merge data
         dfs = []
-        for model in models:
+        for model in self.model:
             df = _df[_df['model'] == model]
             _col = region_col or '{}.REGION'.format(model)
             _map = mapping.rename(columns={_col.lower(): 'region'})

From 4b1b7b416ebd8b45468684bb5ef4408ee528f92d Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Wed, 18 Nov 2020 06:45:34 +0100
Subject: [PATCH 7/9] make `meta` an explicit kwarg of the IamDataFrame
 initialization

---
 pyam/core.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index 2d453c948..e5b038614 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -77,7 +77,9 @@ class IamDataFrame(object):
         or data file with the required data columns.
         A pandas.DataFrame can have the required data as columns or index.
         Support is provided additionally for R-style data columns for years,
-        like "X2015", etc.
+    meta : :class:`pandas.DataFrame`, optional
+        A dataframe with suitable 'meta' indicators for the new instance.
+        The index will be downselected to scenarios present in `data`.
     kwargs
         If `value=<col>`, melt column `<col>` to 'value' and use `<col>` name
         as 'variable'; or mapping of required columns (:code:`IAMC_IDX`) to
@@ -87,10 +89,6 @@ class IamDataFrame(object):
         - multiple columns, to be concatenated by :code:`|`
         - a string to be used as value for this column
 
-        A :class:`pandas.DataFrame` with suitable `meta` indicators can be
-        passed as `meta=<df>`. The index will be downselected to those
-        scenarios that have timeseries data.
-
     Notes
     -----
     When initializing an :class:`IamDataFrame` from an xlsx file,
@@ -107,7 +105,7 @@ class IamDataFrame(object):
     This is intended behaviour and consistent with pandas but may be confusing
     for those who are not used to the pandas/Python universe.
     """
-    def __init__(self, data, **kwargs):
+    def __init__(self, data, meta=None, **kwargs):
         """Initialize an instance of an IamDataFrame"""
         if isinstance(data, IamDataFrame):
             if kwargs:
@@ -116,21 +114,19 @@ def __init__(self, data, **kwargs):
             for attr, value in data.__dict__.items():
                 setattr(self, attr, value)
         else:
-            self._init(data, **kwargs)
+            self._init(data, meta, **kwargs)
 
-    def _init(self, data, **kwargs):
+    def _init(self, data, meta=None, **kwargs):
         """Process data and set attributes for new instance"""
         # pop kwarg for meta_sheet_name (prior to reading data from file)
         meta_sheet = kwargs.pop('meta_sheet_name', 'meta')
 
         # cast data from pandas
         if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
-            meta = kwargs.pop('meta') if 'meta' in kwargs else None
             _data = format_data(data.copy(), **kwargs)
         # read data from ixmp Platform instance
         elif has_ix and isinstance(data, ixmp.TimeSeries):
             # TODO read meta indicators from ixmp
-            meta = None
             _data = read_ix(data, **kwargs)
         else:
             if islistable(data):
@@ -146,7 +142,6 @@ def _init(self, data, **kwargs):
                 is_file = False
 
             if is_file:
-                meta = None
                 logger.info('Reading file `{}`'.format(data))
                 _data = read_file(data, **kwargs)
             # if not a readable file...

From 7d3402ff97d5befbe6fcd4e534bbdf1e88ad96ef Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Wed, 18 Nov 2020 06:46:04 +0100
Subject: [PATCH 8/9] rewrite the IamDataFrame initialization docs

---
 pyam/core.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index e5b038614..5fb4b261f 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -64,7 +64,7 @@
 
 
 class IamDataFrame(object):
-    """Scenario timeseries data following the IAMC-structure
+    """Scenario timeseries data following the IAMC data format
 
     The class provides a number of diagnostic features (including validation of
     data, completeness of variables provided), processing tools (e.g.,
@@ -72,11 +72,11 @@ class IamDataFrame(object):
 
     Parameters
     ----------
-    data : ixmp.Scenario, pd.DataFrame or data file
-        an instance of an :class:`ixmp.Scenario`, :class:`pandas.DataFrame`,
-        or data file with the required data columns.
-        A pandas.DataFrame can have the required data as columns or index.
-        Support is provided additionally for R-style data columns for years,
+    data : :class:`pandas.DataFrame`, :class:`ixmp.Scenario`,
+            or path to a data file as str or :class:`pathlib.Path`
+        Scenario timeseries data following the IAMC data format or
+        a supported variation as pandas object, a path to a file,
+        or a scenario of an ixmp instance.
     meta : :class:`pandas.DataFrame`, optional
         A dataframe with suitable 'meta' indicators for the new instance.
         The index will be downselected to scenarios present in `data`.
@@ -91,10 +91,14 @@ class IamDataFrame(object):
 
     Notes
     -----
+    A :class:`pandas.DataFrame` can have the required dimensions
+    as columns or index.
+    R-style integer column headers (e.g., `X2015`) are acceptable.
+
     When initializing an :class:`IamDataFrame` from an xlsx file,
     |pyam| will per default look for the sheets 'data' and 'meta' to
     populate the respective tables. Custom sheet names can be specified with
-    kwargs :code:`sheet_name` ('data') and :code:`meta_sheet_name` ('meta')
+    kwargs :code:`sheet_name` ('data') and :code:`meta_sheet_name` ('meta').
     Calling the class with :code:`meta_sheet_name=False` will
     skip the import of the 'meta' table.
 

From 4c9f6ed86f711625f3e6fe68ae38875939e89636 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Wed, 18 Nov 2020 06:55:40 +0100
Subject: [PATCH 9/9] Add to release notes

---
 RELEASE_NOTES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 8324afc3c..47aa1a138 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -2,6 +2,7 @@
 
 ## Individual updates
 
+- [#458](https://github.com/IAMconsortium/pyam/pull/458) Enable `Path` for IamDataFrame initialization
 - [#454](https://github.com/IAMconsortium/pyam/pull/454) Enable dimensionless units and fix `info()` if IamDataFrame is empty
 - [#451](https://github.com/IAMconsortium/pyam/pull/451) Fix unit conversions from C to CO2eq
 - [#450](https://github.com/IAMconsortium/pyam/pull/450) Defer logging set-up to when the first logging message is generated