[WIP] Fix requesting ixmp data over API (#271) (#273)

Fix requesting ixmp data over API (#271) - pass credentials when requesting the available apps - add test for invalid credentials - tolerate an empty result - set n/a meta column values - use the HTTP response text instead of the response object in error messages - pass an empty filter if all variables/regions are to be fetched from the API - readability/docstrings/robustness - add text to release notes - change back construction of df from API call result (fixed column list, needed for an empty response!) - allow customizing the auth server API URL (needed e.g. for local development)
IAMconsortium · Oct 30, 2019 · ac7caa0 · ac7caa0
1 parent 1319fe4
commit ac7caa0
Show file tree

Hide file tree

Showing 3 changed files with 54 additions and 16 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -12,6 +12,7 @@
 - [#243](https://github.com/IAMconsortium/pyam/pull/243) Update `pyam.iiasa.Connection` to support all public and private database connections. DEPRECATED: the argument 'iamc15' has been deprecated in favor of names as queryable directly from the REST API.
 - [#241](https://github.com/IAMconsortium/pyam/pull/241) Add `set_meta_from_data` feature
 - [#236](https://github.com/IAMconsortium/pyam/pull/236) Add `swap_time_for_year` method and confirm datetime column is compatible with pyam features
+- [#273](https://github.com/IAMconsortium/pyam/pull/273) Fix several issues accessing the IXMP API (pass correct credentials, improve reliability for optional fields in result payload)
 
 # Release v0.2.0
 

diff --git a/pyam/iiasa.py b/pyam/iiasa.py
@@ -31,12 +31,12 @@
 
 def _check_response(r, msg='Trouble with request', error=RuntimeError):
     if not r.ok:
-        raise error('{}: {}'.format(msg, str(r)))
+        raise error('{}: {}'.format(msg, str(r.text)))
 
 
-def _get_token(creds):
+def _get_token(creds, base_url):
     if creds is None:  # get anonymous auth
-        url = '/'.join([_BASE_URL, 'anonym'])
+        url = '/'.join([base_url, 'anonym'])
         r = requests.get(url)
         _check_response(r, 'Could not get anonymous token')
         return r.json()
@@ -64,7 +64,7 @@ def _get_token(creds):
     headers = {'Accept': 'application/json',
                'Content-Type': 'application/json'}
     data = {'username': user, 'password': pw}
-    url = '/'.join([_BASE_URL, 'login'])
+    url = '/'.join([base_url, 'login'])
     r = requests.post(url, headers=headers, data=json.dumps(data))
     _check_response(r, 'Login failed for user: {}'.format(user))
     return r.json()
@@ -73,7 +73,7 @@ def _get_token(creds):
 class Connection(object):
     """A class to facilitate querying an IIASA scenario explorer database"""
 
-    def __init__(self, name=None, creds=None):
+    def __init__(self, name=None, creds=None, base_url=_BASE_URL):
         """
         Parameters
         ----------
@@ -86,8 +86,10 @@ def __init__(self, name=None, creds=None):
                 (preferred)
               - an ordered container (tuple, list, etc.) with the same values
               - a dictionary with the same keys
+        base_url: str, custom authentication server URL
         """
-        self._token = _get_token(creds)
+        self._base_url = base_url
+        self._token = _get_token(creds, base_url=self._base_url)
 
         # connect if provided a name
         self._connected = None
@@ -97,7 +99,7 @@ def __init__(self, name=None, creds=None):
     @property
     @lru_cache()
     def valid_connections(self):
-        url = '/'.join([_BASE_URL, 'applications'])
+        url = '/'.join([self._base_url, 'applications'])
         headers = {'Authorization': 'Bearer {}'.format(self._token)}
         r = requests.get(url, headers=headers)
         _check_response(r, 'Could not get valid connection list')
@@ -126,7 +128,7 @@ def connect(self, name):
             """
             raise ValueError(msg.format(name, valid))
 
-        url = '/'.join([_BASE_URL, 'applications', name, 'config'])
+        url = '/'.join([self._base_url, 'applications', name, 'config'])
         headers = {'Authorization': 'Bearer {}'.format(self._token)}
         r = requests.get(url, headers=headers)
         _check_response(r, 'Could not get application information')
@@ -279,7 +281,13 @@ def _match(data, patterns):
 
         # get unique other values
         variables = _match(self.variables(), v_pattern)
+        # pass empty list to API if all variables selected
+        if len(variables) == len(self.variables()):
+            variables = []
         regions = _match(self.regions(), r_pattern)
+        # pass empty list to API if all regions selected
+        if len(regions) == len(self.regions()):
+            regions = []
 
         data = {
             "filters": {
@@ -317,14 +325,23 @@ def query(self, **kwargs):
         }
         data = json.dumps(self._query_post_data(**kwargs))
         url = '/'.join([self._base_url, 'runs/bulk/ts'])
+        logger().debug('Querying timeseries data '
+                       'from {} with filter {}'.format(url, data))
         r = requests.post(url, headers=headers, data=data)
         _check_response(r)
         # refactor returned json object to be castable to an IamDataFrame
-        df = (
-            pd.read_json(r.content, orient='records')
-            .drop(columns='runId')
-            .rename(columns={'time': 'subannual'})
-        )
+        df = pd.read_json(r.content, orient='records')
+        logger().debug('Response size is {0} bytes, '
+                       '{1} records'.format(len(r.content), len(df)))
+        columns = ['model', 'scenario', 'variable', 'unit',
+                   'region', 'year', 'value', 'time', 'meta',
+                   'version']
+        # keep only known columns or init empty df
+        df = pd.DataFrame(data=df, columns=columns)
+        # replace missing meta (for backward compatibility)
+        df.fillna({'meta': 0}, inplace=True)
+        df.fillna({'time': 0}, inplace=True)
+        df.rename(columns={'time': 'subannual'}, inplace=True)
         # check if returned dataframe has subannual disaggregation, drop if not
         if pd.Series([i in [-1, 'year'] for i in df.subannual]).all():
             df.drop(columns='subannual', inplace=True)
@@ -333,15 +350,17 @@ def query(self, **kwargs):
             df[META_IDX + ['version']].drop_duplicates()
             .groupby(META_IDX).count().version
         )
-        if max(lst) > 1:
+        # checking if there are multiple versions
+        # for every model/scenario combination
+        if len(lst) > 1 and max(lst) > 1:
             raise ValueError('multiple versions for {}'.format(
                 lst[lst > 1].index.to_list()))
         df.drop(columns='version', inplace=True)
 
         return df
 
 
-def read_iiasa(name, meta=False, **kwargs):
+def read_iiasa(name, meta=False, creds=None, base_url=_BASE_URL, **kwargs):
     """
     Query an IIASA database. See Connection.query() for more documentation
 
@@ -351,10 +370,15 @@ def read_iiasa(name, meta=False, **kwargs):
         A valid IIASA database name, see pyam.iiasa.valid_connection_names()
     meta : bool or list of strings
         If not False, also include metadata indicators (or subset if provided).
+    creds : dict
+        Credentials to access IXMP and authentication service APIs
+        (username/password)
+    base_url: str
+        Authentication server URL
     kwargs :
         Arguments for pyam.iiasa.Connection.query()
     """
-    conn = Connection(name)
+    conn = Connection(name, creds, base_url)
     # data
     df = conn.query(**kwargs)
     df = IamDataFrame(df)

diff --git a/tests/test_iiasa.py b/tests/test_iiasa.py
@@ -158,6 +158,19 @@ def test_query_IXSE_SR15():
     assert len(df) == 20
 
 
+def test_query_IXSE_AR6():
+    with pytest.raises(RuntimeError) as excinfo:
+        variable = 'Emissions|CO2|Energy|Demand|Transportation'
+        creds = dict(username='mahamba', password='verysecret')
+        iiasa.read_iiasa('IXSE_AR6',
+                         scenario='ADVANCE_2020_WB2C',
+                         model='AIM/CGE 2.0',
+                         region='World',
+                         variable=variable,
+                         creds=creds)
+    assert str(excinfo.value).startswith('Login failed for user: mahamba')
+
+
 def test_query_IXSE_SR15_with_metadata():
     df = iiasa.read_iiasa('IXSE_SR15',
                           model='MESSAGEix*',