From 76282c18d411526cc046fc1c19fc3ec5211f46b1 Mon Sep 17 00:00:00 2001 From: Patrick Date: Thu, 6 Apr 2023 14:44:03 +0200 Subject: [PATCH] added more flexibility in compile_run_output --- oggm/tests/test_utils.py | 63 ++++++++++++++++++++++++++++++++++++++++ oggm/utils/_workflow.py | 60 +++++++++++++++++++++++++++++--------- 2 files changed, 109 insertions(+), 14 deletions(-) diff --git a/oggm/tests/test_utils.py b/oggm/tests/test_utils.py index a1d83afe4..232fcf50d 100644 --- a/oggm/tests/test_utils.py +++ b/oggm/tests/test_utils.py @@ -27,6 +27,7 @@ from oggm.exceptions import (InvalidParamsError, InvalidDEMError, InvalidWorkflowError, DownloadVerificationFailedException) +from oggm.core import flowline pytestmark = pytest.mark.test_env("utils") @@ -477,6 +478,68 @@ def test_ref_data_manager(self): assert type(cfg.DATA) is multiprocessing.managers.DictProxy +class TestWorkflowUtils: + def test_compile_run_output(self, hef_gdir, hef_copy_gdir): + + # Here I test if compile_run_output works with different data variables + gdirs = [hef_gdir, hef_copy_gdir] + filesuffix = '_compile_run_output_test' + cfg.PARAMS['store_model_geometry'] = True + + # we run the first gdir without the variables 'area_min_h' (2d) and + # 'melt_on_glacier' (3d, with store_monthly_hydro=True) + def remove_diag_var(variable): + try: + cfg.PARAMS['store_diagnostic_variables'].remove(variable) + except ValueError as err: + if str(err) == 'list.remove(x): x not in list': + pass + else: + raise + remove_diag_var('area_min_h') + remove_diag_var('melt_on_glacier') + flowline.run_with_hydro(gdirs[0], + run_task=flowline.run_constant_climate, + store_monthly_hydro=True, + y0=1980, halfsize=1, nyears=2, + output_filesuffix=filesuffix) + + # the second gdir run includes the variables 'area_min_h' (2d) and + # 'melt_on_glacier' (3d, with store_monthly_hydro=True) + cfg.PARAMS['store_diagnostic_variables'].append('area_min_h') + cfg.PARAMS['store_diagnostic_variables'].append('melt_on_glacier') + 
flowline.run_with_hydro(gdirs[1], + run_task=flowline.run_constant_climate, + store_monthly_hydro=True, + y0=1980, halfsize=1, nyears=2, + output_filesuffix=filesuffix) + + # We need to test two things: + # First: If there is a variable in the first diagnostics file which is + # not present in the second one, no KeyError should be raised. + ds_1 = utils.compile_run_output([gdirs[0], gdirs[1]], + input_filesuffix=filesuffix) + + def check_result(ds): + assert 'area_m2_min_h' in ds.data_vars + assert 'melt_on_glacier' in ds.data_vars + assert 'melt_on_glacier_monthly' in ds.data_vars + assert np.all(np.isnan( + ds.loc[{'rgi_id': gdirs[0].rgi_id}]['area_m2_min_h'].values)) + assert np.all(np.isnan( + ds.loc[{'rgi_id': gdirs[0].rgi_id}]['melt_on_glacier'].values)) + assert np.all(np.isnan( + ds.loc[{'rgi_id': gdirs[0].rgi_id}]['melt_on_glacier_monthly'].values)) + + check_result(ds_1) + + # Second: If a variable is in the second diagnostics file but not + # present in the first one, it should still be included in the result. 
+ ds_2 = utils.compile_run_output([gdirs[1], gdirs[0]], + input_filesuffix=filesuffix) + check_result(ds_2) + + class TestStartFromTar(unittest.TestCase): def setUp(self): diff --git a/oggm/utils/_workflow.py b/oggm/utils/_workflow.py index ad72e19f1..595701cc7 100644 --- a/oggm/utils/_workflow.py +++ b/oggm/utils/_workflow.py @@ -1101,9 +1101,15 @@ def compile_run_output(gdirs, path=True, input_filesuffix='', # Get the dimensions of all this rgi_ids = [gd.rgi_id for gd in gdirs] - # To find the longest time, we have to open all files unfortunately + # To find the longest time, we have to open all files unfortunately. We + # also create a list of all data variables (in case not all files contain + # the same data variables), and finally we decide on the name of "3d" + # variables in case we have daily output time_info = {} time_keys = ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month'] + data_vars = {} + name_2d_dim = 'month_2d' + contains_3d_data = False for gd in gdirs: fp = gd.get_filepath('model_diagnostics', filesuffix=input_filesuffix) try: @@ -1132,6 +1138,28 @@ def compile_run_output(gdirs, path=True, input_filesuffix='', time_info[cn] = np.append(ds.variables[cn][:p], time_info[cn]) + # check if there are new data variables and add them + for vn in ds.variables: + # exclude time variables + if vn in ['month_2d', 'calender_month_2d', + 'hydro_month_2d']: + name_2d_dim = 'month_2d' + contains_3d_data = True + elif vn in ['day_2d', 'calender_day_2d', 'hydro_day_2d']: + name_2d_dim = 'day_2d' + contains_3d_data = True + elif vn not in ['time', 'hydro_year', 'hydro_month', + 'calendar_year', 'calendar_month']: + # check if data variable is new + if vn not in data_vars.keys(): + data_vars[vn] = dict() + data_vars[vn]['dims'] = ds.variables[vn].dimensions + data_vars[vn]['attrs'] = dict() + for attr in ds.variables[vn].ncattrs(): + if attr not in ['_FillValue', 'coordinates']: + data_vars[vn]['attrs'][attr] = getattr( + ds.variables[vn], attr) + # If this 
worked, keep it as template ppath = fp except FileNotFoundError: @@ -1165,19 +1193,15 @@ def compile_run_output(gdirs, path=True, input_filesuffix='', ds.coords[cn] = ('time', time_info[cn]) ds[cn].attrs['description'] = ds_diag[cn].attrs['description'] - # We decide on the name of "3d" variables in case we have daily - # output - name_2d_dim = 'day_2d' if 'day_2d' in ds_diag.dims else 'month_2d' - # Prepare the 2D variables shape = (len(time), len(rgi_ids)) out_2d = dict() - for vn in ds_diag.data_vars: - if name_2d_dim in ds_diag[vn].dims: + for vn in data_vars: + if name_2d_dim in data_vars[vn]['dims']: continue var = dict() var['data'] = np.full(shape, np.nan) - var['attrs'] = ds_diag[vn].attrs + var['attrs'] = data_vars[vn]['attrs'] out_2d[vn] = var # 1D Variables @@ -1196,7 +1220,7 @@ def compile_run_output(gdirs, path=True, input_filesuffix='', # Maybe 3D? out_3d = dict() - if name_2d_dim in ds_diag.dims: + if contains_3d_data: # We have some 3d vars month_2d = ds_diag[name_2d_dim] ds.coords[name_2d_dim] = (name_2d_dim, month_2d.data) @@ -1204,12 +1228,12 @@ def compile_run_output(gdirs, path=True, input_filesuffix='', ds.coords[cn] = (name_2d_dim, ds_diag[cn].values) shape = (len(time), len(month_2d), len(rgi_ids)) - for vn in ds_diag.data_vars: - if name_2d_dim not in ds_diag[vn].dims: + for vn in data_vars: + if name_2d_dim not in data_vars[vn]['dims']: continue var = dict() var['data'] = np.full(shape, np.nan) - var['attrs'] = ds_diag[vn].attrs + var['attrs'] = data_vars[vn]['attrs'] out_3d[vn] = var # Read out @@ -1222,9 +1246,17 @@ def compile_run_output(gdirs, path=True, input_filesuffix='', a = np.nonzero(time == it[0])[0][0] b = np.nonzero(time == it[-1])[0][0] + 1 for vn, var in out_2d.items(): - var['data'][a:b, i] = ds_diag.variables[vn][:] + # try statement if some data variables not in all files + try: + var['data'][a:b, i] = ds_diag.variables[vn][:] + except KeyError: + pass for vn, var in out_3d.items(): - var['data'][a:b, :, i] = 
ds_diag.variables[vn][:] + # try statement if some data variables not in all files + try: + var['data'][a:b, :, i] = ds_diag.variables[vn][:] + except KeyError: + pass for vn, var in out_1d.items(): var['data'][i] = ds_diag.getncattr(vn) except FileNotFoundError: