added more flexibility in compile_run_output
pat-schmitt committed Apr 6, 2023
1 parent 9c8d8f5 commit 76282c1
Showing 2 changed files with 109 additions and 14 deletions.
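
In short, compile_run_output no longer assumes that every model diagnostics
file contains the same set of data variables: it compiles the union of
variables across all runs and fills missing entries with NaN instead of
raising a KeyError. A minimal usage sketch of the new behaviour (gdir_a,
gdir_b and the filesuffix are illustrative placeholders; both are assumed to
hold completed runs, with only gdir_b storing the extra diagnostics):

    from oggm import utils

    # Variables stored by only one of the runs are kept in the compiled
    # dataset; entries for the run that lacks them are padded with NaN.
    ds = utils.compile_run_output([gdir_a, gdir_b],
                                  input_filesuffix='_my_run')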
63 changes: 63 additions & 0 deletions oggm/tests/test_utils.py
@@ -27,6 +27,7 @@
from oggm.exceptions import (InvalidParamsError, InvalidDEMError,
InvalidWorkflowError,
DownloadVerificationFailedException)
from oggm.core import flowline


pytestmark = pytest.mark.test_env("utils")
@@ -477,6 +478,68 @@ def test_ref_data_manager(self):
assert type(cfg.DATA) is multiprocessing.managers.DictProxy


class TestWorkflowUtils:
def test_compile_run_output(self, hef_gdir, hef_copy_gdir):

        # Here we test that compile_run_output works when the runs
        # store different data variables
gdirs = [hef_gdir, hef_copy_gdir]
filesuffix = '_compile_run_output_test'
cfg.PARAMS['store_model_geometry'] = True

# we run the first gdir without the variables 'area_min_h' (2d) and
# 'melt_on_glacier' (3d, with store_monthly_hydro=True)
def remove_diag_var(variable):
try:
cfg.PARAMS['store_diagnostic_variables'].remove(variable)
except ValueError as err:
if str(err) == 'list.remove(x): x not in list':
pass
else:
raise
remove_diag_var('area_min_h')
remove_diag_var('melt_on_glacier')
flowline.run_with_hydro(gdirs[0],
run_task=flowline.run_constant_climate,
store_monthly_hydro=True,
y0=1980, halfsize=1, nyears=2,
output_filesuffix=filesuffix)

# the second gdir run includes the variables 'area_min_h' (2d) and
# 'melt_on_glacier' (3d, with store_monthly_hydro=True)
cfg.PARAMS['store_diagnostic_variables'].append('area_min_h')
cfg.PARAMS['store_diagnostic_variables'].append('melt_on_glacier')
flowline.run_with_hydro(gdirs[1],
run_task=flowline.run_constant_climate,
store_monthly_hydro=True,
y0=1980, halfsize=1, nyears=2,
output_filesuffix=filesuffix)

        # We need to test two things:
        # First: if a variable is missing from the first diagnostics file but
        # present in the second, no KeyError should be raised.
ds_1 = utils.compile_run_output([gdirs[0], gdirs[1]],
input_filesuffix=filesuffix)

def check_result(ds):
assert 'area_m2_min_h' in ds.data_vars
assert 'melt_on_glacier' in ds.data_vars
assert 'melt_on_glacier_monthly' in ds.data_vars
assert np.all(np.isnan(
ds.loc[{'rgi_id': gdirs[0].rgi_id}]['area_m2_min_h'].values))
assert np.all(np.isnan(
ds.loc[{'rgi_id': gdirs[0].rgi_id}]['melt_on_glacier'].values))
assert np.all(np.isnan(
ds.loc[{'rgi_id': gdirs[0].rgi_id}]['melt_on_glacier_monthly'].values))

check_result(ds_1)

        # Second: if a variable is present in the first diagnostics file but
        # missing from the second, it should still be included in the result.
ds_2 = utils.compile_run_output([gdirs[1], gdirs[0]],
input_filesuffix=filesuffix)
check_result(ds_2)


class TestStartFromTar(unittest.TestCase):

def setUp(self):
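The test above toggles the optional diagnostics by editing
cfg.PARAMS['store_diagnostic_variables'] in place. As a self-contained sketch
of that configuration step (the variable names are taken from the test;
everything else about the run setup is assumed):

    from oggm import cfg

    cfg.initialize()
    # 'area_min_h' (2d) and 'melt_on_glacier' (3d with monthly hydro) are
    # optional diagnostics; appending them makes subsequent runs store them.
    for extra in ['area_min_h', 'melt_on_glacier']:
        if extra not in cfg.PARAMS['store_diagnostic_variables']:
            cfg.PARAMS['store_diagnostic_variables'].append(extra)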
60 changes: 46 additions & 14 deletions oggm/utils/_workflow.py
@@ -1101,9 +1101,15 @@ def compile_run_output(gdirs, path=True, input_filesuffix='',
# Get the dimensions of all this
rgi_ids = [gd.rgi_id for gd in gdirs]

# To find the longest time, we have to open all files unfortunately
    # To find the longest time, we unfortunately have to open all files. While
    # doing so, we also collect a list of all data variables (in case not all
    # files contain the same ones) and decide on the name of the "3d"
    # dimension in case we have daily output
time_info = {}
time_keys = ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']
data_vars = {}
name_2d_dim = 'month_2d'
contains_3d_data = False
for gd in gdirs:
fp = gd.get_filepath('model_diagnostics', filesuffix=input_filesuffix)
try:
@@ -1132,6 +1138,28 @@
time_info[cn] = np.append(ds.variables[cn][:p],
time_info[cn])

            # check if there are new data variables and add them
for vn in ds.variables:
# exclude time variables
                if vn in ['month_2d', 'calendar_month_2d',
                          'hydro_month_2d']:
name_2d_dim = 'month_2d'
contains_3d_data = True
                elif vn in ['day_2d', 'calendar_day_2d', 'hydro_day_2d']:
name_2d_dim = 'day_2d'
contains_3d_data = True
elif vn not in ['time', 'hydro_year', 'hydro_month',
'calendar_year', 'calendar_month']:
# check if data variable is new
if vn not in data_vars.keys():
data_vars[vn] = dict()
data_vars[vn]['dims'] = ds.variables[vn].dimensions
data_vars[vn]['attrs'] = dict()
for attr in ds.variables[vn].ncattrs():
if attr not in ['_FillValue', 'coordinates']:
data_vars[vn]['attrs'][attr] = getattr(
ds.variables[vn], attr)

# If this worked, keep it as template
ppath = fp
except FileNotFoundError:
@@ -1165,19 +1193,15 @@ def compile_run_output(gdirs, path=True, input_filesuffix='',
ds.coords[cn] = ('time', time_info[cn])
ds[cn].attrs['description'] = ds_diag[cn].attrs['description']

# We decide on the name of "3d" variables in case we have daily
# output
name_2d_dim = 'day_2d' if 'day_2d' in ds_diag.dims else 'month_2d'

# Prepare the 2D variables
shape = (len(time), len(rgi_ids))
out_2d = dict()
for vn in ds_diag.data_vars:
if name_2d_dim in ds_diag[vn].dims:
for vn in data_vars:
if name_2d_dim in data_vars[vn]['dims']:
continue
var = dict()
var['data'] = np.full(shape, np.nan)
var['attrs'] = ds_diag[vn].attrs
var['attrs'] = data_vars[vn]['attrs']
out_2d[vn] = var

# 1D Variables
@@ -1196,20 +1220,20 @@ def compile_run_output(gdirs, path=True, input_filesuffix='',

# Maybe 3D?
out_3d = dict()
if name_2d_dim in ds_diag.dims:
if contains_3d_data:
# We have some 3d vars
month_2d = ds_diag[name_2d_dim]
ds.coords[name_2d_dim] = (name_2d_dim, month_2d.data)
cn = f'calendar_{name_2d_dim}'
ds.coords[cn] = (name_2d_dim, ds_diag[cn].values)

shape = (len(time), len(month_2d), len(rgi_ids))
for vn in ds_diag.data_vars:
if name_2d_dim not in ds_diag[vn].dims:
for vn in data_vars:
if name_2d_dim not in data_vars[vn]['dims']:
continue
var = dict()
var['data'] = np.full(shape, np.nan)
var['attrs'] = ds_diag[vn].attrs
var['attrs'] = data_vars[vn]['attrs']
out_3d[vn] = var

# Read out
@@ -1222,9 +1246,17 @@ def compile_run_output(gdirs, path=True, input_filesuffix='',
a = np.nonzero(time == it[0])[0][0]
b = np.nonzero(time == it[-1])[0][0] + 1
for vn, var in out_2d.items():
var['data'][a:b, i] = ds_diag.variables[vn][:]
                # use try/except since some data variables are not in all files
try:
var['data'][a:b, i] = ds_diag.variables[vn][:]
except KeyError:
pass
for vn, var in out_3d.items():
var['data'][a:b, :, i] = ds_diag.variables[vn][:]
                # use try/except since some data variables are not in all files
try:
var['data'][a:b, :, i] = ds_diag.variables[vn][:]
except KeyError:
pass
for vn, var in out_1d.items():
var['data'][i] = ds_diag.getncattr(vn)
except FileNotFoundError:

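For readers skimming the diff, the core of the change is a union-and-fill
pattern: collect the variable names across all files up front, pre-allocate
NaN arrays for each, and skip files that lack a given variable. A
self-contained sketch of the idea, with in-memory dicts standing in for the
netCDF diagnostics files (all names and values are made up for illustration):

    import numpy as np

    # Two "runs": only the second stored the extra variable
    files = [
        {'volume': np.array([1.0, 0.9, 0.8])},
        {'volume': np.array([2.0, 1.9, 1.8]),
         'melt_on_glacier': np.array([0.1, 0.2, 0.3])},
    ]

    # Union of all variable names across the files
    data_vars = sorted({vn for f in files for vn in f})

    n_time, n_runs = 3, len(files)
    out = {vn: np.full((n_time, n_runs), np.nan) for vn in data_vars}

    for i, f in enumerate(files):
        for vn in data_vars:
            try:
                out[vn][:, i] = f[vn]
            except KeyError:
                pass  # variable absent in this run -> its column stays NaN

    assert np.all(np.isnan(out['melt_on_glacier'][:, 0]))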