Skip to content

Commit

Permalink
Bugfix in merge_gridded_data (#1697)
Browse files Browse the repository at this point in the history
* only preserve total when not zero in merge_gridded_data

* include smooting in merge_gridded_data

* added possibility to merge monthly distributed thicknesses

* small adaptations in merge_simulated_thickness
  • Loading branch information
pat-schmitt committed Apr 25, 2024
1 parent 5110d0e commit 7489be1
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 24 deletions.
27 changes: 26 additions & 1 deletion oggm/core/gis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1896,6 +1896,7 @@ def reproject_gridded_data_variable_to_grid(gdir,
use_glacier_mask=True,
interp='nearest',
preserve_totals=True,
smooth_radius=None,
slice_of_variable=None):
"""
Function for reprojecting a gridded data variable to a different grid.
Expand Down Expand Up @@ -1926,6 +1927,10 @@ def reproject_gridded_data_variable_to_grid(gdir,
The total value is defined as the sum of all grid cell values times the
area of the grid cell (e.g. preserving ice volume).
Default is True.
smooth_radius : int
pixel size of the gaussian smoothing, only used if preserve_totals is
True. Default is to use cfg.PARAMS['smooth_window'] (i.e. a size in
meters). Set to zero to suppress smoothing.
slice_of_variable : None | dict
Can provide dimensions with values as a dictionary for extracting only
a slice of the data before reprojecting. This can be useful for large
Expand Down Expand Up @@ -1958,6 +1963,7 @@ def reproject_gridded_data_variable_to_grid(gdir,
if preserve_totals:
# only preserve for float variables
if not np.issubdtype(data, np.integer):
# do we want to do smoothing?

if len(data.dims) == 2:
sum_axis = None
Expand All @@ -1969,10 +1975,29 @@ def reproject_gridded_data_variable_to_grid(gdir,

total_before = (np.nansum(data.values, axis=sum_axis) *
ds.salem.grid.dx ** 2)

if smooth_radius != 0:
if smooth_radius is None:
smooth_radius = np.rint(cfg.PARAMS['smooth_window'] /
target_grid.dx)
# use data_mask to not expand the extent of the data
data_mask = ~np.isclose(r_data, 0, atol=1e-6)
if r_data.ndim == 3:
r_data = np.array(
[gaussian_blur(r_data[i, :, :], int(smooth_radius))
for i in range(r_data.shape[0])])
else:
r_data = gaussian_blur(r_data, int(smooth_radius))
r_data[~data_mask] = 0

total_after = (np.nansum(r_data, axis=sum_axis) *
target_grid.dx ** 2)

factor = total_before / total_after
# only preserve total if there is some data before
factor = np.where(np.isclose(total_before, 0, atol=1e-6),
0.,
total_before / total_after)

if len(data.dims) == 3:
# need to add two axis for broadcasting
factor = factor[:, np.newaxis, np.newaxis]
Expand Down
73 changes: 51 additions & 22 deletions oggm/sandbox/distribute_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,8 @@ def distribute_thickness_from_simulation(gdir,
into account for the melt *within* one band, a downside which is somehow
mitigated with smoothing (the default is quite some smoothing).
Writes a new file caller gridded_simulation.nc together with a new time dimension.
Writes a new file called gridded_simulation.nc together with a new time
dimension.
Parameters
----------
Expand All @@ -244,13 +245,13 @@ def distribute_thickness_from_simulation(gdir,
input_filesuffix : str
the filesuffix of the flowline diagnostics file.
output_filesuffix : str
the filesuffix of the gridded_simulation file to write. If empty,
the filesuffix of the gridded_simulation.nc file to write. If empty,
it will be set to input_filesuffix.
concat_input_filesuffix : str
the filesuffix of the flowline diagnostics file to concat with the
main one. `concat_input_filesuffix` is assumed to be prior to the
main one, i.e. often you will be calling
`concat_input_filesuffix='_spinup_historical'`.
the filesuffix of the flowline diagnostics file to concat with the main
one, provided with input_filesuffix. `concat_input_filesuffix` is
assumed to be prior in time to the main one, i.e. often you will be
calling `concat_input_filesuffix='_spinup_historical'`.
fl_diag : xarray.core.dataset.Dataset
directly provide a flowline diagnostics file instead of reading it
from disk. This could be useful, for example, to merge two files
Expand Down Expand Up @@ -385,17 +386,23 @@ def distribute_thickness_from_simulation(gdir,
this_glacier_mask = new_thick > 0
this_vol = dgy.volume_m3.values.sum()

# Smooth
dx = gdir.grid.dx
if smooth_radius != 0:
if smooth_radius is None:
smooth_radius = np.rint(cfg.PARAMS['smooth_window'] / dx)
new_thick = gaussian_blur(new_thick, int(smooth_radius))
if np.isclose(this_vol, 0, atol=1e-6):
# No ice left
new_thick = np.NaN
else:
# Smooth
dx = gdir.grid.dx
if smooth_radius != 0:
if smooth_radius is None:
smooth_radius = np.rint(cfg.PARAMS['smooth_window'] / dx)
new_thick = gaussian_blur(new_thick, int(smooth_radius))

new_thick[~this_glacier_mask] = np.NaN

# Conserve volume
tmp_vol = np.nansum(new_thick) * dx2
new_thick *= this_vol / tmp_vol
# Conserve volume
tmp_vol = np.nansum(new_thick) * dx2
new_thick *= this_vol / tmp_vol

out_thick[i, :] = new_thick

with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
Expand Down Expand Up @@ -433,6 +440,7 @@ def merge_simulated_thickness(gdirs,
keep_dem_file=False,
interp='nearest',
preserve_totals=True,
smooth_radius=None,
use_glacier_mask=True,
add_topography=True,
use_multiprocessing=False,
Expand All @@ -453,13 +461,18 @@ def merge_simulated_thickness(gdirs,
Default is gridded_simulation_merged{simulation_filesuffix}.
simulation_filesuffix : str
the filesuffix of the gridded_simulation file
years_to_merge : list | None
If not None, only the years in this list are merged. Default is None.
years_to_merge : list | xr.DataArray | None
If not None, and list of integers only the years at month 1 are
merged. You can also provide the timesteps as xarray.DataArray
containing calendar_year and calendar_month as it is the standard
output format for gridded nc files of OGGM.
Default is None.
keep_dem_file
interp
add_topography
use_glacier_mask
preserve_totals
smooth_radius
use_multiprocessing : bool
If we use multiprocessing the merging is done in parallel, but requires
more memory. Default is True.
Expand Down Expand Up @@ -498,6 +511,7 @@ def _calc_bedrock_topo(fp):
'distributed_thickness',
],
preserve_totals=preserve_totals,
smooth_radius=smooth_radius,
use_glacier_mask=use_glacier_mask,
add_topography=add_topography,
keep_dem_file=keep_dem_file,
Expand All @@ -514,22 +528,36 @@ def _calc_bedrock_topo(fp):

# then the simulated thickness files
if years_to_merge is None:
# open first file to get the years
# open first file to get all available timesteps
with xr.open_dataset(
gdirs[0].get_filepath('gridded_simulation',
filesuffix=simulation_filesuffix)
) as ds:
years_to_merge = ds.time.values
years_to_merge = ds.time

for timestep in years_to_merge:
if isinstance(timestep, int) or isinstance(timestep, np.int64):
year = timestep
month = 1
elif isinstance(timestep, xr.DataArray):
year = int(timestep.calendar_year)
month = int(timestep.calendar_month)
else:
raise NotImplementedError('Wrong type for years_to_merge! '
'Should be list of int or '
'xarray.DataArray for monthly '
'timesteps.')

for yr in years_to_merge:
workflow.merge_gridded_data(
gdirs,
output_folder=output_folder,
output_filename=f"{output_filename}_{yr}",
output_filename=f"{output_filename}_{year}_{month:02d}",
input_file='gridded_simulation',
input_filesuffix=simulation_filesuffix,
included_variables=[('simulated_thickness', {'time': [yr]})],
included_variables=[('simulated_thickness',
{'time': [timestep]})],
preserve_totals=preserve_totals,
smooth_radius=smooth_radius,
use_glacier_mask=use_glacier_mask,
add_topography=False,
keep_dem_file=False,
Expand Down Expand Up @@ -557,6 +585,7 @@ def _calc_bedrock_topo(fp):
],
[('simulated_thickness', selected_time)]],
preserve_totals=preserve_totals,
smooth_radius=smooth_radius,
use_glacier_mask=use_glacier_mask,
add_topography=add_topography,
keep_dem_file=keep_dem_file,
Expand Down
34 changes: 33 additions & 1 deletion oggm/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5772,7 +5772,15 @@ def test_merge_simulated_thickness(self):
prepro_base_url='https://cluster.klima.uni-bremen.de/~oggm/gdirs/'
'oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5/')

workflow.execute_entity_task(run_random_climate, gdirs, y0=2014,
# we run the first glacier with a larger temperature bias to force a
# a complete retreat for testing
workflow.execute_entity_task(tasks.run_random_climate, gdirs[0:1],
y0=2014, halfsize=5, nyears=20, seed=0,
temperature_bias=5,
output_filesuffix='_random',
store_fl_diagnostics=True)

workflow.execute_entity_task(run_random_climate, gdirs[1:], y0=2014,
halfsize=5, nyears=20, seed=0,
output_filesuffix='_random',
store_fl_diagnostics=True)
Expand Down Expand Up @@ -5850,3 +5858,27 @@ def test_merge_simulated_thickness(self):
ds_merged_selected_years = ds
assert_allclose(ds_merged_selected_years.simulated_thickness.values,
ds_merged.loc[{'time': years_to_test}].simulated_thickness.values)

# test merging of monthly distributed data
workflow.execute_entity_task(run_random_climate, gdirs, y0=2014,
halfsize=5, nyears=2, seed=0,
output_filesuffix='_random_short',
store_fl_diagnostics=True)
workflow.execute_entity_task(
distribute_2d.distribute_thickness_from_simulation,
gdirs,
input_filesuffix='_random_short',
add_monthly=True,
)
distribute_2d.merge_simulated_thickness(
gdirs,
simulation_filesuffix='_random_short',
reset=True)
files_to_open_monthly = glob.glob(
os.path.join(cfg.PATHS['working_dir'],
'gridded_simulation_merged_random_short*'))
assert len(files_to_open_monthly) == 26
with xr.open_mfdataset(files_to_open_monthly) as ds:
ds_merged_monthly = ds

assert len(ds_merged_monthly.time) == 25
6 changes: 6 additions & 0 deletions oggm/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,7 @@ def merge_gridded_data(gdirs, output_folder=None,
input_filesuffix='',
included_variables='all',
preserve_totals=True,
smooth_radius=None,
use_glacier_mask=True,
add_topography=False,
keep_dem_file=False,
Expand Down Expand Up @@ -901,6 +902,10 @@ def merge_gridded_data(gdirs, output_folder=None,
original file. The total value is defined as the sum of all grid cell
values times the area of the grid cell (e.g. preserving ice volume).
Default is True.
smooth_radius : int
pixel size of the gaussian smoothing, only used if preserve_totals is
True. Default is to use cfg.PARAMS['smooth_window'] (i.e. a size in
meters). Set to zero to suppress smoothing.
use_glacier_mask : bool
If True only the data cropped by the glacier mask is included in the
merged file. You must make sure that the variable 'glacier_mask' exists
Expand Down Expand Up @@ -1085,6 +1090,7 @@ def merge_gridded_data(gdirs, output_folder=None,
use_glacier_mask=use_glacier_mask,
interp=interp,
preserve_totals=preserve_totals,
smooth_radius=smooth_radius,
slice_of_variable=slice_of_var,
)

Expand Down

0 comments on commit 7489be1

Please sign in to comment.