diff --git a/docs/whats-new.rst b/docs/whats-new.rst index 28f96ae8c..fdb4febcb 100644 --- a/docs/whats-new.rst +++ b/docs/whats-new.rst @@ -40,6 +40,8 @@ Enhancements Level 5 now replaces level 4 and creates the minigdirs (where only the files for a model run are kept and no inversion is possible anymore) (:pull:`1425`). By `Patrick Schmitt `_ +- Added support for Millan 22 velocity and thickness in the shop (:pull:`1443`). + By `Fabien Maussion `_ Bug fixes ~~~~~~~~~ diff --git a/oggm/shop/its_live.py b/oggm/shop/its_live.py index 63e1e2998..1c71abfec 100644 --- a/oggm/shop/its_live.py +++ b/oggm/shop/its_live.py @@ -78,7 +78,6 @@ def find_region(gdir): def _reproject_and_scale(gdir, do_error=False): """Reproject and scale itslive data, avoid code duplication for error""" - reg = find_region(gdir) if reg is None: raise InvalidWorkflowError('There does not seem to be its_live data ' @@ -142,7 +141,7 @@ def _reproject_and_scale(gdir, do_error=False): # Scale back velocities - https://github.com/OGGM/oggm/issues/1014 new_vel = np.sqrt(vx**2 + vy**2) - p_ok = new_vel > 1e-5 # avoid div by zero + p_ok = new_vel > 1 # avoid div by zero vx[p_ok] = vx[p_ok] * orig_vel[p_ok] / new_vel[p_ok] vy[p_ok] = vy[p_ok] * orig_vel[p_ok] / new_vel[p_ok] diff --git a/oggm/shop/millan22.py b/oggm/shop/millan22.py new file mode 100644 index 000000000..5622d4c50 --- /dev/null +++ b/oggm/shop/millan22.py @@ -0,0 +1,377 @@ +import logging +import os +import warnings + +import numpy as np +import pandas as pd +import xarray as xr +import shapely.geometry as shpg + +try: + import salem +except ImportError: + pass + +try: + import geopandas as gpd +except ImportError: + pass + +from oggm import utils, cfg +from oggm.exceptions import InvalidWorkflowError + +# Module logger +log = logging.getLogger(__name__) + +default_base_url = 'https://cluster.klima.uni-bremen.de/~oggm/velocities/millan22/' + +_lookup_thickness = None +_lookup_velocity = None + + +def _get_lookup_thickness(): + global 
_lookup_thickness + if _lookup_thickness is None: + fname = default_base_url + 'millan22_thickness_lookup_shp_20220902.zip' + _lookup_thickness = gpd.read_file('zip://' + utils.file_downloader(fname)) + return _lookup_thickness + + +def _get_lookup_velocity(): + global _lookup_velocity + if _lookup_velocity is None: + fname = default_base_url + 'millan22_velocity_lookup_shp_20220902.zip' + _lookup_velocity = gpd.read_file('zip://' + utils.file_downloader(fname)) + return _lookup_velocity + + +def _filter(ds): + # Read the data and prevent bad surprises + data = ds.get_vardata().astype(np.float64) + # Nans with 0 + data[~ np.isfinite(data)] = 0 + nmax = np.nanmax(data) + if nmax == np.inf: + # Replace inf with 0 + data[data == nmax] = 0 + + return data + + +def _filter_and_reproj(gdir, var, gdf, allow_neg=True): + """ Same code for thickness and error + + Parameters + ---------- + var : 'thickness' or 'err' + gdf : the lookup + """ + + # We may have more than one file + total_data = 0 + grids_used = [] + files_used = [] + for i, s in gdf.iterrows(): + # Fetch it + url = default_base_url + s[var] + input_file = utils.file_downloader(url) + + # Subset to avoid mega files + dsb = salem.GeoTiff(input_file) + x0, x1, y0, y1 = gdir.grid.extent + dsb.set_subset(corners=((x0, y0), (x1, y1)), crs=gdir.grid.proj, margin=5) + + data = _filter(dsb) + if not allow_neg: + # Replace negative values with 0 + data[data < 0] = 0 + + if np.nansum(data) == 0: + # No need to continue + continue + + # Reproject now + r_data = gdir.grid.map_gridded_data(data, dsb.grid, interp='linear') + total_data += r_data.filled(0) + grids_used.append(dsb) + files_used.append(s.file_id) + + return total_data, files_used, grids_used + + +@utils.entity_task(log, writes=['gridded_data']) +def thickness_to_gdir(gdir, add_error=False): + """Add the Millan 22 thickness data to this glacier directory. 
+ + Parameters + ---------- + gdir : :py:class:`oggm.GlacierDirectory` + the glacier directory to process + add_error : bool + add the error data or not + """ + + # Find out which file(s) we need + gdf = _get_lookup_thickness() + cp = shpg.Point(gdir.cenlon, gdir.cenlat) + sel = gdf.loc[gdf.contains(cp)] + if len(sel) == 0: + raise InvalidWorkflowError(f'There seems to be no Millan file for this ' + f'glacier: {gdir.rgi_id}') + + total_thick, files_used, _ = _filter_and_reproj(gdir, 'thickness', sel, + allow_neg=False) + + # We mask zero ice as nodata + total_thick = np.where(total_thick == 0, np.NaN, total_thick) + + if add_error: + total_err, _, _ = _filter_and_reproj(gdir, 'err', sel, allow_neg=False) + total_err[~ np.isfinite(total_thick)] = np.NaN + # Error cannot be larger than ice thickness itself + total_err = utils.clip_max(total_err, total_thick) + + # Write + with utils.ncDataset(gdir.get_filepath('gridded_data'), 'a') as nc: + + vn = 'millan_ice_thickness' + if vn in nc.variables: + v = nc.variables[vn] + else: + v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True) + v.units = 'm' + ln = 'Ice thickness from Millan et al. 2022' + v.long_name = ln + data_str = ' '.join(files_used) if len(files_used) > 1 else files_used[0] + v.data_source = data_str + v[:] = total_thick + + if add_error: + vn = 'millan_ice_thickness_err' + if vn in nc.variables: + v = nc.variables[vn] + else: + v = nc.createVariable(vn, 'f4', ('y', 'x',), zlib=True) + v.units = 'm' + ln = 'Ice thickness error from Millan et al. 2022' + v.long_name = ln + v.data_source = data_str + v[:] = total_err + + +@utils.entity_task(log, writes=['gridded_data']) +def velocity_to_gdir(gdir, add_error=False): + """Add the Millan 22 velocity data to this glacier directory. 
+ + Parameters + ---------- + gdir : :py:class:`oggm.GlacierDirectory` + the glacier directory to process + add_error : bool + add the error data or not + """ + + if gdir.rgi_region in ['05']: + raise NotImplementedError('Millan 22 does not provide velocity ' + 'products for Greenland - we would need to ' + 'implement MEASURES in the shop for this.') + + # Find out which file(s) we need + gdf = _get_lookup_velocity() + cp = shpg.Point(gdir.cenlon, gdir.cenlat) + sel = gdf.loc[gdf.contains(cp)] + if len(sel) == 0: + raise InvalidWorkflowError(f'There seems to be no Millan file for this ' + f'glacier: {gdir.rgi_id}') + + vel, files, grids = _filter_and_reproj(gdir, 'v', sel, allow_neg=False) + assert len(grids) == 1, 'Multiple velocity grids - dont know what to do.' + sel = sel.loc[sel.file_id == files[0]] + vx, _, gridsx = _filter_and_reproj(gdir, 'vx', sel) + vy, _, gridsy = _filter_and_reproj(gdir, 'vy', sel) + + dsx = gridsx[0] + dsy = gridsy[0] + grid_vel = dsx.grid + proj_vel = grid_vel.proj + grid_gla = gdir.grid + + # Get the coords at t0 + xx0, yy0 = grid_vel.center_grid.xy_coordinates + + # Compute coords at t1 + xx1 = _filter(dsx) + yy1 = _filter(dsy) + xx1 += xx0 + yy1 += yy0 + + # Transform both to glacier proj + xx0, yy0 = salem.transform_proj(proj_vel, grid_gla.proj, xx0, yy0) + xx1, yy1 = salem.transform_proj(proj_vel, grid_gla.proj, xx1, yy1) + + # Compute velocities from there + vx = xx1 - xx0 + vy = yy1 - yy0 + + # And transform to local map + vx = grid_gla.map_gridded_data(vx, grid=grid_vel, interp='linear') + vy = grid_gla.map_gridded_data(vy, grid=grid_vel, interp='linear') + + # Scale back to match velocity + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=RuntimeWarning) + new_vel = np.sqrt(vx**2 + vy**2) + p_ok = np.isfinite(new_vel) & (new_vel > 1) # avoid div by zero + scaler = vel[p_ok] / new_vel[p_ok] + vx[p_ok] = vx[p_ok] * scaler + vy[p_ok] = vy[p_ok] * scaler + + vx = vx.filled(np.NaN) + vy = 
vy.filled(np.NaN) + + if add_error: + err_vx, _, _ = _filter_and_reproj(gdir, 'err_vx', sel, allow_neg=False) + err_vy, _, _ = _filter_and_reproj(gdir, 'err_vy', sel, allow_neg=False) + err_vx[p_ok] = err_vx[p_ok] * scaler + err_vy[p_ok] = err_vy[p_ok] * scaler + + # Write + with utils.ncDataset(gdir.get_filepath('gridded_data'), 'a') as nc: + + vn = 'millan_v' + if vn in nc.variables: + v = nc.variables[vn] + else: + v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True) + v.units = 'm' + ln = 'Ice velocity from Millan et al. 2022' + v.long_name = ln + v.data_source = files[0] + v[:] = vel + + vn = 'millan_vx' + if vn in nc.variables: + v = nc.variables[vn] + else: + v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True) + v.units = 'm' + ln = 'Ice velocity in map x direction from Millan et al. 2022' + v.long_name = ln + v.data_source = files[0] + v[:] = vx + + vn = 'millan_vy' + if vn in nc.variables: + v = nc.variables[vn] + else: + v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True) + v.units = 'm' + ln = 'Ice velocity in map y direction from Millan et al. 2022' + v.long_name = ln + v.data_source = files[0] + v[:] = vy + + if add_error: + vn = 'millan_err_vx' + if vn in nc.variables: + v = nc.variables[vn] + else: + v = nc.createVariable(vn, 'f4', ('y', 'x',), zlib=True) + v.units = 'm' + ln = 'Ice velocity error in map x direction from Millan et al. 2022' + v.long_name = ln + v.data_source = files[0] + v[:] = err_vx + + vn = 'millan_err_vy' + if vn in nc.variables: + v = nc.variables[vn] + else: + v = nc.createVariable(vn, 'f4', ('y', 'x',), zlib=True) + v.units = 'm' + ln = 'Ice velocity error in map y direction from Millan et al. 2022' + v.long_name = ln + v.data_source = files[0] + v[:] = err_vy + + +@utils.entity_task(log) +def millan_statistics(gdir): + """Gather statistics about the Millan data interpolated to this glacier. 
+ """ + + d = dict() + + # Easy stats - this should always be possible + d['rgi_id'] = gdir.rgi_id + d['rgi_region'] = gdir.rgi_region + d['rgi_subregion'] = gdir.rgi_subregion + d['rgi_area_km2'] = gdir.rgi_area_km2 + d['millan_vol_km3'] = 0 + d['millan_area_km2'] = 0 + d['millan_perc_cov'] = 0 + + try: + with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds: + thick = ds['millan_ice_thickness'].where(ds['glacier_mask'], np.NaN).load() + d['millan_vol_km3'] = float(thick.sum() * gdir.grid.dx ** 2 * 1e-9) + d['millan_area_km2'] = float((~thick.isnull()).sum() * gdir.grid.dx ** 2 * 1e-6) + d['millan_perc_cov'] = float(d['millan_area_km2'] / gdir.rgi_area_km2) + + if 'millan_ice_thickness_err' in ds: + err = ds['millan_ice_thickness_err'].where(ds['glacier_mask'], np.NaN).load() + d['millan_vol_err_km3'] = float(err.sum() * gdir.grid.dx ** 2 * 1e-9) + except (FileNotFoundError, AttributeError, KeyError): + pass + + try: + with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds: + v = ds['millan_v'].where(ds['glacier_mask'], np.NaN).load() + d['millan_avg_vel'] = np.nanmean(v) + d['millan_max_vel'] = np.nanmax(v) + + if 'millan_err_vx' in ds: + err_vx = ds['millan_err_vx'].where(ds['glacier_mask'], np.NaN).load() + err_vy = ds['millan_err_vy'].where(ds['glacier_mask'], np.NaN).load() + err = (err_vx**2 + err_vy**2)**0.5 + d['millan_avg_err_vel'] = np.nanmean(err) + + except (FileNotFoundError, AttributeError, KeyError): + pass + + return d + + +@utils.global_task(log) +def compile_millan_statistics(gdirs, filesuffix='', path=True): + """Gather as much statistics as possible about a list of glaciers. + + It can be used to do result diagnostics and other stuffs. If the data + necessary for a statistic is not available (e.g.: flowlines length) it + will simply be ignored. 
+ + Parameters + ---------- + gdirs : list of :py:class:`oggm.GlacierDirectory` objects + the glacier directories to process + filesuffix : str + add suffix to output file + path : str, bool + Set to "True" in order to store the info in the working directory + Set to a path to store the file to your chosen location + """ + from oggm.workflow import execute_entity_task + + out_df = execute_entity_task(millan_statistics, gdirs) + + out = pd.DataFrame(out_df).set_index('rgi_id') + + if path: + if path is True: + out.to_csv(os.path.join(cfg.PATHS['working_dir'], + ('millan_statistics' + + filesuffix + '.csv'))) + else: + out.to_csv(path) + + return out diff --git a/oggm/tests/conftest.py b/oggm/tests/conftest.py index 5d4b568f6..6f4d2288d 100644 --- a/oggm/tests/conftest.py +++ b/oggm/tests/conftest.py @@ -155,6 +155,7 @@ def secure_url_retrieve(url, *args, **kwargs): 'cluster.klima.uni-bremen.de/~oggm/test_gdirs/' in url or 'cluster.klima.uni-bremen.de/~oggm/demo_gdirs/' in url or 'cluster.klima.uni-bremen.de/~oggm/test_climate/' in url or + 'cluster.klima.uni-bremen.de/~oggm/test_files/' in url or 'klima.uni-bremen.de/~oggm/climate/cru/cru_cl2.nc.zip' in url or 'klima.uni-bremen.de/~oggm/geodetic_ref_mb' in url or base_extra_L4 in url or diff --git a/oggm/tests/test_shop.py b/oggm/tests/test_shop.py index a8ea39475..eb9747eff 100644 --- a/oggm/tests/test_shop.py +++ b/oggm/tests/test_shop.py @@ -12,7 +12,7 @@ import pandas as pd from oggm import utils from oggm.utils import get_demo_file -from oggm.shop import its_live, rgitopo, bedtopo +from oggm.shop import its_live, rgitopo, bedtopo, millan22 from oggm.core import gis, centerlines, massbalance from oggm import cfg, tasks, workflow @@ -107,6 +107,59 @@ def test_repro_to_glacier(self, class_case_dir, monkeypatch): plt.show() +class Test_millan22: + + @pytest.mark.slow + def test_thickvel_to_glacier(self, class_case_dir, monkeypatch): + + # Init + cfg.initialize() + cfg.PATHS['working_dir'] = class_case_dir + 
cfg.PARAMS['use_intersects'] = False + cfg.PATHS['dem_file'] = get_demo_file('dem_Columbia.tif') + cfg.PARAMS['border'] = 10 + + entity = gpd.read_file(get_demo_file('RGI60-01.10689.shp')).iloc[0] + gdir = oggm.GlacierDirectory(entity) + tasks.define_glacier_region(gdir) + tasks.glacier_masks(gdir) + + # use our files + base_url = 'https://cluster.klima.uni-bremen.de/~oggm/test_files/millan22/' + monkeypatch.setattr(millan22, 'default_base_url', base_url) + + millan22.thickness_to_gdir(gdir) + + with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds: + mask = ds.glacier_mask.data.astype(bool) + thick = ds.millan_ice_thickness.where(mask).data + + # Simply some coverage and sanity checks + assert np.isfinite(thick).sum() / mask.sum() > 0.98 + assert np.nanmax(thick) > 800 + assert np.nansum(thick) * gdir.grid.dx**2 * 1e-9 > 174 + + # Velocity + millan22.velocity_to_gdir(gdir) + + with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds: + mask = ds.glacier_mask.data.astype(bool) + v = ds.millan_v.where(mask).data + vx = ds.millan_vx.where(mask).data + vy = ds.millan_vy.where(mask).data + + # Simply some coverage and sanity checks + assert np.isfinite(v).sum() / mask.sum() > 0.98 + assert np.nanmax(v) > 2000 + + # Stats + df = millan22.compile_millan_statistics([gdir]).iloc[0] + assert df['millan_avg_vel'] > 180 + assert df['millan_max_vel'] > 2000 + assert df['millan_perc_cov'] > 0.95 + assert df['millan_vol_km3'] > 174 + + class Test_rgitopo: def test_from_dem(self, class_case_dir, monkeypatch):