diff --git a/oggm/cli/prepro_levels.py b/oggm/cli/prepro_levels.py index 73689218e..f3a7176af 100644 --- a/oggm/cli/prepro_levels.py +++ b/oggm/cli/prepro_levels.py @@ -360,7 +360,14 @@ def _time_log(): workflow.execute_entity_task(gis.rasterio_glacier_mask, gdirs, source='ALL') - # Compress all in output directory + # Glacier stats + sum_dir = os.path.join(output_base_dir, 'L1', 'summary') + utils.mkdir(sum_dir) + opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) + utils.compile_glacier_statistics(gdirs, path=opath) + + # L1 OK - compress all in output directory + log.workflow('L1 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L1') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) diff --git a/oggm/core/centerlines.py b/oggm/core/centerlines.py index a2655ae7c..141ed1c38 100644 --- a/oggm/core/centerlines.py +++ b/oggm/core/centerlines.py @@ -1170,7 +1170,10 @@ def _parabolic_bed_from_topo(gdir, idl, interpolator): roNx = roN - roHead # zN=zN-zHead# - p = _approx_parabola(roNx, zN, y0=zHead) + with warnings.catch_warnings(): + # This can trigger a divide by zero Warning + warnings.filterwarnings("ignore", category=RuntimeWarning) + p = _approx_parabola(roNx, zN, y0=zHead) # define terrain height as the maximum height difference of points used # for the parabolic fitting and the bottom height @@ -2297,8 +2300,11 @@ def elevation_band_flowline(gdir, bin_variables=None, preserve_totals=True): df.loc[bi, 'slope'] = utils.clip_scalar(avg_s, min_alpha, max_alpha) # Binned variables - for var, data in zip(bin_variables, out_vars): - df.loc[bi, var] = np.nanmean(data[bin_coords]) + with warnings.catch_warnings(): + # This can trigger an empty mean warning + warnings.filterwarnings("ignore", category=RuntimeWarning) + for var, data in zip(bin_variables, out_vars): + df.loc[bi, var] = np.nanmean(data[bin_coords]) # The grid point's grid spacing and widths df['bin_elevation'] = (bins[1:] + bins[:-1]) / 2 @@ -2405,7 +2411,10 @@ def fixed_dx_elevation_band_flowline(gdir, bin_variables=None, in_total = np.nansum(df[var] * df['area']) out_total = np.nansum(interp * widths_m * dx_meter) if out_total > 0: - interp *= in_total / out_total + with warnings.catch_warnings(): + # This can trigger a double error + warnings.filterwarnings("ignore", category=RuntimeWarning) + interp *= in_total / out_total odf[var] = interp odf.to_csv(gdir.get_filepath('elevation_band_flowline', filesuffix='_fixed_dx')) diff --git a/oggm/core/gis.py b/oggm/core/gis.py index 2e393c62e..baad91a25 100644 --- a/oggm/core/gis.py +++ b/oggm/core/gis.py @@ -800,12 +800,11 @@ def proj(x, y): nc.min_h_glacier = np.nanmin(dem_on_g) -@entity_task(log, writes=['gridded_data', 'hypsometry']) -def simple_glacier_masks(gdir, write_hypsometry=False): +@entity_task(log, writes=['gridded_data']) +def simple_glacier_masks(gdir): """Compute glacier masks based on much simpler rules than OGGM's default. - This is therefore more robust: we use this function to compute glacier - hypsometries. + This is therefore more robust: we use this task in a elev_bands workflow. Parameters ---------- @@ -920,12 +919,38 @@ def simple_glacier_masks(gdir, write_hypsometry=False): raise InvalidDEMError('({}) min equal max in the masked DEM.' .format(gdir.rgi_id)) - # hypsometry if asked for - if not write_hypsometry: - return + +@entity_task(log, writes=['hypsometry']) +def compute_hypsometry_attributes(gdir, min_perc=0.2): + """Adds some attributes to the glacier directory. + + Mostly useful for RGI stuff. + + Parameters + ---------- + gdir : :py:class:`oggm.GlacierDirectory` + where to write the data + """ + dem = read_geotiff_dem(gdir) + + # This is the very robust way + fp = gdir.get_filepath('glacier_mask') + with rasterio.open(fp, 'r', driver='GTiff') as ds: + glacier_mask = ds.read(1).astype(rasterio.int16) == 1 + + fp = gdir.get_filepath('glacier_mask', filesuffix='_exterior') + with rasterio.open(fp, 'r', driver='GTiff') as ds: + glacier_exterior_mask = ds.read(1).astype(rasterio.int16) == 1 + + valid_mask = glacier_mask & np.isfinite(dem) + # we cant proceed if we have very little info + avail_perc = valid_mask.sum() / glacier_mask.sum() + if avail_perc < min_perc: + raise InvalidDEMError(f"Cant proceed with {avail_perc*100:.1f}%" + f"available.") bsize = 50. - dem_on_ice = dem[glacier_mask] + dem_on_ice = dem[valid_mask] bins = np.arange(nicenumber(dem_on_ice.min(), bsize, lower=True), nicenumber(dem_on_ice.max(), bsize) + 0.01, bsize) @@ -945,32 +970,57 @@ def simple_glacier_masks(gdir, write_hypsometry=False): # slope sy, sx = np.gradient(dem, gdir.grid.dx) - aspect = np.arctan2(np.mean(-sx[glacier_mask]), np.mean(sy[glacier_mask])) + aspect = np.arctan2(np.nanmean(-sx[glacier_mask]), np.nanmean(sy[glacier_mask])) aspect = np.rad2deg(aspect) if aspect < 0: aspect += 360 slope = np.arctan(np.sqrt(sx ** 2 + sy ** 2)) - avg_slope = np.rad2deg(np.mean(slope[glacier_mask])) + avg_slope = np.rad2deg(np.nanmean(slope[glacier_mask])) + + sec_bins = -22.5 + 45 * np.arange(9) + aspect_for_bin = aspect + if aspect_for_bin >= sec_bins[-1]: + aspect_for_bin -= 360 + aspect_sec = np.digitize(aspect_for_bin, sec_bins) + dx2 =gdir.grid.dx**2 * 1e-6 + + # Terminus loc + j, i = np.nonzero((dem[glacier_exterior_mask].min() == dem) & glacier_exterior_mask) + if len(j) > 2: + # We have a situation - take the closest to the euclidian center + mi, mj = np.mean(i), np.mean(j) + c = np.argmin((mi - i)**2 + (mj - j)**2) + j, i = j[[c]], i[[c]] + lon, lat = gdir.grid.ij_to_crs(i[0], j[0], crs=salem.wgs84) # write df = pd.DataFrame() - df['RGIId'] = [gdir.rgi_id] - df['GLIMSId'] = [gdir.glims_id] - df['Zmin'] = [dem_on_ice.min()] - df['Zmax'] = [dem_on_ice.max()] - df['Zmed'] = [np.median(dem_on_ice)] - df['Area'] = [gdir.rgi_area_km2] - df['Slope'] = [avg_slope] - df['Aspect'] = [aspect] + df['rgi_id'] = [gdir.rgi_id] + df['area_km2'] = [gdir.rgi_area_km2] + df['area_grid_km2'] = [glacier_mask.sum() * dx2] + df['valid_dem_area_km2'] = [valid_mask.sum() * dx2] + df['zmin_m'] = [np.nanmin(dem_on_ice)] + df['zmax_m'] = [np.nanmax(dem_on_ice)] + df['zmed_m'] = [np.nanmedian(dem_on_ice)] + df['zmean_m'] = [np.nanmean(dem_on_ice)] + df['terminus_lon'] = lon + df['terminus_lat'] = lat + df['slope_deg'] = [avg_slope] + df['aspect_deg'] = [aspect] + df['aspect_sec'] = [aspect_sec] + df['dem_source'] = [gdir.get_diagnostics()['dem_source']] for b, bs in zip(hi, (bins[1:] + bins[:-1])/2): df['{}'.format(np.round(bs).astype(int))] = [b] df.to_csv(gdir.get_filepath('hypsometry'), index=False) @entity_task(log, writes=['glacier_mask']) -def rasterio_glacier_mask(gdir, source=None): +def rasterio_glacier_mask(gdir, source=None, no_nunataks=False): """Writes a 1-0 glacier mask GeoTiff with the same dimensions as dem.tif + If no_nunataks, does the same but without nunataks. Writes a file + with the suffix "_no_nunataks" appended. + Parameters ---------- @@ -999,9 +1049,8 @@ def rasterio_glacier_mask(gdir, source=None): # read dem profile with rasterio.open(dempath, 'r', driver='GTiff') as ds: profile = ds.profile - - # don't even bother reading the actual DEM, just mimic it - data = np.zeros((ds.height, ds.width)) + # don't even bother reading the actual DEM, just mimic it + data = np.zeros((ds.height, ds.width)) # Read RGI outlines geometry = gdir.read_shapefile('outlines').geometry[0] @@ -1013,14 +1062,18 @@ def rasterio_glacier_mask(gdir, source=None): if not geometry.is_valid: raise InvalidDEMError('This glacier geometry is not valid.') + if no_nunataks: + mapping = shpg.mapping(shpg.Polygon(geometry.exterior)) + else: + mapping = shpg.mapping(geometry) + # Compute the glacier mask using rasterio # Small detour as mask only accepts DataReader objects with rasterio.io.MemoryFile() as memfile: with memfile.open(**profile) as dataset: dataset.write(data.astype(profile['dtype'])[np.newaxis, ...]) dem_data = rasterio.open(memfile.name) - masked_dem, _ = riomask(dem_data, [shpg.mapping(geometry)], - filled=False) + masked_dem, _ = riomask(dem_data, [mapping], filled=False) glacier_mask = ~masked_dem[0, ...].mask # parameters to for the new tif @@ -1041,10 +1094,43 @@ def rasterio_glacier_mask(gdir, source=None): 'nodata': nodata, }) - with rasterio.open(gdir.get_filepath('glacier_mask'), 'w', **profile) as r: + if no_nunataks: + fp = gdir.get_filepath('glacier_mask', filesuffix='_no_nunataks') + else: + fp = gdir.get_filepath('glacier_mask') + + with rasterio.open(fp, 'w', **profile) as r: r.write(out.astype(dtype), 1) +@entity_task(log, writes=['glacier_mask']) +def rasterio_glacier_exterior_mask(gdir): + """Writes a 1-0 glacier exterior mask GeoTiff with the same dimensions as dem.tif + + This is the "one" grid point on the glacier exterior (ignoring nunataks). + This is useful to know where the terminus is, for example. + + Parameters + ---------- + gdir : :py:class:`oggm.GlacierDirectory` + the glacier in question + """ + fp = gdir.get_filepath('glacier_mask', filesuffix='_no_nunataks') + with rasterio.open(fp, 'r', driver='GTiff') as ds: + glacier_mask_nonuna = ds.read(1).astype(rasterio.int16) == 1 + profile = ds.profile + + # Glacier exterior excluding nunataks + erode = binary_erosion(glacier_mask_nonuna) + glacier_ext = glacier_mask_nonuna ^ erode + glacier_ext = np.where(glacier_mask_nonuna, glacier_ext, 0) + + # parameters to for the new tif + fp = gdir.get_filepath('glacier_mask', filesuffix='_exterior') + with rasterio.open(fp, 'w', **profile) as r: + r.write(glacier_ext.astype(rasterio.int16), 1) + + @entity_task(log, writes=['gridded_data']) def gridded_attributes(gdir): """Adds attributes to the gridded file, useful for thickness interpolation. @@ -1454,7 +1540,7 @@ def proj(x, y): if not glacier_poly_hr.is_valid: raise RuntimeError('This glacier geometry is not valid.') - # Rounded geometry to nearest nearest pix + # Rounded geometry to nearest pix # I can not use _polyg # glacier_poly_pix = _polygon_to_pix(glacier_poly_hr) def project(x, y): diff --git a/oggm/shop/bedtopo.py b/oggm/shop/bedtopo.py index b9c0301d4..081cf553a 100644 --- a/oggm/shop/bedtopo.py +++ b/oggm/shop/bedtopo.py @@ -1,4 +1,5 @@ import logging +import warnings import os import numpy as np @@ -47,7 +48,11 @@ def add_consensus_thickness(gdir, base_url=None): dsb = salem.GeoTiff(input_file) thick = utils.clip_min(dsb.get_vardata(), 0) in_volume = thick.sum() * dsb.grid.dx ** 2 - thick = gdir.grid.map_gridded_data(thick, dsb.grid, interp='linear') + with warnings.catch_warnings(): + # This can trigger an out of bounds warning + warnings.filterwarnings("ignore", category=RuntimeWarning, + message='.*out of bounds.*') + thick = gdir.grid.map_gridded_data(thick, dsb.grid, interp='linear') # Correct for volume thick = utils.clip_min(thick.filled(0), 0) diff --git a/oggm/shop/hugonnet_maps.py b/oggm/shop/hugonnet_maps.py index 26c831763..2fc0535a6 100644 --- a/oggm/shop/hugonnet_maps.py +++ b/oggm/shop/hugonnet_maps.py @@ -1,4 +1,5 @@ import logging +import warnings from packaging.version import Version import numpy as np @@ -171,7 +172,10 @@ def hugonnet_statistics(gdir): dhdt = ds['hugonnet_dhdt'].where(ds['glacier_mask'], np.NaN).load() d['hugonnet_area_km2'] = float((~dhdt.isnull()).sum() * gdir.grid.dx ** 2 * 1e-6) d['hugonnet_perc_cov'] = float(d['hugonnet_area_km2'] / gdir.rgi_area_km2) - d['hugonnet_avg_dhdt'] = np.nanmean(dhdt.data) + with warnings.catch_warnings(): + # This can trigger an empty mean warning + warnings.filterwarnings("ignore", category=RuntimeWarning) + d['hugonnet_avg_dhdt'] = np.nanmean(dhdt.data) except (FileNotFoundError, AttributeError, KeyError): pass diff --git a/oggm/shop/its_live.py b/oggm/shop/its_live.py index 80eb49c12..bcdd7dfa1 100644 --- a/oggm/shop/its_live.py +++ b/oggm/shop/its_live.py @@ -1,4 +1,5 @@ import logging +import warnings import os import numpy as np @@ -103,8 +104,12 @@ def _reproject_and_scale(gdir, do_error=False): grid_gla = gdir.grid.center_grid proj_vel = dsx.grid.proj x0, x1, y0, y1 = grid_gla.extent_in_crs(proj_vel) - dsx.set_subset(corners=((x0, y0), (x1, y1)), crs=proj_vel, margin=4) - dsy.set_subset(corners=((x0, y0), (x1, y1)), crs=proj_vel, margin=4) + with warnings.catch_warnings(): + # This can trigger an out-of-bounds warning + warnings.filterwarnings("ignore", category=RuntimeWarning, + message='.*out of bounds.*') + dsx.set_subset(corners=((x0, y0), (x1, y1)), crs=proj_vel, margin=4) + dsy.set_subset(corners=((x0, y0), (x1, y1)), crs=proj_vel, margin=4) grid_vel = dsx.grid.center_grid # TODO: this should be taken care of by salem diff --git a/oggm/shop/millan22.py b/oggm/shop/millan22.py index 7ec9d524f..42cdcc338 100644 --- a/oggm/shop/millan22.py +++ b/oggm/shop/millan22.py @@ -79,9 +79,13 @@ def _filter_and_reproj(gdir, var, gdf, allow_neg=True): # Subset to avoid mega files dsb = salem.GeoTiff(input_file) x0, x1, y0, y1 = gdir.grid.extent_in_crs(dsb.grid.proj) - dsb.set_subset(corners=((x0, y0), (x1, y1)), - crs=dsb.grid.proj, - margin=5) + with warnings.catch_warnings(): + # This can trigger an out of bounds warning + warnings.filterwarnings("ignore", category=RuntimeWarning, + message='.*out of bounds.*') + dsb.set_subset(corners=((x0, y0), (x1, y1)), + crs=dsb.grid.proj, + margin=5) data = _filter(dsb) if not allow_neg: @@ -93,7 +97,11 @@ def _filter_and_reproj(gdir, var, gdf, allow_neg=True): continue # Reproject now - r_data = gdir.grid.map_gridded_data(data, dsb.grid, interp='linear') + with warnings.catch_warnings(): + # This can trigger an out of bounds warning + warnings.filterwarnings("ignore", category=RuntimeWarning, + message='.*out of bounds.*') + r_data = gdir.grid.map_gridded_data(data, dsb.grid, interp='linear') total_data += r_data.filled(0) grids_used.append(dsb) files_used.append(s.file_id) diff --git a/oggm/shop/rgitopo.py b/oggm/shop/rgitopo.py index b5ecab989..b3acebe2d 100644 --- a/oggm/shop/rgitopo.py +++ b/oggm/shop/rgitopo.py @@ -90,29 +90,27 @@ def select_dem_from_dir(gdir, dem_source=None, keep_dem_folders=False): gdir : :py:class:`oggm.GlacierDirectory` the glacier directory dem_source : str - the source to pick from (default: NASADEM and COPDEM90) + the source to pick from keep_dem_folders : bool the default is to delete the other DEM directories to save space. Set this to True to prevent that (e.g. for sensitivity tests) """ # Start by deleting noise - for fn in ['log.txt', 'diagnostics.json']: - fp = os.path.join(gdir.dir, fn) - if os.path.exists(fp): - os.remove(fp) + for fn in os.listdir(gdir.dir): + if fn in ['glacier_mask.tif', 'glacier_grid.json', + 'outlines.tar.gz', 'intersects.tar.gz']: + continue + fn = os.path.join(gdir.dir, fn) + if os.path.isfile(fn): + os.remove(fn) sources = [f.name for f in os.scandir(gdir.dir) if f.is_dir() and not f.name.startswith('.')] - - if dem_source is None: - for s in ['NASADEM', 'COPDEM90', 'GIMP', 'REMA', 'TANDEM', 'MAPZEN']: - if s in sources: - dem_source = s - if dem_source not in sources: raise RuntimeError('source {} not in folder'.format(dem_source)) + gdir.add_to_diagnostics('dem_source', dem_source) shutil.copyfile(os.path.join(gdir.dir, dem_source, 'dem.tif'), gdir.get_filepath('dem')) shutil.copyfile(os.path.join(gdir.dir, dem_source, 'dem_source.txt'), @@ -123,7 +121,11 @@ def select_dem_from_dir(gdir, dem_source=None, keep_dem_folders=False): shutil.rmtree(os.path.join(gdir.dir, source)) -@utils.entity_task(log, writes=[]) +def _fallback_dem_quality_check(gdir): + return dict(rgi_id=gdir.rgi_id) + + +@utils.entity_task(log, writes=[], fallback=_fallback_dem_quality_check) def dem_quality_check(gdir): """Run a simple quality check on the rgitopo DEMs @@ -146,7 +148,7 @@ def dem_quality_check(gdir): sources = [f.name for f in os.scandir(gdir.dir) if f.is_dir() and not f.name.startswith('.')] - out = {} + out = dict(rgi_id=gdir.rgi_id) for s in sources: try: with rasterio.open(os.path.join(gdir.dir, s, 'dem.tif'), 'r', @@ -154,19 +156,12 @@ def dem_quality_check(gdir): topo = ds.read(1).astype(rasterio.float32) topo[topo <= -999.] = np.NaN topo[ds.read_masks(1) == 0] = np.NaN - valid_mask = np.isfinite(topo) & mask if np.all(~valid_mask): continue - - z_on_glacier = topo[valid_mask] - zmax, zmin = np.nanmax(z_on_glacier), np.nanmin(z_on_glacier) - if zmin == zmax: + if np.nanmax(topo[valid_mask]) == 0: continue - out[s] = valid_mask.sum() / area - except BaseException: pass - return out diff --git a/oggm/tasks.py b/oggm/tasks.py index 3d789d5f0..6fc675066 100644 --- a/oggm/tasks.py +++ b/oggm/tasks.py @@ -8,6 +8,8 @@ from oggm.core.gis import glacier_masks from oggm.core.gis import simple_glacier_masks from oggm.core.gis import rasterio_glacier_mask +from oggm.core.gis import rasterio_glacier_exterior_mask +from oggm.core.gis import compute_hypsometry_attributes from oggm.core.gis import gridded_attributes from oggm.core.gis import gridded_mb_attributes from oggm.core.gis import gridded_data_var_to_geotiff diff --git a/oggm/tests/test_prepro.py b/oggm/tests/test_prepro.py index 5287f8a7f..45cf983d5 100644 --- a/oggm/tests/test_prepro.py +++ b/oggm/tests/test_prepro.py @@ -297,7 +297,7 @@ def test_simple_glacier_masks(self): gdir = oggm.GlacierDirectory(entity, base_dir=self.testdir) gis.define_glacier_region(gdir) - gis.simple_glacier_masks(gdir, write_hypsometry=True) + gis.simple_glacier_masks(gdir) with utils.ncDataset(gdir.get_filepath('gridded_data')) as nc: area = np.sum(nc.variables['glacier_mask'][:] * gdir.grid.dx**2) @@ -319,13 +319,33 @@ def test_simple_glacier_masks(self): assert np.all(df['dem_max_elev'] > df['dem_max_elev_on_ext']) + @pytest.mark.skipif((Version(rasterio.__version__) < + Version('1.0')), + reason='requires rasterio >= 1.0') + def test_compute_hypsometry_attributes(self): + + hef_file = get_demo_file('Hintereisferner_RGI5.shp') + entity = gpd.read_file(hef_file).iloc[0] + + gdir = oggm.GlacierDirectory(entity, base_dir=self.testdir) + gis.define_glacier_region(gdir) + gis.rasterio_glacier_mask(gdir) + gis.rasterio_glacier_mask(gdir, no_nunataks=True) + gis.rasterio_glacier_exterior_mask(gdir) + gis.compute_hypsometry_attributes(gdir) + dfh = pd.read_csv(gdir.get_filepath('hypsometry')) - np.testing.assert_allclose(dfh['Slope'], entity.Slope, atol=0.5) - np.testing.assert_allclose(dfh['Aspect'], entity.Aspect, atol=5) - np.testing.assert_allclose(dfh['Zmed'], entity.Zmed, atol=20) - np.testing.assert_allclose(dfh['Zmax'], entity.Zmax, atol=20) - np.testing.assert_allclose(dfh['Zmin'], entity.Zmin, atol=20) + np.testing.assert_allclose(dfh['slope_deg'], entity.Slope, atol=0.5) + np.testing.assert_allclose(dfh['aspect_deg'], entity.Aspect, atol=5) + np.testing.assert_allclose(dfh['zmed_m'], entity.Zmed, atol=20) + np.testing.assert_allclose(dfh['zmax_m'], entity.Zmax, atol=20) + np.testing.assert_allclose(dfh['zmin_m'], entity.Zmin, atol=20) + np.testing.assert_allclose(dfh['zmax_m'], entity.Zmax, atol=20) + np.testing.assert_allclose(dfh['zmin_m'], entity.Zmin, atol=20) + # From google map checks + np.testing.assert_allclose(dfh['terminus_lon'], 10.80, atol=0.01) + np.testing.assert_allclose(dfh['terminus_lat'], 46.81, atol=0.01) bins = [] for c in dfh.columns: @@ -360,7 +380,7 @@ def test_glacier_masks_other_glacier(self): # The test below does NOT pass on OGGM shutil.copyfile(gdir.get_filepath('gridded_data'), os.path.join(self.testdir, 'default_masks.nc')) - gis.simple_glacier_masks(gdir, write_hypsometry=True) + gis.simple_glacier_masks(gdir) with utils.ncDataset(gdir.get_filepath('gridded_data')) as nc: area = np.sum(nc.variables['glacier_mask'][:] * gdir.grid.dx**2) np.testing.assert_allclose(area*10**-6, gdir.rgi_area_km2, @@ -368,13 +388,6 @@ def test_glacier_masks_other_glacier(self): shutil.copyfile(gdir.get_filepath('gridded_data'), os.path.join(self.testdir, 'simple_masks.nc')) - dfh = pd.read_csv(gdir.get_filepath('hypsometry')) - np.testing.assert_allclose(dfh['Slope'], entity.Slope, atol=1) - np.testing.assert_allclose(dfh['Aspect'], entity.Aspect, atol=10) - np.testing.assert_allclose(dfh['Zmed'], entity.Zmed, atol=20) - np.testing.assert_allclose(dfh['Zmax'], entity.Zmax, atol=20) - np.testing.assert_allclose(dfh['Zmin'], entity.Zmin, atol=20) - @pytest.mark.skipif((Version(rasterio.__version__) < Version('1.0')), reason='requires rasterio >= 1.0') diff --git a/oggm/tests/test_shop.py b/oggm/tests/test_shop.py index de80e3631..b2bed678c 100644 --- a/oggm/tests/test_shop.py +++ b/oggm/tests/test_shop.py @@ -214,7 +214,8 @@ def test_from_dem(self, class_case_dir, monkeypatch): 'emen.de/~oggm/test_gdirs/dem' 's_v1/default/') - gd = rgitopo.init_glacier_directories_from_rgitopo(['RGI60-09.01004']) + gd = rgitopo.init_glacier_directories_from_rgitopo(['RGI60-09.01004'], + dem_source='COPDEM') gd = gd[0] assert gd.has_file('dem') @@ -237,10 +238,11 @@ def test_qc(self, class_case_dir, monkeypatch): 's_v1/default/') gd = rgitopo.init_glacier_directories_from_rgitopo(['RGI60-09.01004'], + dem_source='COPDEM', keep_dem_folders=True) out = rgitopo.dem_quality_check(gd[0]) assert len(out) > 5 - assert np.sum(list(out.values())) > 5 + assert pd.Series(out).iloc[1:].sum() > 5 class Test_w5e5: diff --git a/oggm/tests/test_utils.py b/oggm/tests/test_utils.py index 8a6d16525..b2eca6428 100644 --- a/oggm/tests/test_utils.py +++ b/oggm/tests/test_utils.py @@ -442,6 +442,11 @@ def test_glacier_statistics_and_climate(self): assert 'glc_ext_num_perc' in df.columns assert np.all(np.isfinite(df.glc_ext_num_perc.values)) + assert df['geometry_type'].iloc[0] == 'Polygon' + assert df['geometry_is_valid'].iloc[0] + np.testing.assert_allclose(df['geometry_area_km2'].iloc[0], + gdir.rgi_area_km2, + rtol=0.001) df = df.iloc[0] np.testing.assert_allclose(df['dem_mean_elev'], diff --git a/oggm/tests/test_workflow.py b/oggm/tests/test_workflow.py index c597515ae..9b52aeb77 100644 --- a/oggm/tests/test_workflow.py +++ b/oggm/tests/test_workflow.py @@ -453,3 +453,4 @@ def test_rgi7_glacier_dirs(): assert gdir assert gdir.rgi_region == '11' assert gdir.rgi_area_km2 > 8 + assert gdir.name == 'Hintereis Ferner' diff --git a/oggm/utils/_downloads.py b/oggm/utils/_downloads.py index ad2dd494a..19f4153b1 100644 --- a/oggm/utils/_downloads.py +++ b/oggm/utils/_downloads.py @@ -1297,7 +1297,11 @@ def _get_prepro_gdir_unlocked(rgi_version, rgi_id, border, prepro_level, url = get_prepro_base_url(rgi_version=rgi_version, border=border, prepro_level=prepro_level, base_url=base_url) - url += '{}/{}.tar' .format(rgi_id[:8], rgi_id[:11]) + if len(rgi_id) == 23: + # RGI7 + url += '{}/{}.tar'.format(rgi_id[:17], rgi_id[:20]) + else: + url += '{}/{}.tar'.format(rgi_id[:8], rgi_id[:11]) tar_base = file_downloader(url) if tar_base is None: raise RuntimeError('Could not find file at ' + url) diff --git a/oggm/utils/_funcs.py b/oggm/utils/_funcs.py index 44886a77e..9ff6f9c66 100644 --- a/oggm/utils/_funcs.py +++ b/oggm/utils/_funcs.py @@ -22,6 +22,7 @@ from scipy.interpolate import interp1d import shapely.geometry as shpg from shapely.ops import linemerge +from shapely.validation import make_valid # Optional libs try: @@ -519,9 +520,29 @@ def polygon_intersections(gdf): return out +def recursive_valid_polygons(geoms, crs): + """Given a list of shapely geometries, makes sure all geometries are valid + + All will be valid polygons of area > 10000m2""" + new_geoms = [] + for geom in geoms: + new_geom = make_valid(geom) + try: + new_geoms.extend(recursive_valid_polygons(list(new_geom.geoms), crs)) + except AttributeError: + new_s = gpd.GeoSeries(new_geom) + new_s.crs = crs + if new_s.to_crs({'proj': 'cea'}).area.iloc[0] >= 10000: + new_geoms.append(new_geom) + assert np.all([type(geom) == shpg.Polygon for geom in new_geoms]) + return new_geoms + + def multipolygon_to_polygon(geometry, gdir=None): """Sometimes an RGI geometry is a multipolygon: this should not happen. + It was vor vey old versions, pretty sure this is not needed anymore. + Parameters ---------- geometry : shpg.Polygon or shpg.MultiPolygon diff --git a/oggm/utils/_workflow.py b/oggm/utils/_workflow.py index fdf95a60e..c1c00e1ce 100644 --- a/oggm/utils/_workflow.py +++ b/oggm/utils/_workflow.py @@ -60,7 +60,8 @@ from oggm import __version__ from oggm.utils._funcs import (calendardate_to_hydrodate, date_to_floatyear, tolist, filter_rgi_name, parse_rgi_meta, - haversine, multipolygon_to_polygon, clip_scalar) + haversine, multipolygon_to_polygon, + recursive_valid_polygons) from oggm.utils._downloads import (get_demo_file, get_wgms_files, get_rgi_glacier_entities) from oggm import cfg @@ -1578,6 +1579,15 @@ def glacier_statistics(gdir, inversion_only=False, apply_func=None): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) + try: + # Geom stuff + outline = gdir.read_shapefile('outlines') + d['geometry_type'] = outline.type.iloc[0] + d['geometry_is_valid'] = outline.is_valid.iloc[0] + d['geometry_area_km2'] = outline.to_crs({'proj': 'cea'}).area.iloc[0] * 1e-6 + except BaseException: + pass + try: # Inversion if gdir.has_file('inversion_output'): @@ -1768,6 +1778,64 @@ def compile_glacier_statistics(gdirs, filesuffix='', path=True, return out +@entity_task(log) +def read_glacier_hypsometry(gdir): + """Utility function to read the glacier hypsometry in the folder. + + Parameters + ---------- + gdir : :py:class:`oggm.GlacierDirectory` object + the glacier directory to process + + Returns + ------- + the dataframe + """ + try: + out = pd.read_csv(gdir.get_filepath('hypsometry')).iloc[0] + except: + out = pd.Series({'rgi_id': gdir.rgi_id}) + return out + + +@global_task(log) +def compile_glacier_hypsometry(gdirs, filesuffix='', path=True, + add_column=None): + """Gather as much statistics as possible about a list of glaciers. + + It can be used to do result diagnostics and other stuffs. If the data + necessary for a statistic is not available (e.g.: flowlines length) it + will simply be ignored. + + Parameters + ---------- + gdirs : list of :py:class:`oggm.GlacierDirectory` objects + the glacier directories to process + filesuffix : str + add suffix to output file + path : str, bool + Set to "True" in order to store the info in the working directory + Set to a path to store the file to your chosen location + add_column : tuple + if you feel like adding a key - value pair to the compiled dataframe + """ + from oggm.workflow import execute_entity_task + + out_df = execute_entity_task(read_glacier_hypsometry, gdirs) + + out = pd.DataFrame(out_df).set_index('rgi_id') + if add_column is not None: + out[add_column[0]] = add_column[1] + if path: + if path is True: + out.to_csv(os.path.join(cfg.PATHS['working_dir'], + ('glacier_hypsometry' + + filesuffix + '.csv'))) + else: + out.to_csv(path) + return out + + @global_task(log) def compile_fixed_geometry_mass_balance(gdirs, filesuffix='', path=True, csv=False, @@ -2593,6 +2661,7 @@ def __init__(self, rgi_entity, base_dir=None, reset=False, # Do we want to use the RGI center point or ours? if cfg.PARAMS['use_rgi_area']: try: + # RGIv7 self.cenlon = float(rgi_entity.cenlon) self.cenlat = float(rgi_entity.cenlat) except AttributeError: @@ -2614,7 +2683,7 @@ def __init__(self, rgi_entity, base_dir=None, reset=False, '{:02d}'.format(int(rgi_entity.O2Region))) try: - name = str(rgi_entity.name) + name = rgi_entity.glac_name rgi_datestr = rgi_entity.src_date except AttributeError: # RGI V6 @@ -2782,21 +2851,23 @@ def __repr__(self): return '\n'.join(summary) + '\n' def _reproject_and_write_shapefile(self, entity): - # Make a local glacier map if cfg.PARAMS['map_proj'] == 'utm': - from pyproj.aoi import AreaOfInterest - from pyproj.database import query_utm_crs_info - utm_crs_list = query_utm_crs_info( - datum_name="WGS 84", - area_of_interest=AreaOfInterest( - west_lon_degree=self.cenlon, - south_lat_degree=self.cenlat, - east_lon_degree=self.cenlon, - north_lat_degree=self.cenlat, - ), - ) - proj4_str = utm_crs_list[0].code + if entity.get('utm_zone', False): + proj4_str = {'proj': 'utm', 'zone': entity['utm_zone']} + else: + from pyproj.aoi import AreaOfInterest + from pyproj.database import query_utm_crs_info + utm_crs_list = query_utm_crs_info( + datum_name="WGS 84", + area_of_interest=AreaOfInterest( + west_lon_degree=self.cenlon, + south_lat_degree=self.cenlat, + east_lon_degree=self.cenlon, + north_lat_degree=self.cenlat, + ), + ) + proj4_str = utm_crs_list[0].code elif cfg.PARAMS['map_proj'] == 'tmerc': params = dict(name='tmerc', lat_0=0., lon_0=self.cenlon, k=0.9996, x_0=0, y_0=0, datum='WGS84') @@ -2812,7 +2883,21 @@ def _reproject_and_write_shapefile(self, entity): # transform geometry to map project = partial(transform_proj, proj_in, proj_out) geometry = shp_trafo(project, entity['geometry']) - geometry = multipolygon_to_polygon(geometry, gdir=self) + if len(self.rgi_id) == 23 and (not geometry.is_valid or + type(geometry) != shpg.Polygon): + # In RGI7 we know that the geometries are valid in entry + # So we have to validate them after proj as well + # Try buffer first + geometry = geometry.buffer(0) + if not geometry.is_valid: + correct = recursive_valid_polygons([geometry], crs=proj4_str) + if len(correct) != 1: + raise RuntimeError('Cant correct this geometry') + geometry = correct[0] + if type(geometry) != shpg.Polygon: + raise ValueError(f'{self.rgi_id}: geometry not valid') + else: + geometry = multipolygon_to_polygon(geometry, gdir=self) # Save transformed geometry to disk entity = entity.copy() @@ -2886,7 +2971,7 @@ def rgi_area_km2(self): except KeyError: # RGI V7 _area = self.read_shapefile('outlines')['area_km2'] - return np.round(float(_area.iloc[0]), decimals=3) + return float(_area.iloc[0]) @lazy_property def intersects_ids(self): diff --git a/oggm/workflow.py b/oggm/workflow.py index 0774620cd..f5df094a5 100644 --- a/oggm/workflow.py +++ b/oggm/workflow.py @@ -240,9 +240,13 @@ def gdir_from_prepro(entity, from_prepro_level=None, prepro_border = int(cfg.PARAMS['border']) if prepro_rgi_version is None: prepro_rgi_version = cfg.PARAMS['rgi_version'] - try: - rid = entity.RGIId - except AttributeError: + + if isinstance(entity, pd.Series): + try: + rid = entity.RGIId + except AttributeError: + rid = entity.rgi_id + else: rid = entity tar_base = utils.get_prepro_gdir(prepro_rgi_version, rid, prepro_border, @@ -275,13 +279,18 @@ def _check_rgi_input(rgidf=None, err_on_lvl2=False): 'OGGM does not provide pre-processed directories for these.') # Check if dataframe or list of strs - try: - rgi_ids = rgidf.RGIId - # if dataframe we can also check for connectivity - if 'Connect' in rgidf and np.any(rgidf['Connect'] == 2): - if err_on_lvl2: - raise RuntimeError(msg) - except AttributeError: + is_dataframe = isinstance(rgidf, pd.DataFrame) + if is_dataframe: + try: + rgi_ids = rgidf.RGIId + # if dataframe we can also check for connectivity + if 'Connect' in rgidf and np.any(rgidf['Connect'] == 2): + if err_on_lvl2: + raise RuntimeError(msg) + except AttributeError: + # RGI7 + rgi_ids = rgidf.rgi_id + else: rgi_ids = utils.tolist(rgidf) # Check for Connectivity level 2 here as well not_good_ids = pd.read_csv(utils.get_demo_file('rgi6_ids_conn_lvl2.csv'), @@ -444,7 +453,6 @@ def init_glacier_directories(rgidf=None, *, reset=False, force=False, # List of str pass - if _isdir(from_tar): gdirs = execute_entity_task(gdir_from_tar, entities, from_tar=from_tar)