In [1]:
import gc
import logging
import os

import nivapy3 as nivapy
import rioxarray as rio

# Split DTM by vassdragsområde

***Note:** With 16-bit integer input grids, this notebook will run on a machine with 240 GB of RAM. For 32-bit grids, use something larger.*

The merged national DTM is very large: at 10 m resolution it's ~136 GB using float64 data type (which is the default for many hydrological processing tools). To avoid having to run the final tool on a huge machine, this notebook splits each dataset (10 m, 20 m and 40 m) into 262 chunks based on vassdragsområder.

Each vassdragsområde is first "buffered" by 5 km to ensure the resulting clipped DTM contains the "true" watershed.

## 1. User options

**Note:** If running this on the machine with 1 TB of RAM, a good tip is to make 3 copies of this notebook, set `res_list` in each copy to a single resolution, and then run them simultaneously (one for each DTM resolution). Processing the 40 m dataset takes a few hours, the 20 m dataset takes about 8 hours and the 10 m dataset about 32 hours.

In [2]:
# File that progress messages are written to
log_file = "split_by_vassom.log"

# Grid resolutions (in metres) of the merged DTMs to split
res_list = [10, 20, 40]

# CRS the vassdragsområder are converted to before buffering
crs = "epsg:25833"

# Buffer added around each vassdragsområde, in metres
buff_dist_m = 5_000

In [3]:
# Setup logging. Progress is written to a file (overwritten on each run)
# because the clipping below runs unattended for many hours.
logging.basicConfig(
    filename=log_file,
    filemode="w",
    encoding="utf-8",
    format="%(asctime)s %(message)s",
    datefmt="%Y-%m-%d %H:%M",
    level=logging.INFO,
    # basicConfig is a no-op when the root logger already has handlers
    # (e.g. this cell was run before in the same kernel). force=True removes
    # the old handlers so the settings above always take effect.
    force=True,
)
print(f"Logging progress to '{log_file}'.")
logging.info("Started")

Logging progress to 'split_by_vassom.log'.


## 2. Get vassdragsområder

Read the vassdragsområder polygons from the database, reproject them to the chosen CRS and apply the user-specified buffer.

In [4]:
%%time

logging.info("Getting vassdragsområder")

# Read the vassdragsområde polygons from PostGIS and reproject them to the
# user-specified CRS in one step
eng = nivapy.da.connect_postgis()
vass_gdf = nivapy.da.read_postgis(
    "physical",
    "norway_nve_vassdragomrade_poly",
    eng,
).to_crs(crs)

# Replace each polygon with its buffered version
buffered_geoms = vass_gdf.geometry.buffer(buff_dist_m)
vass_gdf.geometry = buffered_geoms

vass_gdf.head()

Connection successful.
CPU times: user 30.4 s, sys: 183 ms, total: 30.6 s
Wall time: 31.2 s


Unnamed: 0,objekttype,vassdragsomradenr,vassdragsomrade,arealland_km2,areal_km2,geom,id
0,Vassdragsområde,1,Haldenvassdraget/Iddefjorden,2495,2507,"POLYGON ((278923.044 6559174.063, 279135.208 6...",1
1,Vassdragsområde,3,Mossevassdraget/kyst Onsøy-Son,854,1052,"POLYGON ((244754.301 6589646.324, 244757.918 6...",3
2,Vassdragsområde,4,Hølenelva/Drøbaksundet øst,204,227,"POLYGON ((249865.366 6603296.526, 249430.213 6...",4
3,Vassdragsområde,5,Nesodden og Bunnefjorden,279,368,"POLYGON ((244637.127 6630728.383, 244467.295 6...",5
4,Vassdragsområde,6,Nordmarkvassdraget/kyst Gjersjøelva-Bygdøy,391,414,"POLYGON ((243493.448 6682579.933, 243499.854 6...",6


## 3. Clip DTM data

In [None]:
%%time

# Clip the merged national DTM at each resolution to every buffered
# vassdragsområde and save one LZW-compressed, tiled GeoTIFF per area.
base_dir = "/home/jovyan/shared/01_datasets/spatial/dtm_merged_utm33"

# The set of area IDs does not depend on the resolution, so build it once
vassom_list = sorted(vass_gdf["vassdragsomradenr"].unique())

for res in res_list:
    logging.info(f"Processing {res} m DTM")
    res_dir = f"{base_dir}/dtm_{res}m"
    dtm_path = f"{res_dir}/norway_kartverket_{res}m_dtm_utm_z33.tif"
    out_dir = f"{res_dir}/by_vassom/dtm"

    # Make sure the output folder exists before any raster is written
    os.makedirs(out_dir, exist_ok=True)

    rds = rio.open_rasterio(dtm_path, mask_and_scale=True)
    try:
        for vassom in vassom_list:
            logging.info(f"    Vassdragsområder {vassom}")
            gdf = vass_gdf.query("vassdragsomradenr == @vassom")
            out_path = f"{out_dir}/vassom_{vassom}_{res}m_dtm.tif"

            # NOTE(review): rio.clip also accepts from_disk=True, which may
            # lower peak memory for grids this large - confirm before enabling.
            rds_clip = rds.rio.clip(gdf.geometry, drop=True, invert=False)
            rds_clip.rio.to_raster(
                out_path, compress="lzw", BIGTIFF="IF_SAFER", tiled=True
            )

            # Free the clipped grid before moving on to the next area
            rds_clip.close()
            del rds_clip
            gc.collect()
    finally:
        # Release the source raster before opening the next resolution,
        # also when clipping or writing fails part-way through
        rds.close()
logging.info("Done")