In [1]:
import gc
import glob
import os

import rioxarray as rio
from joblib import Parallel, delayed
from rasterio.enums import Resampling
from rioxarray.merge import merge_arrays
from tqdm.notebook import tqdm

# Reprojecting and merging the Norwegian national 10 m terrain dataset

***Note:** This notebook must be run on a machine with plenty of RAM. If the data are processed as 16-bit integers, peak memory consumption is around 120 GB. For 32-bit floats (recommended) it's 240 GB, which will crash the "El Gordo" machine. The 1 TB machine handles everything easily.*

For hydrological calculations we need a good quality DEM. Kartverket's **10 m DTM** is probably the best choice in most cases: the 1 m data is not available everywhere and is unnecessarily detailed, and a DTM (digital terrain model) is better than a DOM (the Norwegian abbreviation for a digital surface model) for hydrology (see [here](https://hoydedata.no/LaserInnsyn/help_no/index.htm?context=130)). The 10 m DTM can be downloaded in 50 x 50 km tiles from the Høydedata website (`Eksport > Landsdekkende > DTM10`). Data for the entire country are available as 254 tiles in UTM Zone 33 (~18 GB compressed).
 
This notebook merges the tiles into a single dataset for the entire country. Arrays are also compressed using the LZW algorithm to save space.

The national 10 m dataset for the whole country is then downsampled using bilinear interpolation to create 20 m and 40 m resolution versions. These will be useful for testing and they should also be sufficient for catchment delineation by themselves.

**Note:** a **conformal projection** (such as EPSG 3395; WGS 84 World Mercator) is best for watershed delineation, but UTM Zone 33N will probably be good enough here.

**Note 2:** I originally tried converting all the grids to 16-bit integer type as, with compression, this dramatically reduces disk space and memory requirements. However, it also leads to difficulties resolving flow directions in flat areas, and it's ultimately more trouble than it's worth. I recommend preserving the original 32-bit floats where possible.

## 1. Adding large files to the JupyterHub

### 1.1. Using `wget`

At the time of writing, the Høydedata website provides a direct link to a zip archive containing all tiles in UTM zone 33. This can be downloaded easily by `cd`-ing into the download folder and running

    wget "https://hoydedata.no/LaserServices/REST/DownloadFile.ashx?id=54" -O dtm_10m_utm_33.zip
    
This can then be unzipped using `unzip dtm_10m_utm_33.zip`.

### 1.2. Using `gdown`

(This was useful before I discovered Kartverket's direct link).

Usually it is possible to upload files to the Hub by dragging and dropping from your local file system via your browser. However, for very large files (multiple GB), a faster and more robust solution is to use `gdown` and Google Drive.

The 10 m DTM tiles for each UTM zone are large. First, zip the files you want to upload (make sure you create a .zip archive and not a .7z). Then upload the zip to your Google Drive and make the file public (`Sharing > Anyone with the link can view`) and copy the sharable link. From a terminal on JupyterHub, cd into the folder where you want to add the file and run the following:

    gdown SHARABLE_LINK_COPIED_FROM_GOOGLE_DRIVE --fuzzy
    
This should quickly add the data to the Hub. You can unzip the file from the command line using `unzip filename.zip`.

## 2. User input

In [2]:
# --- Target grid for the national mosaic ---------------------------------
# Resolution of the merged grid (the notebook text describes this as 10 m).
dst_res = 10
# Extent passed to the merge step, in the order the merge expects.
bbox = (
    -80000,  # xmin
    6440000,  # ymin
    1122000,  # xmax
    7950000,  # ymax
)
# Value written as the raster's nodata flag when saving.
no_data_val = -32767
# Rasterio dtypes: https://test2.biogeo.ucdavis.edu/rasterio/_modules/rasterio/dtypes.html
dst_dtype = "float32"
# NOTE(review): dst_crs is defined here but not read by any cell shown in
# this notebook - the merge keeps whatever CRS the input tiles have.
dst_crs = "EPSG:25833"

# Set values <= 0 to NaN?
# NOTE(review): this flag is not read by any cell shown in this notebook.
neg_to_nan = False

# --- Rasters to create ---------------------------------------------------
# One GeoTIFF per resolution, each in its own sub-folder of the merged root.
_merged_root = "/home/jovyan/shared/01_datasets/spatial/dtm_merged_utm33"
nor_10m_dtm, nor_20m_dtm, nor_40m_dtm = (
    f"{_merged_root}/dtm_{res}m/norway_kartverket_{res}m_dtm_utm_z33.tif"
    for res in (10, 20, 40)
)

# --- Parallelism ---------------------------------------------------------
# Number of workers if you choose to run section 3 in parallel.
# NOTE(review): section 3 as shown runs serially and does not read n_jobs.
n_jobs = 35

## 3. Merge to a single dataset

In [3]:
%%time

search_path = "/home/jovyan/shared/01_datasets/spatial/dtm_10_raw/utm_33/*.tif"
flist = sorted(glob.glob(search_path))
print(len(flist), "files to process.")

print("Opening files...")
srcs = [rio.open_rasterio(fpath, mask_and_scale=True, cache=False) for fpath in flist]

print("Merging tiles...")
rds = merge_arrays(srcs, bounds=bbox, res=dst_res)

print("Saving...")
rds.rio.write_nodata(no_data_val, inplace=True)
rds.rio.to_raster(
    nor_10m_dtm,
    compress="lzw",
    BIGTIFF="YES",
    tiled=True,
    dtype=dst_dtype,
)
srcs = [src.close() for src in srcs]
rds.close()
del srcs, rds
gc.collect()

print("Done.")

254 files to process.
Opening files...
Merging tiles...
Saving...
Done.
CPU times: user 14min 48s, sys: 3min 35s, total: 18min 24s
Wall time: 20min 17s


## 4. Downsampling

In [4]:
%%time

print("Downsampling to 20m...")
rds = rio.open_rasterio(nor_10m_dtm, mask_and_scale=True, cache=False)
upscale_factor = 0.5
width = int(rds.rio.width * upscale_factor)
height = int(rds.rio.height * upscale_factor)

rds = rds.rio.reproject(
    rds.rio.crs,
    shape=(height, width),
    resampling=Resampling.bilinear,
)

rds.rio.to_raster(
    nor_20m_dtm, compress="lzw", BIGTIFF="IF_SAFER", tiled=True, dtype=dst_dtype
)
rds.close()
del rds
gc.collect()

Downsampling to 20m...
CPU times: user 16min 13s, sys: 4min 31s, total: 20min 45s
Wall time: 20min 56s


83

In [5]:
%%time

print("Downsampling to 40m...")
rds = rio.open_rasterio(nor_10m_dtm, mask_and_scale=True, cache=False)
upscale_factor = 0.25
width = int(rds.rio.width * upscale_factor)
height = int(rds.rio.height * upscale_factor)

rds = rds.rio.reproject(
    rds.rio.crs,
    shape=(height, width),
    resampling=Resampling.bilinear,
)

rds.rio.to_raster(
    nor_40m_dtm, compress="lzw", BIGTIFF="IF_SAFER", tiled=True, dtype=dst_dtype
)
rds.close()
del rds
gc.collect()

Downsampling to 40m...
CPU times: user 11min 24s, sys: 3min 50s, total: 15min 15s
Wall time: 15min 18s


101