In [1]:
import copy
import fnmatch
import json
import getpass
import os
import pathlib
import datetime
                    
from dask.distributed import LocalCluster, SSHCluster 
from laserfarm import Retiler, DataProcessing, GeotiffWriter, MacroPipeline
from laserfarm.remote_utils import get_wdclient, get_info_remote, list_remote

def last_modified(opts, remote_path):
    """Return the modification time reported for a remote path.

    A WebDAV client is obtained from ``opts`` via ``get_wdclient`` and used
    to query the remote info for ``remote_path``.

    :param opts: options forwarded to ``get_wdclient``
    :param remote_path: path object providing ``as_posix()``
    :return: naive ``datetime.datetime`` parsed from the ``'modified'``
        entry of the remote info (expected to look like
        ``'Mon, 01 Jan 2021 12:00:00 GMT'``)
    :raises ValueError: if the ``'modified'`` entry does not match the
        expected format
    """
    wdclient = get_wdclient(opts)
    remote_info = get_info_remote(wdclient, remote_path.as_posix())
    return datetime.datetime.strptime(
        remote_info['modified'],
        '%a, %d %b %Y %H:%M:%S GMT',
    )

# Macro-Pipeline Workflow - GeoTIFF Export (Mask)

## Set Run-Specific Input

Fill in the username/password for the SURF dCache and set the input and output paths used by the export.

In [2]:
# Common root under which both the input and the output folders live
path_root = pathlib.Path('/project/lidarac/Data')

# Shared parent folder of the input and output directories
path_mask = path_root / 'AHN4_mask' / 'TOP10NL_2021_shapefiles'

# Input is read through the mounted dCache system
path_input = path_mask / 'targets_all'

# dCache path where the GeoTIFF files are copied to
path_output = path_mask / 'geotiff_classified'

## Setup Cluster

Set up the Dask cluster used for all the macro-pipeline calculations (here we connect to an already-running scheduler).

In [None]:
from dask.distributed import Client

# Address of the already-running Dask scheduler to connect to.
# NOTE(review): hard-coded address - it has to be updated whenever the
# scheduler is restarted on a different host/port.
scheduler_address = "tcp://10.0.1.207:41553"

client = Client(scheduler_address)

# Display the client summary (workers, threads, memory) as the cell output
client

0,1
Connection method: Direct,
Dashboard: /proxy/8787/status,

0,1
Comm: tcp://10.0.1.207:41553,Workers: 2
Dashboard: /proxy/8787/status,Total threads: 12
Started: 28 minutes ago,Total memory: 96.00 GiB

0,1
Comm: tcp://10.0.0.88:42737,Total threads: 6
Dashboard: /proxy/8787/status,Memory: 48.00 GiB
Nanny: tcp://10.0.0.88:41593,
Local directory: /tmp/dask-scratch-space/worker-k1xon92g,Local directory: /tmp/dask-scratch-space/worker-k1xon92g
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 162.21 MiB,Spilled bytes: 0 B
Read bytes: 1.03 GiB,Write bytes: 909.60 kiB

0,1
Comm: tcp://10.0.2.120:33031,Total threads: 6
Dashboard: /proxy/8787/status,Memory: 48.00 GiB
Nanny: tcp://10.0.2.120:35801,
Local directory: /tmp/dask-scratch-space/worker-tfr986gf,Local directory: /tmp/dask-scratch-space/worker-tfr986gf
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 162.84 MiB,Spilled bytes: 0 B
Read bytes: 786.77 MiB,Write bytes: 786.51 kiB


## GeoTIFF Export

Export the rasterized features from the target grid to GeoTIFF files.

In [7]:
# Output handle: AHN4 dataset, mask for building, road and water, target grid
# spacing 10 m. It is passed to `create_subregion_geotiffs` below.
output_handle = 'ahn4_mask-building-road-water_10m'

# Input dictionary configuring the GeoTIFF export pipeline.
# NOTE: to export the GeoTIFFs we have mounted the dCache storage with rclone
geotiff_export_input_classification = {
    'setup_local_fs': {
        'input_folder': path_input,
        'output_folder': path_output
    },
    'parse_point_cloud': {},
    'data_split': {'xSub': 1, 'ySub': 1},
    'create_subregion_geotiffs': {'output_handle': output_handle},
    'pushremote': path_output.as_posix(),
#     'cleanlocalfs': {}  # DO NOT CLEAN - it would erase remote input..
}


# Write the input dictionary to a JSON file.
# The dictionary holds `pathlib.Path` objects, which the default JSON encoder
# cannot serialize (it raises TypeError). `os.fspath` converts path-like
# values to strings and itself raises TypeError for anything that is not
# path-like, so other unsupported values are still reported rather than
# silently stringified.
with open('geotiff_export_input_classification.json', 'w') as f:
    json.dump(geotiff_export_input_classification, f, default=os.fspath)

In [None]:
# Build the macro-pipeline that holds the GeoTIFF export task.
macro = MacroPipeline()

# NOTE(review): `wd_opts` is not defined in any of the visible cells -
# confirm it is set before this cell is run.
gw = (
    GeotiffWriter(bands='ground_type', label='ground_type')
    .setup_webdav_client(wd_opts)
    .config(geotiff_export_input_classification)
)
macro.add_task(gw)

# NOTE(review): `cluster` is not defined in any of the visible cells either
# (only `client` is created above) - confirm where it comes from.
macro.setup_cluster(cluster=cluster)

# run!
macro.run()

# Save the outcome of the run; if any pipeline failed, write the labels of
# the failed pipelines to a JSON file and abort with an error.
macro.print_outcome(to_file='geotiff_export_classification.out')
failed = macro.get_failed_pipelines()
if failed:
    with open('geotiff_export_classification_failed.json', 'w') as failed_file:
        json.dump([pipeline.label for pipeline in failed], failed_file)
    raise RuntimeError('Some of the pipelines have failed')

In [None]:
from dask.distributed import Client, Future

# NOTE(review): this rebinds `client` to a scheduler address that differs from
# the one used in the cluster-setup cell - confirm this is the intended one.
client = Client('tcp://145.100.59.123:8786')

# Wrap every key reported by the scheduler in a Future and cancel them all.
held_keys = client.who_has().keys()
futures = [Future(task_key) for task_key in held_keys]
client.cancel(futures)

## Terminate cluster

In [None]:
# Shut down the macro-pipeline (per the section heading, this terminates the
# cluster); relies on `macro` created in the export cell above.
macro.shutdown()