In [1]:
import json
import pdal
import pathlib


In [2]:
from dask.distributed import Client, as_completed, SSHCluster


# Reclassify AHN1 ground points
The ground points have the field `raw_classification` set to 0 (never classified). It is necessary to set this to 2 (ground points) for the calculation of the pulse penetration ratio.

## Set input

In [3]:
# Directory holding the raw LAZ tiles and directory receiving the
# reclassified copies. For a local trial run, '/data/local/tmp' and
# '/data/local/tmp/reclassified' have been used instead.
input_path = pathlib.Path(
    '/project/lidarac/Software/Yifang/JupyterDaskOnSLURM/Spain_test/rawlas'
)
output_path = pathlib.Path(
    '/project/lidarac/Software/Yifang/JupyterDaskOnSLURM/Spain_test/reclassified'
)

# Name of the JSON file listing the tiles to process; only read when
# `run` is 'from_file'.
filename = 'reclassification_failed.json'
# Run mode: 'all' processes every tile found in `input_path`,
# 'from_file' processes only the tiles listed in `filename`.
run = 'all'
assert run in ('all', 'from_file')

In [4]:
# Collect every LAZ tile in the input directory. `iterdir()` yields entries in
# arbitrary order, so the list is sorted to make the processing order
# reproducible from one run to the next.
files = sorted(el for el in input_path.iterdir() if el.suffix == '.laz')
print('Found: {} LAZ files'.format(len(files)))
if run == 'from_file':
    # Restrict the run to the tiles listed in the JSON file `filename`.
    with open(filename, 'r') as f:
        files_read = [pathlib.Path(name) for name in json.load(f)]
    # Check that every requested tile is available. A set keeps each lookup
    # O(1), and the message names the offending files.
    available = set(files)
    missing = [p.as_posix() for p in files_read if p not in available]
    assert not missing, f'Some of the files in {filename} are not in remote dir: {missing}'
    files = files_read
print('Retrieve and reclassify: {} LAZ files'.format(len(files)))

Found: 2 LAZ files
Retrieve and reclassify: 2 LAZ files


## Connect to Dask cluster

In [None]:
# Scratch directory handed to the Dask workers. Locations used previously:
#   /pnfs/grid.sara.nl/data/projects.nl/eecolidar/02_UvA/YShi/temp
#   /data/local/tmp
local_tmp = pathlib.Path('/project/lidarac/Data/temp')
nprocs_per_node = 1

# start the cluster
scheduler_node = 'node1'
# Worker hosts. To run on a subset, filter this list (e.g. drop 'node7',
# 'node9' and 'node10') or replace it with ['node1'].
hosts = [f'node{i}' for i in range(1, 11)]
# The first entry of `hosts=` is the scheduler node, followed by the worker
# hosts (so 'node1' appears twice in the list that is passed).
# NOTE(review): 'known_hosts': None presumably turns off SSH host-key
# verification -- confirm this is acceptable on this network.
# The worker scratch directory is passed as a plain string rather than a
# pathlib.Path: the worker options are forwarded to the remote hosts and a
# string is safe to serialise.
cluster = SSHCluster(hosts=[scheduler_node] + hosts,
                     connect_options={'known_hosts': None,
                                      'username': 'ubuntu',
                                      'client_keys': '/home/ubuntu/.ssh/id_rsa'},
                     worker_options={'nthreads': 1,
                                     'nprocs': nprocs_per_node,
                                     'memory_limit': 'auto',
                                     'local_directory': (local_tmp / 'dask-worker-space').as_posix()},
                     scheduler_options={'dashboard_address': '8787'})
cluster

In [8]:
# Scratch directory; same value as assigned in the cluster-start cell.
# NOTE(review): presumably repeated here so `local_tmp` exists when the
# cluster-start cell is skipped -- confirm it is still needed.
local_tmp = pathlib.Path('/project/lidarac/Data/temp')

In [5]:
# Attach to an already-running Dask scheduler.
# NOTE(review): the address is hard-coded and will need updating whenever the
# scheduler is restarted on a different host or port.
scheduler_address = "tcp://10.0.1.193:39583"
client = Client(scheduler_address)
client

0,1
Connection method: Direct,
Dashboard: /proxy/8787/status,

0,1
Comm: tcp://10.0.1.193:39583,Workers: 2
Dashboard: /proxy/8787/status,Total threads: 8
Started: 4 minutes ago,Total memory: 60.00 GiB

0,1
Comm: tcp://10.0.0.10:45501,Total threads: 4
Dashboard: /proxy/8787/status,Memory: 30.00 GiB
Nanny: tcp://10.0.0.10:42201,
Local directory: /tmp/dask-worker-space/worker-voofa5co,Local directory: /tmp/dask-worker-space/worker-voofa5co
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 6.0%,Last seen: Just now
Memory usage: 110.95 MiB,Spilled bytes: 0 B
Read bytes: 285.88889720381684 B,Write bytes: 0.94 kiB

0,1
Comm: tcp://10.0.2.188:33327,Total threads: 4
Dashboard: /proxy/8787/status,Memory: 30.00 GiB
Nanny: tcp://10.0.2.188:44775,
Local directory: /tmp/dask-worker-space/worker-g2zyb95u,Local directory: /tmp/dask-worker-space/worker-g2zyb95u
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 2.0%,Last seen: Just now
Memory usage: 94.04 MiB,Spilled bytes: 0 B
Read bytes: 285.7556877383638 B,Write bytes: 0.94 kiB


## Classification using PDAL

In [11]:
def classify_as_ground_points(input_file, output_file):
    """Copy a LAS/LAZ file, marking every point as ground (class 2).

    A three-stage PDAL pipeline is built and executed: `readers.las` reads
    `input_file`, `filters.assign` sets `Classification` to 2 for all points,
    and `writers.las` writes the result to `output_file`, forwarding the
    input's X/Y/Z scale factors and letting PDAL choose the offsets.

    Parameters
    ----------
    input_file : str or os.PathLike
        Path of the LAS/LAZ file to read.
    output_file : str or os.PathLike
        Path of the LAS/LAZ file to write.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If a path argument is neither a string nor path-like.
    Any exception raised by PDAL while building or executing the pipeline
    is propagated unchanged.
    """
    # Imported locally so the function carries its own dependency when it is
    # shipped to remote workers.
    import os

    # json.dumps cannot serialise pathlib.Path objects, so normalise both
    # arguments to plain strings first.
    input_file = os.fspath(input_file)
    output_file = os.fspath(output_file)

    PDAL_pipeline_dict = {
        "pipeline": [
            {
                "tag": "ground_laz",
                "type": "readers.las",
                "filename": input_file
            },
            {
                # Overwrite the classification of every point with 2.
                "type": "filters.assign",
                "assignment": "Classification[:]=2",
                "tag": "ground_classed"
            },
            {
                "type": "writers.las",
                "filename": output_file,
                # Keep the coordinate scale factors of the input file.
                "forward": ["scale_x", "scale_y", "scale_z"],
                "offset_x": "auto",
                "offset_y": "auto",
                "offset_z": "auto"
            }
        ]
    }
    PDAL_pipeline = pdal.Pipeline(json.dumps(PDAL_pipeline_dict))
    PDAL_pipeline.execute()

## Run!

In [12]:
# Make sure the destination directory exists before any task tries to write
# into it (the PDAL writer is not expected to create missing directories).
output_path.mkdir(parents=True, exist_ok=True)
# One output path per input tile: '<stem>_reclassified<suffix>'. Building the
# name from stem and suffix only touches the extension, whereas a plain
# str.replace would also rewrite any '.laz' occurring earlier in the name.
out_files = [output_path / f'{f.stem}_reclassified{f.suffix}' for f in files]

In [13]:
# Submit one reclassification task per (input, output) pair.
futures = []
for src, dst in zip(files, out_files):
    futures.append(client.submit(classify_as_ground_points,
                                 src.as_posix(),
                                 dst.as_posix()))

# Lookup from a future's key to the position of its tile in `files`.
map_key_to_index = dict((fut.key, pos) for pos, fut in enumerate(futures))
# Per-tile bookkeeping: the error (if any) and the latest known status.
errors = [None for _unused in files]
outcome = [fut.status for fut in futures]

# Record status and exception of each task as soon as it completes; failures
# are collected instead of raised so that all remaining tasks still run.
completed = as_completed(futures, with_results=True, raise_errors=False)
for fut, _result in completed:
    pos = map_key_to_index[fut.key]
    outcome[pos] = fut.status
    failure = fut.exception()
    if failure is not None:
        errors[pos] = (type(failure), failure)
    fut.release()

In [14]:
# Write a one-line-per-tile report: running number, tile name, and either the
# task status or the status followed by exception type and message.
with open('reclassification.out', 'w') as fd:
    records = zip(outcome, errors, files)
    for number, (status, error, tile) in enumerate(records, start=1):
        summary = (status if error is None
                   else '{}: {}, {}'.format(status, error[0].__name__, error[1]))
        fd.write('{:03d} {:30s} {}\n'.format(number, tile.name, summary))

In [15]:
# Gather the input paths of every task that did not end as 'finished'.
failed = []
for status, tile in zip(outcome, files):
    if status != 'finished':
        failed.append(tile.as_posix())

# Save the list so a later run in 'from_file' mode can retry just these
# tiles, then stop the notebook with an error.
if len(failed) > 0:
    with open('reclassification_failed.json', 'w') as fd:
        fd.write(json.dumps(failed))
    raise RuntimeError('Some of the reclassifications have failed')

In [14]:
# Close this notebook's client connection to the scheduler.
client.close()

In [None]:
# Per the Dask documentation, shutdown() also stops the connected scheduler
# and workers, not just this client.
# NOTE(review): the client is already closed by the previous cell -- confirm
# whether shutdown() can still be called at that point, or run it instead of
# close().
client.shutdown()