In [3]:
import fnmatch
import json
import getpass
import os
import pathlib
import datetime
import laspy


import time
import requests
                    
from dask.distributed import LocalCluster, SSHCluster 
from laserfarm import Retiler, DataProcessing, GeotiffWriter, MacroPipeline
from laserfarm.remote_utils import get_wdclient, get_info_remote, list_remote

## Global Configuration

In [4]:
import fnmatch
import json
import getpass
import os
import pathlib
import datetime

from dask.distributed import LocalCluster, SSHCluster 
from laserfarm import Retiler, DataProcessing, GeotiffWriter, MacroPipeline
from laserfarm.remote_utils import get_wdclient, get_info_remote, list_remote

# User name used to suffix the per-user remote folders; taken from the
# JupyterHub environment when present, otherwise a placeholder is used.
conf_username = os.environ.get('JUPYTERHUB_USER', 'myname')

# Remote WebDAV layout: one sub-folder per pipeline stage and per user.
conf_remote_path_root = '/webdav/LAZ'
conf_remote_path_split = pathlib.Path(conf_remote_path_root + '/split_' + conf_username)
conf_remote_path_retiled = pathlib.Path(conf_remote_path_root + '/retiled_' + conf_username)
conf_remote_path_norm = pathlib.Path(conf_remote_path_root + '/norm_' + conf_username)
conf_remote_path_targets = pathlib.Path(conf_remote_path_root + '/targets_' + conf_username)
# The original input files are read straight from the root folder.
conf_remote_path_ahn = conf_remote_path_root

# Local scratch space handed to the pipelines as their tmp folder.
conf_local_tmp = pathlib.Path('/tmp')

# WebDAV connection parameters; intentionally left empty here.
param_hostname = ''
param_login = ''
param_password = ''

# Numeric settings are kept as strings and converted with int()/float()
# at the point of use.
conf_feature_name = 'perc_95_normalized_height'
conf_validate_precision = '0.001'
conf_tile_mesh_size = '10.'
conf_filter_type = 'select_equal'
conf_attribute = 'raw_classification'
conf_apply_filter_value = '1'

# Bounding box and resolution of the tiling grid.
conf_min_x = '-113107.81'
conf_max_x = '398892.19'
conf_min_y = '214783.87'
conf_max_y = '726783.87'
conf_n_tiles_side = '512'

# Settings used when planning how to split large input files.
conf_laz_compression_factor = '7'
conf_max_filesize = '262144000'  # desired max file size (in bytes)

<class 'str'>


## Fetching LAZ files from the remote WebDAV server

In [5]:
# Fetch Laz Files
# WebDAV connection options, reused by the later cells of this notebook.
conf_wd_opts = {
    'webdav_hostname': param_hostname,
    'webdav_login': param_login,
    'webdav_password': param_password,
}
# Keep only the remote entries whose name ends in '.laz', ignoring case.
laz_files = list(filter(
    lambda name: name.lower().endswith('.laz'),
    list_remote(get_wdclient(conf_wd_opts), pathlib.Path(conf_remote_path_ahn).as_posix()),
))


## Splitting big files into smaller files before retiling
This optional step can be added when the original files are too large for a normal VM to process in one piece.

In [4]:
# Debug helper: list the files the splitting step would process, without touching them.

for file in laz_files:
    print("Splitting: " + file)

Splitting: C_01GZ2.LAZ
Splitting: C_02CZ1.LAZ
Splitting: C_01GN1.LAZ
Splitting: C_01GN2.LAZ


In [5]:
# split big files
import numpy as np

def save_chunk_to_laz_file(in_filename, out_filename, offset, n_points):
    """Copy a run of points from one LAS/LAZ file into a newly written file.

    The output file is created with the header of the input file. The reader
    is positioned at ``offset`` and ``n_points`` points are requested from
    there; whatever is read is written to the output.

    :param in_filename: path of the LAS/LAZ file to read from
    :param out_filename: path of the LAS/LAZ file to create
    :param offset: position passed to the reader's ``seek`` (a point index
        when the arguments come from ``split_strategy``)
    :param n_points: number of points requested from the reader
    :return: number of points actually read and written
    """
    with laspy.open(in_filename) as reader:
        with laspy.open(out_filename, mode="w", header=reader.header) as writer:
            reader.seek(offset)
            chunk = reader.read_points(n_points)
            writer.write_points(chunk)
    return len(chunk)

def split_strategy(filename, max_filesize):
    """Plan how to split a LAS/LAZ file into chunks of bounded size.

    The number of points per chunk is derived from the desired file size,
    the uncompressed size of one point record (standard plus extra bytes)
    and the module-level ``conf_laz_compression_factor``.

    :param filename: path of the LAS/LAZ file to split
    :param max_filesize: desired maximum size of each output file, in bytes
    :return: list of ``(filename, out_filename, offset, n_points_target)``
        tuples, one per chunk, suitable for ``save_chunk_to_laz_file``;
        output names are ``<stem>-<n><ext>`` with ``n`` counting from 0
    :raises ValueError: if the resulting chunk size is smaller than one point
    """
    with laspy.open(filename) as f:
        point_format = f.header.point_format
        bytes_per_point = (
            point_format.num_standard_bytes +
            point_format.num_extra_bytes
        )
        n_points = f.header.point_count
    n_points_target = int(
        max_filesize * int(conf_laz_compression_factor) / bytes_per_point
    )
    # A zero step would make range() fail with an unrelated-looking message,
    # and a negative one would silently produce no chunks at all.
    if n_points_target < 1:
        raise ValueError(
            f"max_filesize={max_filesize!r} is too small to hold a single "
            f"point of {filename!r} ({bytes_per_point} bytes per point)"
        )
    stem, ext = os.path.splitext(filename)
    return [
        (filename, f"{stem}-{n}{ext}", offset, n_points_target)
        for n, offset in enumerate(range(0, n_points, n_points_target))
    ]

from webdav3.client import Client

client = Client(conf_wd_opts)
client.mkdir(conf_remote_path_split.as_posix())


remote_path_split = conf_remote_path_split

for file in laz_files:
    print('Splitting: '+file)
    client.download_sync(remote_path=os.path.join(conf_remote_path_ahn,file), local_path=file)
    inps = split_strategy(file, int(conf_max_filesize))
    for inp in inps:
        save_chunk_to_laz_file(*inp)
    client.upload_sync(remote_path=os.path.join(conf_remote_path_split,file), local_path=file)

    for f in os.listdir('.'):
        if not f.endswith('.LAZ'):
            continue
        os.remove(os.path.join('.', f))
        
remote_path_retiled = str(conf_remote_path_retiled)

Splitting: C_01GZ2.LAZ
Splitting: C_02CZ1.LAZ
Splitting: C_01GN1.LAZ
Splitting: C_01GN2.LAZ


In [6]:
# Fetch split Laz Files
remote_path_retiled

split_laz_files = [f for f in list_remote(get_wdclient(conf_wd_opts), pathlib.Path(conf_remote_path_ahn).as_posix())
             if f.lower().endswith('.laz')]

## Retiling of big files into smaller tiles

In [1]:
# Retiling

# Extent of the tiling grid and number of tiles per side, converted from the
# string-valued configuration.
grid_retile = {
    'min_x': float(conf_min_x),
    'max_x': float(conf_max_x),
    'min_y': float(conf_min_y),
    'max_y': float(conf_max_y),
    'n_tiles_side': int(conf_n_tiles_side)
}

# Per-task options passed to Retiler.config(): input is pulled from the split
# folder and results are pushed to the retiled folder.
retiling_input = {
    'setup_local_fs': {'tmp_folder': conf_local_tmp.as_posix()},
    'pullremote': conf_remote_path_split.as_posix(),
    'set_grid': grid_retile,
    'split_and_redistribute': {},
    'validate': {},
    'pushremote': conf_remote_path_retiled.as_posix(),
    'cleanlocalfs': {}
}

# Bound before the loop so the name exists (as None) even when there are no
# files to retile; otherwise printing it afterwards raises NameError.
retiler_output = None

for file in split_laz_files:
    print('Retiling: '+file)
    # Double quotes are stripped from the input name; the label keeps the raw name.
    retiler = Retiler(file.replace('"',''),label=file).config(retiling_input).setup_webdav_client(conf_wd_opts)
    retiler_output = retiler.run()


remote_path_retiled = conf_remote_path_retiled.as_posix()
print(type(remote_path_retiled))

NameError: name 'conf_min_x' is not defined

In [8]:
# Show the value returned by the most recent `retiler.run()` call.
print(retiler_output)

None


In [9]:
# Fetch Tiles
remote_path_retiled

# Keep the remote entries named like 'tile_<a>_<b>/' and drop the slashes
# at either end of each name.
tiles = [
    entry.strip('/')
    for entry in fnmatch.filter(
        list_remote(get_wdclient(conf_wd_opts), conf_remote_path_retiled.as_posix()),
        'tile_*_*/',
    )
]

In [None]:
# Feature Extraction
    
for t in tiles:
    # Everything below is rebuilt for each tile, so every DataProcessing
    # instance receives its own configuration objects.
    tile_mesh_size = float(conf_tile_mesh_size)
    features = [conf_feature_name]

    # Same grid definition as used for retiling, converted from the
    # string-valued configuration.
    grid_feature = {
        'min_x': float(conf_min_x),
        'max_x': float(conf_max_x),
        'min_y': float(conf_min_y),
        'max_y': float(conf_max_y),
        'n_tiles_side': int(conf_n_tiles_side)
    }

    # Per-task options passed to DataProcessing.config(): tiles are pulled
    # from the retiled folder and results are pushed to the targets folder.
    feature_extraction_input = {
        'setup_local_fs': {'tmp_folder': conf_local_tmp.as_posix()},
        'pullremote': conf_remote_path_retiled.as_posix(),
        'load': {'attributes': [conf_attribute]},
        'normalize': 1,
        'apply_filter': {
            'filter_type': conf_filter_type,
            'attribute': conf_attribute,
            # ground surface (2), water (9), buildings (6), artificial objects (26),
            # vegetation (?), and unclassified (1)
            'value': [int(conf_apply_filter_value)]
        },
        'generate_targets': {
            'tile_mesh_size': tile_mesh_size,
            'validate': True,
            'validate_precision': float(conf_validate_precision),
            **grid_feature
        },
        'extract_features': {
            'feature_names': features,
            'volume_type': 'cell',
            'volume_size': tile_mesh_size
        },
        'export_targets': {
            'attributes': features,
            'multi_band_files': False
        },
        'pushremote': conf_remote_path_targets.as_posix(),
        # 'cleanlocalfs' is deliberately left out, so local files are kept.
    }

    # The tile name looks like 'tile_<a>_<b>'; the two trailing parts are
    # passed on as the tile index.
    # NOTE(review): these are strings, not ints - confirm DataProcessing accepts that.
    idx = t.split('_')[1:]

    processing = DataProcessing(t, tile_index=idx, label=t)
    processing = processing.config(feature_extraction_input).setup_webdav_client(conf_wd_opts)
    processing.run()

2023-01-11 03:09:33,382 -           laserfarm.pipeline_remote_data -       INFO - Input dir set to /tmp/tile_278_391_input
2023-01-11 03:09:33,382 -           laserfarm.pipeline_remote_data -       INFO - Output dir set to /tmp/tile_278_391_output
2023-01-11 03:09:33,384 -           laserfarm.pipeline_remote_data -       INFO - Pulling from WebDAV /webdav/LAZ/retiled_skoulouzis/tile_278_391 ...
2023-01-11 03:09:40,344 -           laserfarm.pipeline_remote_data -       INFO - ... pulling completed.
2023-01-11 03:09:40,346 -                laserfarm.data_processing -       INFO - Loading point cloud data ...
2023-01-11 03:09:40,348 -                laserfarm.data_processing -       INFO - ... loading /tmp/tile_278_391_input/tile_278_391/C_01GN1_9.LAZ
2023-01-11 03:09:40,617 -                laserfarm.data_processing -       INFO - ... loading /tmp/tile_278_391_input/tile_278_391/C_01GN2_1.LAZ
2023-01-11 03:09:42,373 -                laserfarm.data_processing -       INFO - ... loading /t