In [13]:
import fnmatch
import json
import getpass
import os
import pathlib
import datetime
import laspy


import time
import requests
                    
from dask.distributed import LocalCluster, SSHCluster 
from laserfarm import Retiler, DataProcessing, GeotiffWriter, MacroPipeline
from laserfarm.remote_utils import get_wdclient, get_info_remote, list_remote

## Global Configuration

In [43]:
# Configurations spiros

import fnmatch
import json
import getpass
import os
import pathlib
import datetime
                    
from dask.distributed import LocalCluster, SSHCluster 
from laserfarm import Retiler, DataProcessing, GeotiffWriter, MacroPipeline
from laserfarm.remote_utils import get_wdclient, get_info_remote, list_remote

conf_remote_path_root = pathlib.Path('/webdav')
conf_remote_path_ahn = pathlib.Path('/webdav/ahn')
conf_remote_path_split = pathlib.Path('/webdav/split')
conf_remote_path_retiled = pathlib.Path('/webdav/retiled/')
conf_remote_path_norm = pathlib.Path('/webdav/norm/')
conf_remote_path_targets = pathlib.Path('/webdav/targets')
conf_local_tmp = pathlib.Path('/tmp')


param_hostname = ''
param_login = ''
param_password = ''

param_feature_name = 'perc_95_normalized_height'
param_validate_precision = '0.00001'
param_tile_mesh_size = '10.'
param_filter_type= 'select_equal'
param_attribute = 'raw_classification'
param_min_x = '-113107.81'
param_max_x = '398892.19'
param_min_y = '214783.87'
param_max_y = '726783.87'
param_n_tiles_side = '512'
param_apply_filter_value = '1'
param_laz_compression_factor = '7'
param_max_filesize = '262144000'  # desired max file size (in bytes)

conf_wd_opts = { 'webdav_hostname': param_hostname, 'webdav_login': param_login, 'webdav_password': param_password}


param_grafana_base_url = ''
param_grafana_token = ''

conf_notebook_name = ''
conf_grafana_verify_ssl = True

## Fetching Laz Files from remote WebDAV

In [44]:
# Fetch Laz Files 01-06-22

def send_annotation(start=None,end=None,message=None,tags=None):
    if not tags:
        tags = []
    
    tags.append(conf_notebook_name)
    
    headers = {
        'Accept':'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Bearer '+param_grafana_token
    }
    
    data ={
      "time":start,
      "timeEnd":end,
      "created": end,
      "tags":tags,
      "text": message
    }
    resp = requests.post(param_grafana_base_url+'/api/annotations',verify=conf_grafana_verify_ssl,headers=headers,json=data)


start = int(round(time.time() * 1000))

laz_files = [f for f in list_remote(get_wdclient(conf_wd_opts), conf_remote_path_ahn.as_posix())
             if f.lower().endswith('.laz')]
end = int(round(time.time() * 1000))
send_annotation(start=start,end=end,message='Fetch Laz Files 01-06-22')

b'{"id":3,"message":"Annotation added"}'


## Splitting big files into smaller files before retiling
This step can be added if the original files are too large for normal VMs to process

In [47]:
# split big files 01-60-22

import numpy as np

def send_annotation(start=None,end=None,message=None,tags=None):
    if not tags:
        tags = []
    
    tags.append(conf_notebook_name)
    
    headers = {
        'Accept':'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Bearer '+param_grafana_token
    }
    
    data ={
      "time":start,
      "timeEnd":end,
      "created": end,
      "tags":tags,
      "text": message
    }
    resp = requests.post(param_grafana_base_url+'/api/annotations',verify=conf_grafana_verify_ssl,headers=headers,json=data)
    

def save_chunk_to_laz_file(in_filename, 
                           out_filename, 
                           offset, 
                           n_points):
    """Read points from a LAS/LAZ file and write them to a new file."""
    
    points = np.array([])
    
    with laspy.open(in_filename) as in_file:
        with laspy.open(out_filename, 
                        mode="w", 
                        header=in_file.header) as out_file:
            in_file.seek(offset)
            points = in_file.read_points(n_points)
            out_file.write_points(points)
    return len(points)

def split_strategy(filename, max_filesize):
    """Set up splitting strategy for a LAS/LAZ file."""
    with laspy.open(filename) as f:
        bytes_per_point = (
            f.header.point_format.num_standard_bytes +
            f.header.point_format.num_extra_bytes
        )
        n_points = f.header.point_count
    n_points_target = int(
        max_filesize * int(param_laz_compression_factor) / bytes_per_point
    )
    stem, ext = os.path.splitext(filename)
    return [
        (filename, f"{stem}-{n}{ext}", offset, n_points_target)
        for n, offset in enumerate(range(0, n_points, n_points_target))
    ]

##################### Don't know how to run this sequentially ################################
from webdav3.client import Client

start = int(round(time.time() * 1000))

client = Client(conf_wd_opts)
client.mkdir(conf_remote_path_split.as_posix())


remote_path_split = conf_remote_path_split

file = laz_files
# for file in laz_files:

client.download_sync(remote_path=os.path.join(conf_remote_path_ahn,file), local_path=file)
inps = split_strategy(file, int(param_max_filesize))
for inp in inps:
    save_chunk_to_laz_file(*inp)
client.upload_sync(remote_path=os.path.join(conf_remote_path_split,file), local_path=file)

for f in os.listdir('.'):
    if not f.endswith('.LAZ'):
        continue
    os.remove(os.path.join('.', f))
    
split_laz_files = laz_files

end = int(round(time.time() * 1000))
send_annotation(start=start,end=end,message='split big files 01-60-22')

TypeError: join() argument must be str, bytes, or os.PathLike object, not 'list'

## Retiling of big files into smaller tiles

In [8]:
# Retiling 01-06-22

def send_annotation(start=None,end=None,message=None,tags=None):
    if not tags:
        tags = []
    
    tags.append(conf_notebook_name)
    
    headers = {
        'Accept':'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Bearer '+param_grafana_token
    }
    
    data ={
      "time":start,
      "timeEnd":end,
      "created": end,
      "tags":tags,
      "text": message
    }
    resp = requests.post(param_grafana_base_url+'/api/annotations',verify=conf_grafana_verify_ssl,headers=headers,json=data)
    
    
start = int(round(time.time() * 1000))
remote_path_retiled = str(conf_remote_path_retiled)

grid_retile = {
    'min_x': float(param_min_x),
    'max_x': float(param_max_x),
    'min_y': float(param_min_y),
    'max_y': float(param_max_y),
    'n_tiles_side': int(param_n_tiles_side)
}


retiling_input = {
    'setup_local_fs': {'tmp_folder': conf_local_tmp.as_posix()},
    'pullremote': conf_remote_path_split.as_posix(),
    'set_grid': grid_retile,
    'split_and_redistribute': {},
    'validate': {},
    'pushremote': conf_remote_path_retiled.as_posix(),
    'cleanlocalfs': {}
}


# try:
#     get_ipython
#     file = laz_files[0]
# except:
#     file = laz_files
    
# for file in laz_files:
file = split_laz_files
retiler = Retiler(file.replace('"',''),label=file).config(retiling_input).setup_webdav_client(conf_wd_opts)
retiler_output = retiler.run()

end = int(round(time.time() * 1000))
send_annotation(start=start,end=end,message='Retiling 01-06-22')

2022-06-02 10:35:16,570 -           laserfarm.pipeline_remote_data -       INFO - Input dir set to /tmp/C_19HZ2.LAZ_input
2022-06-02 10:35:16,574 -           laserfarm.pipeline_remote_data -       INFO - Output dir set to /tmp/C_19HZ2.LAZ_output
2022-06-02 10:35:16,578 -           laserfarm.pipeline_remote_data -       INFO - Pulling from WebDAV /webdav/split/C_19HZ2.LAZ ...
2022-06-02 10:35:17,870 -           laserfarm.pipeline_remote_data -       INFO - ... pulling completed.
2022-06-02 10:35:17,872 -                        laserfarm.retiler -       INFO - Setting up the target grid
2022-06-02 10:35:17,875 -                        laserfarm.retiler -       INFO - Splitting file /tmp/C_19HZ2.LAZ_input/C_19HZ2.LAZ with PDAL ...
2022-06-02 10:35:18,029 -                        laserfarm.retiler -       INFO - ... splitting completed.
2022-06-02 10:35:18,031 -                        laserfarm.retiler -       INFO - Redistributing files to tiles ...
2022-06-02 10:35:18,032 -              

## Fetching retilied files (tiles) from remote WebDAV

In [6]:
# Fetch Tiles 01-06-22
remote_path_retiled
tiles = [t.strip('/') for t in list_remote(get_wdclient(conf_wd_opts), conf_remote_path_retiled.as_posix())
         if fnmatch.fnmatch(t, 'tile_*_*/')]

## Normalization - normalize all the point cloud

This step is added as the previous notebook did not include this step. The two cells below are the original code deployed on SURF using macroPipline function, so it needs to be modified in order to be containerized.

In [7]:
# normalization 01-06-22
import copy

def send_annotation(start=None,end=None,message=None,tags=None):
    if not tags:
        tags = []
    
    tags.append(conf_notebook_name)
    
    headers = {
        'Accept':'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Bearer '+param_grafana_token
    }
    
    data ={
      "time":start,
      "timeEnd":end,
      "created": end,
      "tags":tags,
      "text": message
    }
    resp = requests.post(param_grafana_base_url+'/api/annotations',verify=conf_grafana_verify_ssl,headers=headers,json=data)


start = int(round(time.time() * 1000))


tiles

remote_path_norm = str(conf_remote_path_norm)

normalization_input = {
    'setup_local_fs': {'tmp_folder': conf_local_tmp.as_posix()},
    'pullremote': conf_remote_path_retiled.as_posix(),
    'load': {'attributes': 'all'},
    # Filter out artifically high points - give overflow error when writing
    'apply_filter': {'filter_type':'select_below',
                     'attribute': 'z',
                     'threshold': 10000.},  # remove non-physically heigh points
    'normalize': 1,
    'clear_cache' : {},
    'pushremote': conf_remote_path_norm.as_posix(),
}

# write input dictionary to JSON file
with open('normalize.json', 'w') as f:
    json.dump(normalization_input, f)
    

# add pipeline list to macro-pipeline object and set the corresponding labels
tile = tiles
# for tile in tiles:
normalization_input_ = copy.deepcopy(normalization_input)
normalization_input_['export_point_cloud'] = {'filename': '{}.laz'.format(tile),'overwrite': True}
dp = DataProcessing(tile, label=tile).config(normalization_input_).setup_webdav_client(conf_wd_opts)
dp.run()

end = int(round(time.time() * 1000))
send_annotation(start=start,end=end,message='normalization 01-06-22')


2022-05-25 12:02:06,726 -           laserfarm.pipeline_remote_data -       INFO - Input dir set to /tmp/tile_278_391_input
2022-05-25 12:02:06,727 -           laserfarm.pipeline_remote_data -       INFO - Output dir set to /tmp/tile_278_391_output
2022-05-25 12:02:06,730 -           laserfarm.pipeline_remote_data -       INFO - Pulling from WebDAV /webdav/retiled/tile_278_391 ...
2022-05-25 12:02:09,637 -           laserfarm.pipeline_remote_data -       INFO - ... pulling completed.
2022-05-25 12:02:09,639 -                laserfarm.data_processing -       INFO - Loading point cloud data ...
2022-05-25 12:02:09,640 -                laserfarm.data_processing -       INFO - ... loading /tmp/tile_278_391_input/tile_278_391/C_01GN2_1.LAZ
2022-05-25 12:02:12,979 -                laserfarm.data_processing -       INFO - ... loading completed.
2022-05-25 12:02:12,981 -                laserfarm.data_processing -       INFO - Normalizing point-cloud heights ...
2022-05-25 12:02:13,855 -        

## Fetching normalized files (tiles) from remote WebDAV

In [8]:
# Fetch norm Tiles 01-06-22
remote_path_norm
norm_tiles = [t.strip('/') for t in list_remote(get_wdclient(conf_wd_opts), conf_remote_path_norm.as_posix())
         if fnmatch.fnmatch(t, 'tile_*_*.laz')]

## Extract features - extract defined features from normalized tiles

In [9]:
# Feature Extraction 01-06-22

def send_annotation(start=None,end=None,message=None,tags=None):
    if not tags:
        tags = []
    
    tags.append(conf_notebook_name)
    
    headers = {
        'Accept':'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Bearer '+param_grafana_token
    }
    
    data ={
      "time":start,
      "timeEnd":end,
      "created": end,
      "tags":tags,
      "text": message
    }
    resp = requests.post(param_grafana_base_url+'/api/annotations',verify=conf_grafana_verify_ssl,headers=headers,json=data)


start = int(round(time.time() * 1000))


features = [param_feature_name]

tile_mesh_size = float(param_tile_mesh_size)

grid_feature = {
    'min_x': float(param_min_x),
    'max_x': float(param_max_x),
    'min_y': float(param_min_y),
    'max_y': float(param_max_y),
    'n_tiles_side': int(param_n_tiles_side)
}

feature_extraction_input = {
    'setup_local_fs': {'tmp_folder': conf_local_tmp.as_posix()},
    'pullremote': conf_remote_path_norm.as_posix(),
    'load': {'attributes': [param_attribute]},
    'normalize': 1,
    'apply_filter': {
        'filter_type': param_filter_type, 
        'attribute': param_attribute,
        'value': [int(param_apply_filter_value)]#ground surface (2), water (9), buildings (6), artificial objects (26), and unclassified (1)
    },
    'generate_targets': {
        'tile_mesh_size' : tile_mesh_size,
        'validate' : True,
        'validate_precision': float(param_validate_precision),
        **grid_feature
    },
    'extract_features': {
        'feature_names': features,
        'volume_type': 'cell',
        'volume_size': tile_mesh_size
    },
    'export_targets': {
        'attributes': features,
        'multi_band_files': False
    },
    'pushremote': conf_remote_path_targets.as_posix(),
#     'cleanlocalfs': {}
}    

t = norm_tiles
# for t in norm_tiles:
stem, _ = os.path.splitext(t)
idx = [int(el) for el in (stem.split('_')[1:])]
processing = DataProcessing(t, tile_index=idx,label=stem).config(feature_extraction_input).setup_webdav_client(conf_wd_opts)
processing.run()

end = int(round(time.time() * 1000))
send_annotation(start=start,end=end,message='Feature Extraction 01-06-22')

2022-05-25 12:02:42,438 -           laserfarm.pipeline_remote_data -       INFO - Input dir set to /tmp/tile_287_378_input
2022-05-25 12:02:42,439 -           laserfarm.pipeline_remote_data -       INFO - Output dir set to /tmp/tile_287_378_output
2022-05-25 12:02:42,440 -           laserfarm.pipeline_remote_data -       INFO - Pulling from WebDAV /webdav/norm/tile_287_378.laz ...
2022-05-25 12:02:43,646 -           laserfarm.pipeline_remote_data -       INFO - ... pulling completed.
2022-05-25 12:02:43,648 -                laserfarm.data_processing -       INFO - Loading point cloud data ...
2022-05-25 12:02:43,648 -                laserfarm.data_processing -       INFO - ... loading /tmp/tile_287_378_input/tile_287_378.laz
2022-05-25 12:02:43,735 -                laserfarm.data_processing -       INFO - ... loading completed.
2022-05-25 12:02:43,736 -                laserfarm.data_processing -       INFO - Normalizing point-cloud heights ...
2022-05-25 12:02:43,763 -                 

## GeoTIFF export - generate GeoTIFF raster layer a

In [10]:
# GeoTIFF Export 01-06-22

def send_annotation(start=None,end=None,message=None,tags=None):
    if not tags:
        tags = []
    
    tags.append(conf_notebook_name)
    
    headers = {
        'Accept':'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Bearer '+param_grafana_token
    }
    
    data ={
      "time":start,
      "timeEnd":end,
      "created": end,
      "tags":tags,
      "text": message
    }
    resp = requests.post(param_grafana_base_url+'/api/annotations',verify=conf_grafana_verify_ssl,headers=headers,json=data)


start = int(round(time.time() * 1000))


feature = features

remote_path_geotiffs = conf_remote_path_ahn.parent / 'geotiffs'

# setup input dictionary to configure the GeoTIFF export pipeline
geotiff_export_input = {
    'setup_local_fs': {'tmp_folder': conf_local_tmp.as_posix()},
    'pullremote': conf_remote_path_targets.as_posix(),
    'parse_point_cloud': {},
    'data_split': {'xSub': 1, 'ySub': 1},
    'create_subregion_geotiffs': {'output_handle': 'geotiff'},
    'pushremote': remote_path_geotiffs.as_posix(),
    'cleanlocalfs': {}   
}

writer = GeotiffWriter(input_dir=param_feature_name, bands=param_feature_name,label=param_feature_name).config(geotiff_export_input).setup_webdav_client(conf_wd_opts)
writer.run()
end = int(round(time.time() * 1000))
send_annotation(start=start,end=end,message='GeoTIFF Export 01-06-22')

2022-05-25 12:02:53,786 -           laserfarm.pipeline_remote_data -       INFO - Input dir set to /tmp/perc_95_normalized_height_input
2022-05-25 12:02:53,787 -           laserfarm.pipeline_remote_data -       INFO - Output dir set to /tmp/perc_95_normalized_height_output
2022-05-25 12:02:53,788 -           laserfarm.pipeline_remote_data -       INFO - Pulling from WebDAV /webdav/targets/perc_95_normalized_height ...
2022-05-25 12:03:17,449 -           laserfarm.pipeline_remote_data -       INFO - ... pulling completed.
2022-05-25 12:03:17,452 -                 laserfarm.geotiff_writer -       INFO - 27 PLY files found
2022-05-25 12:03:17,508 -                 laserfarm.geotiff_writer -       INFO - No. of points per file: 10000
2022-05-25 12:03:17,510 -                 laserfarm.geotiff_writer -       INFO - Resolution: (10.0m x 10.0m)
2022-05-25 12:03:17,510 -                 laserfarm.geotiff_writer -       INFO - Splitting data into (1x1) sub-regions
2022-05-25 12:03:17,511 -     

In [11]:
print(remote_path_geotiffs)

/webdav/geotiffs
