# Laserfarm: LiDAR point cloud analysis for macro-ecology

## Configuration

### User parameters

Defines the parameters that can be set by users when executing the workflow.

In [1]:
# (DO NOT containerize this cell)

# 
param_laz_urls = ["https://basisdata.nl/hwh-ahn/AHN6/01_LAZ/AHN6_2025_C_168000_520000.LAZ", "https://basisdata.nl/hwh-ahn/AHN6/01_LAZ/AHN6_2025_C_168000_519000.LAZ"]

# Data handling parameters
param_minio_endpoint = 'scruffy.lab.uvalight.net:9000'
param_minio_public_bucket = 'naa-vre-public'
param_minio_virtual_lab_bucket = 'naa-vre-laserfarm'

# Laserfarm parameters
param_feature_name = 'perc_95_normalized_height'
param_validate_precision = '0.001'
param_tile_mesh_size = '10.'
param_filter_type = 'select_equal'
param_attribute = 'raw_classification'
param_min_x = '-113107.81'  # EPSG:28992
param_max_x = '398892.19'  # EPSG:28992
param_min_y = '214783.87'  # EPSG:28992
param_max_y = '726783.87'  # EPSG:28992
param_n_tiles_side = '512'
param_apply_filter_value = '1'

In [2]:
# Secrets (DO NOT containerize this cell)
from SecretsProvider import SecretsProvider
from getpass import getpass

secrets_provider = SecretsProvider(input_func=getpass)
secret_minio_access_key = secrets_provider.get_secret('secret_minio_access_key')
secret_minio_secret_key = secrets_provider.get_secret('secret_minio_secret_key')

### Dependencies

The following cells install extra dependencies that are not included in the Laserfarm flavor by default, and import the libraries used in the notebook.

In [3]:
# (DO NOT containerize this cell)

import json
import os

from laserfarm import DataProcessing, GeotiffWriter, Retiler
from laserfarm.remote_utils import get_wdclient, list_remote
from minio import Minio
import laspy

### Global configuration

The following variable are used throughout the code. They are intended to be edited by developers who the notebook.

In [4]:
# (DO NOT containerize this cell)

conf_local_tmp = '/tmp/data'
conf_local_path_raw = os.path.join(conf_local_tmp, 'raw')
conf_local_path_split = os.path.join(conf_local_tmp, 'split')
conf_local_path_retiled = os.path.join(conf_local_tmp, 'retiled')
conf_local_path_targets = os.path.join(conf_local_tmp, 'targets')
conf_local_path_geotiff = os.path.join(conf_local_tmp, 'geotiff')
conf_local_path_figures = os.path.join(conf_local_tmp, 'figures')

## Workflow steps

In [5]:
# Check if file is processed


### Fetch laz files from remote storage

This cell downloads `.laz` files from the remote MinIO storage.

In [6]:
# S1 Fetch laz files
import urllib.request

os.makedirs(conf_local_path_raw, exist_ok=True)

raw_laz_files = []
for laz_url in param_laz_urls:
    print(f"retrieving file from {laz_url}")
    filename = laz_url.rpartition('/')[-1] 
    file_location = f"{conf_local_path_raw}/{filename}"
    # urllib.request.urlretrieve(laz_url, f"{filename}")
    urllib.request.urlretrieve(laz_url, file_location)

    raw_laz_files.append(file_location)

print(raw_laz_files)

retrieving file from https://basisdata.nl/hwh-ahn/AHN6/01_LAZ/AHN6_2025_C_168000_520000.LAZ
retrieving file from https://basisdata.nl/hwh-ahn/AHN6/01_LAZ/AHN6_2025_C_168000_519000.LAZ
['/tmp/data/raw/AHN6_2025_C_168000_520000.LAZ', '/tmp/data/raw/AHN6_2025_C_168000_519000.LAZ']


In [7]:
# S3 Retile laz files
# base image: laserfarm

grid_retile = {
    'min_x': float(param_min_x),
    'max_x': float(param_max_x),
    'min_y': float(param_min_y),
    'max_y': float(param_max_y),
    'n_tiles_side': int(param_n_tiles_side),
    }

retiling_input = {
    'setup_local_fs': {
        'input_folder': conf_local_path_split,
        'output_folder': conf_local_path_retiled,
        },
    'set_grid': grid_retile,
    'split_and_redistribute': {},
    'validate': {},
    }

os.makedirs(conf_local_path_retiled, exist_ok=True)
tiles = []

for file in raw_laz_files:
    base_name = os.path.splitext(os.path.basename(file))[0]
    retile_record_filename = os.path.join(
        conf_local_path_retiled,
        f'{base_name}_retile_record.js',
        )
    if not os.path.isfile(retile_record_filename):
        print(f'Retiling {file}')
        retiler = Retiler(file, label=file).config(retiling_input)
        retiler.run()
    else:
        print(
            f'Skipping retiling of {file} because {retile_record_filename} already exists'
            )
    # load filenames from retile record
    with open(retile_record_filename, 'r') as f:
        retile_record = json.load(f)
    
    tiles += retile_record['redistributed_to']
    

print(retile_record)

2025-12-29 11:35:00,255 -           laserfarm.pipeline_remote_data -       INFO - Input dir set to /tmp/data/split
2025-12-29 11:35:00,256 -           laserfarm.pipeline_remote_data -       INFO - Output dir set to /tmp/data/retiled
2025-12-29 11:35:00,258 -                        laserfarm.retiler -       INFO - Setting up the target grid
2025-12-29 11:35:00,259 -                        laserfarm.retiler -       INFO - Splitting file /tmp/data/raw/AHN6_2025_C_168000_520000.LAZ with PDAL ...
2025-12-29 11:35:03,434 -                        laserfarm.retiler -       INFO - ... splitting completed.


Retiling /tmp/data/raw/AHN6_2025_C_168000_520000.LAZ


2025-12-29 11:35:03,437 -                        laserfarm.retiler -       INFO - Redistributing files to tiles ...
2025-12-29 11:35:03,441 -                        laserfarm.retiler -       INFO - ... file AHN6_2025_C_168000_520000_1.LAZ to tile_281_306
2025-12-29 11:35:03,442 -                        laserfarm.retiler -       INFO - ... file AHN6_2025_C_168000_520000_3.LAZ to tile_282_305
2025-12-29 11:35:03,444 -                        laserfarm.retiler -       INFO - ... file AHN6_2025_C_168000_520000_2.LAZ to tile_282_306
2025-12-29 11:35:03,445 -                        laserfarm.retiler -       INFO - ... file AHN6_2025_C_168000_520000_4.LAZ to tile_281_305
2025-12-29 11:35:03,446 -                        laserfarm.retiler -       INFO - ... redistributing completed.
2025-12-29 11:35:03,447 -                        laserfarm.retiler -       INFO - Validating split ...
2025-12-29 11:35:03,448 -                        laserfarm.retiler -       INFO - ... 2014281 points in parent fi

Retiling /tmp/data/raw/AHN6_2025_C_168000_519000.LAZ


2025-12-29 11:35:07,052 -                        laserfarm.retiler -       INFO - Redistributing files to tiles ...
2025-12-29 11:35:07,053 -                        laserfarm.retiler -       INFO - ... file AHN6_2025_C_168000_519000_2.LAZ to tile_281_304
2025-12-29 11:35:07,055 -                        laserfarm.retiler -       INFO - ... file AHN6_2025_C_168000_519000_3.LAZ to tile_282_305
2025-12-29 11:35:07,056 -                        laserfarm.retiler -       INFO - ... file AHN6_2025_C_168000_519000_4.LAZ to tile_282_304
2025-12-29 11:35:07,057 -                        laserfarm.retiler -       INFO - ... file AHN6_2025_C_168000_519000_1.LAZ to tile_281_305
2025-12-29 11:35:07,058 -                        laserfarm.retiler -       INFO - ... redistributing completed.
2025-12-29 11:35:07,059 -                        laserfarm.retiler -       INFO - Validating split ...
2025-12-29 11:35:07,059 -                        laserfarm.retiler -       INFO - ... 2428338 points in parent fi

{'file': '/tmp/data/raw/AHN6_2025_C_168000_519000.LAZ', 'redistributed_to': ['tile_281_305', 'tile_282_304', 'tile_282_305', 'tile_281_304'], 'validated': True}


### Extract features from tiles

Run the feature extraction for each tile. The features are extracted using [laserchicken](https://github.com/eEcoLiDAR/laserchicken).

In [8]:
# S5 Extract features
# base image: laserfarm

feature_files = []

for i, tile in enumerate(tiles):
    grid_feature = {
        'min_x': float(param_min_x),
        'max_x': float(param_max_x),
        'min_y': float(param_min_y),
        'max_y': float(param_max_y),
        'n_tiles_side': int(param_n_tiles_side),
        }

    feature_extraction_input = {
        'setup_local_fs': {
            'input_folder': conf_local_path_retiled,
            'output_folder': conf_local_path_targets,
            },
        'load': {'attributes': [param_attribute]},
        'normalize': 1,
        'apply_filter': {
            'filter_type': param_filter_type,
            'attribute': param_attribute,
            'value': [int(param_apply_filter_value)],
            #ground surface (2), water (9), buildings (6), artificial objects (26), vegetation (?), and unclassified (1)
            },
        'generate_targets': {
            'tile_mesh_size': float(param_tile_mesh_size),
            'validate': True,
            'validate_precision': float(param_validate_precision),
            **grid_feature
            },
        'extract_features': {
            'feature_names': [param_feature_name],
            'volume_type': 'cell',
            'volume_size': float(param_tile_mesh_size),
            },
        'export_targets': {
            'attributes': [param_feature_name],
            'multi_band_files': False,
            },
        }
    idx = (tile.split('_')[1:])

    target_file = os.path.join(
        conf_local_path_targets, param_feature_name, tile + '.ply'
        )
    print(target_file)

    if not os.path.isfile(target_file):
        processing = DataProcessing(tile, tile_index=idx, label=tile).config(
            feature_extraction_input
            )
        processing.run()
    else:
        print(
            f'Skipping features extraction for {tile} ({i + 1} of {len(tiles)}) because {target_file} already exists'
            )

    feature_files.append(target_file)

print(feature_files)

2025-12-29 11:35:07,076 -           laserfarm.pipeline_remote_data -       INFO - Input dir set to /tmp/data/retiled
2025-12-29 11:35:07,077 -           laserfarm.pipeline_remote_data -       INFO - Output dir set to /tmp/data/targets
2025-12-29 11:35:07,078 -                laserfarm.data_processing -       INFO - Loading point cloud data ...
2025-12-29 11:35:07,079 -                laserfarm.data_processing -       INFO - ... loading /tmp/data/retiled/tile_282_306/AHN6_2025_C_168000_520000_2.LAZ


/tmp/data/targets/perc_95_normalized_height/tile_282_306.ply


2025-12-29 11:35:07,375 -                laserfarm.data_processing -       INFO - ... loading completed.
2025-12-29 11:35:07,376 -                laserfarm.data_processing -       INFO - Normalizing point-cloud heights ...
2025-12-29 11:35:07,428 -                                     root -       INFO - Cylinder size in Bytes: 150197785.47624204
2025-12-29 11:35:07,429 -                                     root -       INFO - Memory size in Bytes: 33650999296
2025-12-29 11:35:07,430 -                                     root -       INFO - Start tree creation
2025-12-29 11:35:07,483 -                                     root -       INFO - Done with env tree creation
2025-12-29 11:35:07,486 -                                     root -       INFO - Done with target tree creation
2025-12-29 11:35:08,413 -                laserfarm.data_processing -       INFO - ... normalization completed.
2025-12-29 11:35:08,414 -                laserfarm.data_processing -       INFO - Filtering point-cl

/tmp/data/targets/perc_95_normalized_height/tile_281_305.ply


2025-12-29 11:35:08,865 -                laserfarm.data_processing -       INFO - ... loading completed.
2025-12-29 11:35:08,866 -                laserfarm.data_processing -       INFO - Normalizing point-cloud heights ...
2025-12-29 11:35:09,818 -                                     root -       INFO - Cylinder size in Bytes: 5723102168.897599
2025-12-29 11:35:09,819 -                                     root -       INFO - Memory size in Bytes: 33650999296
2025-12-29 11:35:09,820 -                                     root -       INFO - Start tree creation
2025-12-29 11:35:09,875 -                                     root -       INFO - Done with env tree creation
2025-12-29 11:35:09,953 -                                     root -       INFO - Done with target tree creation
2025-12-29 11:35:18,218 -                laserfarm.data_processing -       INFO - ... normalization completed.
2025-12-29 11:35:18,219 -                laserfarm.data_processing -       INFO - Filtering point-clo

/tmp/data/targets/perc_95_normalized_height/tile_282_305.ply


2025-12-29 11:35:18,620 -                laserfarm.data_processing -       INFO - ... loading /tmp/data/retiled/tile_282_305/AHN6_2025_C_168000_520000_3.LAZ
2025-12-29 11:35:19,238 -                laserfarm.data_processing -       INFO - ... loading completed.
2025-12-29 11:35:19,240 -                laserfarm.data_processing -       INFO - Normalizing point-cloud heights ...
2025-12-29 11:35:19,366 -                                     root -       INFO - Cylinder size in Bytes: 692155693.4389035
2025-12-29 11:35:19,367 -                                     root -       INFO - Memory size in Bytes: 33650999296
2025-12-29 11:35:19,368 -                                     root -       INFO - Start tree creation
2025-12-29 11:35:19,529 -                                     root -       INFO - Done with env tree creation
2025-12-29 11:35:19,538 -                                     root -       INFO - Done with target tree creation
2025-12-29 11:35:23,012 -                laserfarm.data

/tmp/data/targets/perc_95_normalized_height/tile_281_306.ply


2025-12-29 11:35:23,679 -                                     root -       INFO - Cylinder size in Bytes: 1241913170.650779
2025-12-29 11:35:23,680 -                                     root -       INFO - Memory size in Bytes: 33650999296
2025-12-29 11:35:23,681 -                                     root -       INFO - Start tree creation
2025-12-29 11:35:23,703 -                                     root -       INFO - Done with env tree creation
2025-12-29 11:35:23,720 -                                     root -       INFO - Done with target tree creation
2025-12-29 11:35:25,802 -                laserfarm.data_processing -       INFO - ... normalization completed.
2025-12-29 11:35:25,803 -                laserfarm.data_processing -       INFO - Filtering point-cloud data
2025-12-29 11:35:25,804 -                laserfarm.data_processing -       INFO - Setting up the target grid
2025-12-29 11:35:25,805 -                laserfarm.data_processing -       INFO - Checking whether points 

/tmp/data/targets/perc_95_normalized_height/tile_281_305.ply
Skipping features extraction for tile_281_305 (5 of 8) because /tmp/data/targets/perc_95_normalized_height/tile_281_305.ply already exists
/tmp/data/targets/perc_95_normalized_height/tile_282_304.ply


2025-12-29 11:35:26,591 -                laserfarm.data_processing -       INFO - ... loading completed.
2025-12-29 11:35:26,592 -                laserfarm.data_processing -       INFO - Normalizing point-cloud heights ...
2025-12-29 11:35:26,677 -                                     root -       INFO - Cylinder size in Bytes: 542650063.6561003
2025-12-29 11:35:26,678 -                                     root -       INFO - Memory size in Bytes: 33650999296
2025-12-29 11:35:26,679 -                                     root -       INFO - Start tree creation
2025-12-29 11:35:26,812 -                                     root -       INFO - Done with env tree creation
2025-12-29 11:35:26,819 -                                     root -       INFO - Done with target tree creation
2025-12-29 11:35:29,593 -                laserfarm.data_processing -       INFO - ... normalization completed.
2025-12-29 11:35:29,594 -                laserfarm.data_processing -       INFO - Filtering point-clo

/tmp/data/targets/perc_95_normalized_height/tile_282_305.ply
Skipping features extraction for tile_282_305 (7 of 8) because /tmp/data/targets/perc_95_normalized_height/tile_282_305.ply already exists
/tmp/data/targets/perc_95_normalized_height/tile_281_304.ply


2025-12-29 11:35:30,505 -                laserfarm.data_processing -       INFO - ... loading completed.
2025-12-29 11:35:30,506 -                laserfarm.data_processing -       INFO - Normalizing point-cloud heights ...
2025-12-29 11:35:31,246 -                                     root -       INFO - Cylinder size in Bytes: 4486912100.415718
2025-12-29 11:35:31,247 -                                     root -       INFO - Memory size in Bytes: 33650999296
2025-12-29 11:35:31,249 -                                     root -       INFO - Start tree creation
2025-12-29 11:35:31,355 -                                     root -       INFO - Done with env tree creation
2025-12-29 11:35:31,418 -                                     root -       INFO - Done with target tree creation
2025-12-29 11:35:39,109 -                laserfarm.data_processing -       INFO - ... normalization completed.
2025-12-29 11:35:39,110 -                laserfarm.data_processing -       INFO - Filtering point-clo

['/tmp/data/targets/perc_95_normalized_height/tile_282_306.ply', '/tmp/data/targets/perc_95_normalized_height/tile_281_305.ply', '/tmp/data/targets/perc_95_normalized_height/tile_282_305.ply', '/tmp/data/targets/perc_95_normalized_height/tile_281_306.ply', '/tmp/data/targets/perc_95_normalized_height/tile_281_305.ply', '/tmp/data/targets/perc_95_normalized_height/tile_282_304.ply', '/tmp/data/targets/perc_95_normalized_height/tile_282_305.ply', '/tmp/data/targets/perc_95_normalized_height/tile_281_304.ply']


In [9]:
### Save to MinIO

def copy_to_minio(filepath : str):
    filename = filepath.replace(conf_local_path_targets,'')
    minio_client.fput_object(bucket_name=param_minio_virtual_lab_bucket, file_path=filepath, object_name=filename)
    return

minio_client = Minio(
    param_minio_endpoint, 
    access_key=secret_minio_access_key,
    secret_key=secret_minio_secret_key,
    secure=True
)

for filepath in feature_files:
    copy_to_minio(filepath)
    