# Fix HAG values for trees

The height above ground (HAG) values are unknown for points where a tree is partially above water. To "standardize" the HAG values per tree, we correct them based on the average HAG of each tree cluster. If a cluster has no valid HAG values at all, the NAP (z-coordinate) value is used instead.

In [None]:
import set_path

import numpy as np
import laspy
import pathlib

from tqdm.notebook import tqdm

from upcp.region_growing.label_connected_comp import LabelConnectedComp

import gvl.helper_functions as helpers

## Settings

In [None]:
# Root folder of the datasets, relative to the notebook location
BASE_FOLDER = pathlib.Path('../datasets')

# Folder that is searched for the point cloud tiles to process
input_dir = BASE_FOLDER.joinpath('AHN4', 'AMS_subtiles_1000_pred')
# True: write every processed tile back over its input file.
# False: write to 'output_dir', which must then be defined by the user.
overwrite_input = True

# Keyword arguments handed to LabelConnectedComp when clustering tree points
tree_filter = dict(grid_size=0.6, min_component_size=50)

# Our classification
# NOTE(review): OTHER has the same value as UNKNOWN (0) -- confirm that this
# is intended. Only TREE is used in this notebook.
UNKNOWN = 0
TREE = 1
NOISE = 2
OTHER = 0

In [None]:
# Make sure the output folder exists. Nothing has to be created when the
# input files are overwritten in place.
if overwrite_input:
    pass
elif 'output_dir' in locals():
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
else:
    # 'output_dir' is not set in this notebook by default; the user has to
    # define it before results can be written to a separate folder.
    print("Please define 'output_dir' and run again.")

## Main loop

In [None]:
# Collect the merged tiles in the input folder. The result is sorted because
# glob() yields files in an unspecified order; sorting makes the processing
# order (and the progress bar) reproducible between runs.
input_files = sorted(pathlib.Path(input_dir).glob('merged*.laz'))

In [None]:
# Check once, before any tile is processed, that there is a place to write the
# results to. This reports a missing 'output_dir' immediately instead of after
# the first tile has been fully processed.
can_write = overwrite_input or 'output_dir' in locals()
if not can_write:
    print("Please define 'output_dir'.")

pbar = tqdm(input_files if can_write else [], unit='file', smoothing=0)

for file in pbar:
    tilecode = helpers.get_tilecode_from_filename(file.name)
    pbar.set_postfix_str(tilecode)

    # Load LAS data
    las = laspy.read(file)
    points_xyz = np.vstack((las.x, las.y, las.z)).T
    hag = las.hag
    # Points that are not part of a processed cluster keep their original HAG
    new_hag = np.copy(hag)

    # Indices (into the full point cloud) of all points labelled as tree
    mask_ids = np.where(las.label == TREE)[0]

    if mask_ids.size > 0:
        # Extract "tree" clusters
        lcc = LabelConnectedComp(
            grid_size=tree_filter['grid_size'],
            min_component_size=tree_filter['min_component_size'])
        point_components = lcc.get_components(points_xyz[mask_ids])

        # Label -1 is excluded from the clusters; presumably it marks points
        # that were not assigned to a component -- confirm against
        # LabelConnectedComp.
        cc_labels = set(np.unique(point_components)).difference((-1,))
    else:
        # No tree points in this tile: nothing to cluster or to correct
        cc_labels = set()

    # Iterate over the clusters
    for cc in tqdm(cc_labels, smoothing=0, leave=False):
        # select points that belong to the cluster
        cc_ids = mask_ids[point_components == cc]
        cc_z = points_xyz[cc_ids, 2]
        cc_hag = hag[cc_ids]

        if np.isnan(cc_hag).all():
            # No valid HAG in this cluster: fall back to the z-coordinate
            cc_offset = 0.
        else:
            # Average (HAG - z) over the points that have a valid HAG. Both
            # terms use the same set of points, so points with a missing HAG
            # do not bias the offset. Equal to mean(HAG) - mean(z) when no
            # value is missing.
            cc_offset = np.nanmean(cc_hag - cc_z)
        new_hag[cc_ids] = cc_z + cc_offset

    las.hag = new_hag
    if overwrite_input:
        las.write(file)
    else:
        # 'output_dir' is guaranteed to exist here, see the check above
        las.write(pathlib.Path(output_dir) / file.name)