# Filter all the points in the point cloud above sidewalk
In the previous notebook we scraped and parsed sidewalk data in the form of polygons. With the help of this reference data, we can filter all the points in the point cloud that are above the sidewalk. This preprocessing step is performed in this notebook to reduce the number of points in the point cloud.

If the input point cloud is partly labelled using the [Urban_PointCloud_Processing](https://github.com/Amsterdam-AI-Team/Urban_PointCloud_Processing) project, its GROUND and ROAD labels are used to filter out ground points; otherwise, a separate ground filtering step is performed first.

In [None]:
# Add project src to path.
import set_path

import numpy as np
import geopandas as gpd
import pathlib
from tqdm.notebook import tqdm

import upcp.fusion as fusion
import upcp.utils.ahn_utils as ahn_utils
import upcp.utils.las_utils as las_utils
from upcp.labels import Labels

# Local imports
import upc_sw.sw_utils as sw_utils

In [None]:
# Data folders.
# Folder handed to the AHN NPZ elevation reader.
ahn_data_folder = '../datasets/ahn/ahn3_npz/'
# GeoPackage file from which the sidewalk polygons are read.
bgt_data_file = '../datasets/bgt/bgt_voetpad.gpkg'
# Root folder of the point clouds; input tiles are read from its 'run1/' and
# 'run2/' subfolders and results are written to 'obstacles_run1/' and
# 'obstacles_run2/' below it.
pc_data_folder = '../datasets/pointclouds/'
# Input files are named '<pc_file_prefix>_<tilecode>.laz'.
pc_file_prefix = 'processed'

# Use existing labels in the point cloud, if present. Otherwise, perform ground filter step.
use_existing_labels = True
ground_labels = [Labels.GROUND, Labels.ROAD] # Which labels to use as ground.

# Passed as `max_height` to the sidewalk clipping step.
# NOTE(review): presumably in metres above the ground level - confirm against sw_utils.sidewalk_clip.
max_height_above_ground = 2.0

In [None]:
# AHN elevation reader, reading from `ahn_data_folder` with caching disabled.
ahn_reader = ahn_utils.NPZReader(data_folder=ahn_data_folder, caching=False)

# Sidewalk polygon reader. The resulting GeoDataFrame is indexed by the 'ogc_fid' column.
sw_gdf = gpd.read_file(bgt_data_file).set_index('ogc_fid')

# Ground fuser using pre-processed AHN data. Used when no existing labels are available.
# NOTE(review): epsilon is presumably the elevation tolerance used to decide
# whether a point is ground - confirm against the upcp AHNFuser documentation.
ground_fuser = fusion.AHNFuser(Labels.GROUND, ahn_reader=ahn_reader,
                               target='ground', epsilon=0.2, refine_ground=False)

In [None]:
# Make sure an output folder for the obstacle files exists for each run.
# Missing parent folders are created; existing folders are left untouched.
for run in ('run1', 'run2'):
    obstacle_dir = pathlib.Path(f'{pc_data_folder}obstacles_{run}')
    obstacle_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Collect the tilecodes found in each run folder, then keep only those
# for which we have both runs.
run1_tiles = las_utils.get_tilecodes_from_folder(f'{pc_data_folder}run1/', las_prefix=pc_file_prefix)
run2_tiles = las_utils.get_tilecodes_from_folder(f'{pc_data_folder}run2/', las_prefix=pc_file_prefix)
all_tiles = set(run1_tiles.intersection(run2_tiles))

In [None]:
# Progress bar over all tiles; the postfix shows the tile currently being processed.
tile_tqdm = tqdm(all_tiles, unit='tile', smoothing=0)

for tilecode in tile_tqdm:
    tile_tqdm.set_postfix_str(tilecode)
    print(f'Processing tile {tilecode}...')
    for run in ['run1', 'run2']:
        in_file = f'{pc_data_folder}{run}/{pc_file_prefix}_{tilecode}.laz'

        # Read the points together with their 'label' values.
        points, labels = sw_utils.read_las(in_file, extra_val='label')
        n_points = len(points)

        # Determine the ground points: take them from the labels stored in the
        # file when allowed and any non-zero label is present, otherwise fall
        # back to the AHN-based ground fuser applied to all points.
        labels_available = use_existing_labels and np.count_nonzero(labels) > 0
        if not labels_available:
            all_points_mask = np.ones(n_points, dtype=bool)
            ground_mask = ground_fuser.get_label_mask(points, labels, all_points_mask, tilecode)
        else:
            print('Using labels found in pointcloud file.')
            ground_mask = sw_utils.create_label_mask(labels, target_labels=ground_labels)

        # Extract points above the sidewalk, considering non-ground points only.
        non_ground = ~ground_mask
        sw_mask, has_polys = sw_utils.sidewalk_clip(
            points[non_ground],
            tilecode,
            sw_poly_gdf=sw_gdf,
            ahn_reader=ahn_reader,
            max_height=max_height_above_ground,
        )

        # Only save a .laz file when sidewalk polygons are present.
        if not has_polys:
            continue

        # Map the clip result, which covers non-ground points only, back onto
        # the full point cloud.
        obstacle_mask = np.zeros(n_points, dtype=bool)
        obstacle_mask[non_ground] = sw_mask

        # Save the new point cloud, keeping the labels of the selected points.
        out_file = f'{pc_data_folder}obstacles_{run}/obst_{tilecode}.laz'
        sw_utils.write_las(points[obstacle_mask], out_file, values=labels[obstacle_mask])