# Filter all the points in the point cloud above the sidewalk
In the previous notebook we scraped and parsed sidewalk data in the form of polygons. With the help of this reference data, we can filter all the points in the point cloud that are above the sidewalk. This preprocessing step is performed in this notebook to reduce the number of points in the point cloud.

If the input point cloud is partly labelled using the [Urban_PointCloud_Processing](https://github.com/Amsterdam-AI-Team/Urban_PointCloud_Processing) project, its GROUND and ROAD labels are used to filter ground points; otherwise, a separate ground filtering step is performed first.

In [None]:
# Add project src to path.
import set_path

import numpy as np
import pandas as pd
import geopandas as gpd
import pathlib
from tqdm.notebook import tqdm

import upcp.fusion as fusion
import upcp.utils.ahn_utils as ahn_utils
import upcp.utils.las_utils as las_utils
from upcp.labels import Labels

# Local imports
import upc_sw.sw_utils as sw_utils

In [None]:
# ---- Run options ---------------------------------------------------------

# When True, tiles that already have an obstacle file are skipped.
resume = True

# When True, labels stored in the point cloud file are used to find ground
# points (if any are present); otherwise ground is derived from AHN data.
use_existing_labels = False

# Tiles with fewer points than this are ignored.
min_points = 10_000

# Upper bound on obstacle height above the ground
# (presumably in metres -- TODO confirm against sw_utils.sidewalk_clip).
max_height_above_ground = 2.0

# City districts ("stadsdelen") to process.
stadsdelen = [
    'centrum',
    'haven',
    'nieuw_west',
    'noord',
    'oost',
    'west',
    'zuid',
    'zuid_oost',
]

# ---- Elevation data ------------------------------------------------------

# AHN version, either 'ahn3' or 'ahn4'.
# NOTE(review): the AHN folder below points at AHN4 data while this says
# 'ahn3' -- confirm which one is intended.
ahn_version = 'ahn3'
ahn_data_folder = '../../datasets/AHN4/ahn4_npz/'

# ---- Data locations ------------------------------------------------------

# Root folder holding one sub-folder per stadsdeel.
base_folder = '../../datasets/Stadsdelen/'

# Paths relative to a stadsdeel folder.
pc_data_folder = 'pointclouds/'
bgt_data_file = 'bgt/bgt_voetpad.gpkg'

# Overview of all tiles; the cells below read its 'stadsdeel', 'run',
# 'tilecode' and 'n_points' columns.
tiles_data_file = '../../datasets/Stadsdelen/all_tiles.csv'

In [None]:
# Reader for the pre-processed AHN elevation data (.npz) in `ahn_data_folder`,
# created with caching switched off.
ahn_reader = ahn_utils.NPZReader(caching=False, data_folder=ahn_data_folder)

# Fallback ground detector built on the AHN reader above; it is only needed
# for point clouds that do not come with usable labels.
fuser_options = dict(
    ahn_reader=ahn_reader,
    target='ground',
    epsilon=0.2,
    refine_ground=False,
)
ground_fuser = fusion.AHNFuser(Labels.GROUND, **fuser_options)

In [None]:
# Make sure every stadsdeel has an output folder per run
# (<base>/<stadsdeel>/<pointclouds>/obstacles_run1 and .../obstacles_run2).
for stdsdl in stadsdelen:
    pc_root = pathlib.Path(base_folder, stdsdl, pc_data_folder)
    for run in ('run1', 'run2'):
        # Existing folders are left untouched; missing parents are created.
        (pc_root / f'obstacles_{run}').mkdir(parents=True, exist_ok=True)

In [None]:
# Build the list of tiles to process in one pass:
#   1. read the tile overview,
#   2. keep rows with at least `min_points` points,
#   3. keep (stadsdeel, tilecode) combinations that have exactly two such rows,
#      i.e. tiles for which both runs are available.
all_tiles_df = (
    pd.read_csv(tiles_data_file)
    .loc[lambda df: df['n_points'] >= min_points]
    .groupby(['stadsdeel', 'tilecode'])
    .filter(lambda tile_rows: len(tile_rows) == 2)
)

In [None]:
def _obstacle_run_folder(stdsdl, run):
    """Return the obstacle output folder for a stadsdeel / run combination.

    Obstacle files are written to `obstacles_run1` / `obstacles_run2`, so the
    run id is accepted both as a bare number (`1`) and as a full name
    (`'run1'`); either way the folder that is actually written to is returned.
    """
    run_name = str(run)
    if not run_name.startswith('run'):
        run_name = f'run{run_name}'
    return pathlib.Path(base_folder) / stdsdl / pc_data_folder / f'obstacles_{run_name}'


def _tiles_done(stdsdl, run, tilecodes):
    """Return a boolean Series (indexed like `tilecodes`) flagging finished tiles.

    A tile counts as finished when a file with prefix 'obst' for its tilecode
    is found in the obstacle folder of the given stadsdeel and run.
    """
    done_tiles = las_utils.get_tilecodes_from_folder(
        _obstacle_run_folder(stdsdl, run), las_prefix='obst')
    return tilecodes.isin(done_tiles)


def check_done(group):
    """Add a boolean 'done' column to one (stadsdeel, run) group.

    Meant to be used with `DataFrame.groupby(['stadsdeel', 'run']).apply`,
    which provides the group key through `group.name`.

    Returns the same group with the 'done' column filled in.
    """
    (stdsdl, run) = group.name
    group['done'] = _tiles_done(stdsdl, run, group['tilecode'])
    return group


if resume:
    # Iterate over the groups explicitly instead of using `groupby.apply`:
    # since pandas 2.0 `apply` prepends the group keys to the result index,
    # which makes 'stadsdeel' both an index level and a column and breaks the
    # later groupby on that column. Writing through the original index keeps
    # the frame's index and row order unchanged on every pandas version.
    done = pd.Series(False, index=all_tiles_df.index)
    for (stdsdl, run), group in all_tiles_df.groupby(['stadsdeel', 'run']):
        done.loc[group.index] = _tiles_done(stdsdl, run, group['tilecode'])
    all_tiles_df = all_tiles_df.assign(done=done)

    n_done = all_tiles_df['done'].sum()
    print(f'{n_done} / {len(all_tiles_df)} tiles done')

    # Only keep the tiles that still need processing.
    all_tiles_df = all_tiles_df[~all_tiles_df['done']]

In [None]:
# Sidewalk polygons per stadsdeel, read from the BGT file of each stadsdeel
# that still has tiles to process and indexed by their 'ogc_fid' column.
bgt_dict = {
    stdsdl: gpd.read_file(pathlib.Path(base_folder) / stdsdl / bgt_data_file).set_index('ogc_fid')
    for stdsdl in all_tiles_df['stadsdeel'].unique()
}

In [None]:
# Labels treated as ground when the point cloud file already carries labels
# (GROUND and ROAD, as produced by the Urban_PointCloud_Processing project).
ground_labels = [Labels.GROUND, Labels.ROAD]

# For every remaining tile, process both runs: remove ground points, keep the
# points above sidewalk polygons and write them to the obstacle folder.
tile_tqdm = tqdm(all_tiles_df.groupby(['stadsdeel', 'tilecode']), unit='tile', smoothing=0)
for (stdsdl, tilecode), _ in tile_tqdm:
    tile_tqdm.set_postfix_str(f'{stdsdl}/{tilecode}')
    pc_folder = pathlib.Path(base_folder) / stdsdl / pc_data_folder

    for run in ['run1', 'run2']:
        out_file = pc_folder / f'obstacles_{run}' / f'obst_{tilecode}.laz'

        # A tile stays in the list as long as one of its runs is unfinished;
        # when resuming, do not redo the run that already has its output.
        if resume and out_file.exists():
            continue

        file = pc_folder / run / f'filtered_{tilecode}.laz'

        # Load pointcloud data.
        points, labels = sw_utils.read_las(file, extra_val='label')

        # Determine ground points: from existing labels when requested and
        # present (any non-zero label), otherwise from AHN elevation data.
        if use_existing_labels and np.count_nonzero(labels) > 0:
            print(f'{tilecode}: using labels found in pointcloud file.')
            ground_mask = sw_utils.create_label_mask(labels, target_labels=ground_labels)
        else:
            mask = np.ones((len(points),), dtype=bool)
            ground_mask = ground_fuser.get_label_mask(points, labels, mask, tilecode)

        # Extract points above sidewalk (only non-ground points are considered).
        sw_mask, has_polys = sw_utils.sidewalk_clip(
                                    points[~ground_mask], tilecode, sw_poly_gdf=bgt_dict[stdsdl],
                                    ahn_reader=ahn_reader, max_height=max_height_above_ground)

        if has_polys:  # Only save .laz file when sidewalk polys are present
            # sw_mask refers to the non-ground subset; map it back onto the
            # full point cloud.
            obstacle_mask = np.zeros((len(points),), dtype=bool)
            obstacle_mask[~ground_mask] = sw_mask

            # Save the new point cloud
            sw_utils.write_las(points[obstacle_mask], out_file, values=labels[obstacle_mask])