# Filter all the points in the point cloud above the sidewalk
In the previous notebook we scraped and parsed sidewalk data in the form of polygons. With the help of this reference data, we can filter all the points in the point cloud that are above the sidewalk. This preprocessing step is performed in this notebook to reduce the number of points in the point cloud. The provided input point cloud in this example is already partly labelled using the [Urban_PointCloud_Processing](https://github.com/Amsterdam-AI-Team/Urban_PointCloud_Processing) project with the classes: ground, buildings, and cars.

In [None]:
import glob
import os
from pathlib import Path

import upcp.utils.bgt_utils as bgt_utils
import upcp.utils.ahn_utils as ahn_utils
from upcp.labels import Labels

from las_dingen import *
from sidewalk_filter import *

In [None]:
# Folder the partly labelled point clouds are read from, and folder the
# sidewalk-filtered point clouds are written to.
partly_labelled_folder = "../datasets/pointclouds/partly_labelled/"
sidewalk_filter_folder = "../datasets/pointclouds/sidewalk/"

# Sub-folder names of the two runs (joined onto both folders above).
cloud_run1, cloud_run2 = "run1/", "run2/"

# Height threshold handed to the sidewalk filter, in meters.
max_obstacle_height = 2.0

# Labels handed to create_mask.
# NOTE(review): presumably points carrying these labels are left out of the
# filter step - confirm against sidewalk_filter.create_mask.
exclude_label = [
    Labels.GROUND,
    Labels.BUILDING,
    Labels.CAR,
]

# Reader for the BGT sidewalk part polygons stored in the CSV file.
bgt_road_file = "../datasets/bgt/bgt_voetpad_demo.csv"
bgt_sidewalk_reader = bgt_utils.BGTPolyReader(bgt_file=bgt_road_file)

# Reader for the AHN elevation data; elevation is needed to determine the
# height of points above ground.
ahn_data_folder = "../datasets/ahn/"
ahn_reader = ahn_utils.NPZReader(ahn_data_folder)

In [None]:
# Create the per-run output folders up front (no-op when they already exist),
# so the filtered point clouds can be written without further checks.
for run in [cloud_run2, cloud_run1]:
    new_path = os.path.join(sidewalk_filter_folder, run)
    Path(new_path).mkdir(parents=True, exist_ok=True)

# Iterate over the .laz files of the first run; these file names drive the
# processing of both runs. The wildcard is joined as its own path component so
# the pattern stays correct even if cloud_run1 has no trailing separator.
for path in glob.glob(os.path.join(partly_labelled_folder, cloud_run1, '*.laz')):
    filename = os.path.basename(path)
    stem = os.path.splitext(filename)[0]

    # The tile code is the part of the file name that follows 'processed_'.
    # Skip files that do not follow this naming scheme instead of aborting the
    # whole batch with an IndexError.
    if 'processed_' not in stem:
        print(f'Cannot derive tilecode from file {filename}. Next...')
        continue
    tilecode = stem.split('processed_')[1]

    # Sidewalk ('voetpad') polygons for this tile, unpadded and unmerged.
    sidewalk_polygons = bgt_sidewalk_reader.filter_tile(
                            tilecode, bgt_types=['voetpad'],
                            padding=0, offset=0,
                            merge=False)

    # Explicit length check: the container type returned by filter_tile is
    # not visible here, so truthiness is deliberately not relied upon.
    if len(sidewalk_polygons) == 0:
        print('No sidewalk polygons found for tile. Next...')
        continue

    # Start with the file from the other folder: the run-1 file was just found
    # by the glob above, so a missing counterpart can only occur for run 2 and
    # is detected before anything has been written for this tile.
    # NOTE(review): if the ground check below fails for the second run only,
    # the output of the first run has already been written - confirm that a
    # one-sided result is acceptable.
    for run in [cloud_run2, cloud_run1]:
        try:
            # Read point cloud file
            points, labels = read_las(os.path.join(partly_labelled_folder, run, filename))
        except FileNotFoundError:
            print(f'Tile {filename} is not present in both runs. Next...')
            break

        # Check if point cloud has "ground" labelled points; without them the
        # tile is abandoned (remaining runs for this tile are skipped too).
        if Labels.GROUND not in labels:
            print(f"No ground/sidewalk points available for file {filename}. Next...")
            break

        mask = create_mask(labels, exclude_label)

        # Filter all the points above sidewalk; the result is used below to
        # index both the points and their labels.
        label_mask = get_label_mask(points, mask, tilecode, sidewalk_polygons, ahn_reader, max_obstacle_height)

        # Save the new point cloud under the same file name in the output folder of this run
        write_las(points[label_mask], os.path.join(sidewalk_filter_folder, run, filename), labels[label_mask])