# Filter all the points in the point cloud above sidewalk
In the previous notebook we scraped and parsed sidewalk data in the form of polygons. With the help of this reference data, we can filter all the points in the point cloud that are above the sidewalk. This preprocessing step is performed in this notebook to reduce the number of points in the point cloud.

If the input point cloud is partly labelled using the [Urban_PointCloud_Processing](https://github.com/Amsterdam-AI-Team/Urban_PointCloud_Processing) project, its GROUND and ROAD labels are used to filter ground points; otherwise, a separate ground filtering step is performed first.

In [None]:
# Add project src to path.
import set_path

import numpy as np
import geopandas as gpd
import pathlib
from tqdm.notebook import tqdm
tqdm.pandas()

import upcp.fusion as fusion
import upcp.utils.ahn_utils as ahn_utils
import upcp.utils.bgt_utils as bgt_utils
import upcp.utils.log_utils as log_utils
import upcp.utils.las_utils as las_utils
from upcp.labels import Labels

# Local imports
import upc_sw.sw_utils as sw_utils
from upc_sw.cluster2polygon import Cluster2Polygon

In [None]:
# Input data: point cloud tiles (one sub-folder per run), AHN elevation data
# and the BGT sidewalk polygons.
pc_data_folder = '../datasets/pointclouds/'
pc_file_prefix = 'processed'
ahn_data_folder = '../datasets/ahn/'
bgt_data_file = '../datasets/bgt/bgt_voetpad.csv'

# Output folder for the obstacle polygons.
out_folder = '../datasets/obstacles/'

# Coordinate reference system assigned to the obstacle GeoDataFrames.
CRS = 'epsg:28992'

# Labels that count as ground. They are only used when `use_existing_labels`
# is True and the point cloud actually contains labels; otherwise a separate
# ground filter step is performed.
ground_labels = [Labels.GROUND, Labels.ROAD]
use_existing_labels = True

# Distance threshold for static obstacles.
dist_threshold = 0.2

In [None]:
# Reader for the AHN elevation data in `ahn_data_folder` (caching disabled).
ahn_reader = ahn_utils.NPZReader(
    data_folder=ahn_data_folder,
    caching=False,
)

# Reader for the sidewalk polygons in the BGT file.
sw_poly_reader = bgt_utils.BGTPolyReader(bgt_file=bgt_data_file)

# AHN-based ground fuser; only used as a fallback when the point cloud has
# no existing labels to rely on.
ground_fuser = fusion.AHNFuser(
    Labels.GROUND,
    ahn_reader=ahn_reader,
    target='ground',
    epsilon=0.2,
    refine_ground=False,
)

# Clustering-based converter from 3D obstacle blobs to 2D polygons.
# - use_concave=False switches to the faster convex hull.
# - alpha controls the 'concaveness' of the concave hull (0 means convex).
c2p = Cluster2Polygon(
    min_component_size=100,
    grid_size=0.05,
    use_concave=True,
    concave_min_area=0.,
    alpha=0.5,
)

In [None]:
# Make sure an `obstacles_<run>` output folder exists for each run
# (existing folders are left untouched).
for run in ('run1', 'run2'):
    new_path = f'{pc_data_folder}obstacles_{run}'
    obstacle_dir = pathlib.Path(new_path)
    obstacle_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Get the set of tilecodes for which we have *both* runs.
# The processing loop below reads the run1 and the run2 file of every tile,
# so a tile that is present in only one run must be excluded; hence the
# intersection (not the union) of the two tilecode sets.
all_tiles = (las_utils.get_tilecodes_from_folder(f'{pc_data_folder}run1/', las_prefix=pc_file_prefix)
             .intersection(las_utils.get_tilecodes_from_folder(f'{pc_data_folder}run2/', las_prefix=pc_file_prefix)))

In [None]:
# Show the collected tilecodes as the cell output.
all_tiles

In [None]:
# Progress bar over all tiles; the postfix shows the tile currently processed.
tile_tqdm = tqdm(all_tiles, unit='tile', smoothing=0)

for tilecode in tile_tqdm:
    tile_tqdm.set_postfix_str(tilecode)
    print(f'Processing tile {tilecode}...')
    for run in ('run1', 'run2'):
        file = f'{pc_data_folder}{run}/{pc_file_prefix}_{tilecode}.laz'
        out_file = f'{pc_data_folder}obstacles_{run}/obst_{tilecode}.laz'

        # Read the points together with their per-point labels.
        points, labels = sw_utils.read_las(file, extra_val='label')

        # Determine the ground points: fall back to the AHN ground fuser when
        # existing labels are disabled or the file holds no non-zero label.
        if not use_existing_labels or np.count_nonzero(labels) == 0:
            mask = np.ones(len(points), dtype=bool)
            ground_mask = ground_fuser.get_label_mask(points, labels, mask, tilecode)
        else:
            print('Using labels found in pointcloud file.')
            ground_mask = sw_utils.create_label_mask(labels, target_labels=ground_labels)

        # Extract the points above the sidewalk: clip the non-ground points
        # with the sidewalk polygons and map the result back onto the full cloud.
        non_ground = ~ground_mask
        sw_mask = sw_utils.sidewalk_clip(
            points[non_ground],
            tilecode,
            sw_poly_reader=sw_poly_reader,
            ahn_reader=ahn_reader,
            max_height=2.0,
        )
        obstacle_mask = np.zeros(len(points), dtype=bool)
        obstacle_mask[non_ground] = sw_mask

        # Save the filtered point cloud, keeping the labels of the kept points.
        sw_utils.write_las(points[obstacle_mask], out_file, values=labels[obstacle_mask])

In [None]:
import pandas as pd

tile_tqdm = tqdm(all_tiles, unit='tile', smoothing=0)

# One GeoDataFrame per tile, concatenated once after the loop.
# (DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call, which made the loop quadratic in the number of tiles.)
tile_frames = []

for tilecode in tile_tqdm:
    tile_tqdm.set_postfix_str(tilecode)

    # Read point cloud with M3C2 distances
    in_file = f'{pc_data_folder}m3c2/m3c2_{tilecode}.laz'
    points, m3c2_distance = sw_utils.read_las(in_file, extra_val='M3C2_distance', extra_val_dtype='float32')

    # Filter for static points: keep the points whose absolute M3C2 distance
    # is below the configured `dist_threshold`.
    mask = np.abs(m3c2_distance) < dist_threshold

    # Get the polygons
    polys, types = c2p.get_obstacle_polygons(points[mask])
    if len(polys) == 0:
        # Nothing found in this tile; skip it so no empty frame is concatenated.
        continue

    # NOTE(review): `polys` is used directly as the geometry column, the same
    # way the single-tile cells further down use it. This assumes that
    # `get_obstacle_polygons` returns geometry objects -- confirm.
    data = {'tilecode': [tilecode] * len(polys),
            'type': types,
            'geometry': list(polys)}
    tile_frames.append(gpd.GeoDataFrame(data, geometry='geometry', crs=CRS))

if tile_frames:
    # ignore_index gives every obstacle a unique row index.
    obstacle_df = pd.concat(tile_frames, ignore_index=True)
else:
    # No obstacles at all: keep an empty frame with the expected schema.
    obstacle_df = gpd.GeoDataFrame(columns=['tilecode', 'type', 'geometry'], geometry='geometry', crs=CRS)

In [None]:
# Make sure the output folder exists, then write the obstacle GeoDataFrame
# to a shapefile inside it.
out_dir = pathlib.Path(out_folder)
out_dir.mkdir(parents=True, exist_ok=True)
obstacle_df.to_file(f'{out_folder}obstacles.shp')

In [None]:
# Obstacle polygons of a single run1 tile, for visual comparison.
# Note: this rebinds the global `c2p` to a converter with alpha=1.
obst_file = '../datasets/pointclouds/obstacles_run1/obst_2386_9702.laz'
points, labels = sw_utils.read_las(obst_file, extra_val='label')
c2p = Cluster2Polygon(
    min_component_size=100,
    grid_size=0.05,
    use_concave=True,
    concave_min_area=0.,
    alpha=1,
)
polys, types = c2p.get_obstacle_polygons(points)
obstacle_df1 = gpd.GeoDataFrame({'geometry': polys}, crs=CRS)

In [None]:
# Obstacle polygons of the same tile in run2, for visual comparison.
# Note: this rebinds the global `c2p` to a converter with alpha=1.
obst_file = '../datasets/pointclouds/obstacles_run2/obst_2386_9702.laz'
points, labels = sw_utils.read_las(obst_file, extra_val='label')
c2p = Cluster2Polygon(
    min_component_size=100,
    grid_size=0.05,
    use_concave=True,
    concave_min_area=0.,
    alpha=1,
)
polys, types = c2p.get_obstacle_polygons(points)
obstacle_df2 = gpd.GeoDataFrame({'geometry': polys}, crs=CRS)

In [None]:
%matplotlib widget
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1)
obstacle_df1.plot(ax=ax, color='green', alpha=0.5)
obstacle_df2.plot(ax=ax, color='grey', alpha=0.5)

In [None]:
def hsd_dist(g):
    """Return the smallest Hausdorff distance between the geometry of row `g`
    and any geometry in the global `obstacle_df2`."""
    return min(g.geometry.hausdorff_distance(other) for other in obstacle_df2.geometry)


# Row-wise apply with a progress bar: store for every run1 obstacle the
# distance to its closest run2 obstacle.
obstacle_df1['min_hsd'] = obstacle_df1.progress_apply(hsd_dist, axis=1)

In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

fig, ax = plt.subplots(1)

# Reserve a narrow axes to the right of the map to hold the legend.
divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='4%', pad=0.2)
cax.set_xlabel('hsd')

# Colour the run1 obstacles by their minimum Hausdorff distance to run2.
min_hsd_values = obstacle_df1['min_hsd']
obstacle_df1.plot(column=min_hsd_values, legend=True, ax=ax, cax=cax)