# Extract 2D obstacles
Generate polygons for all static obstacles, and merge these with the sidewalk polygons.

In [None]:
# Add project src to path.
import set_path

import numpy as np
import pandas as pd
import geopandas as gpd
import os
import pathlib
import pickle
import shapely.geometry as sg
import shapely.ops as so
from tqdm.notebook import tqdm
tqdm.pandas()

from upcp.utils import bgt_utils
from upcp.utils import csv_utils
from upcp.utils import las_utils

from upc_sw.cluster2polygon import Cluster2Polygon
from upc_sw import sw_utils
from upc_sw import poly_utils

In [None]:
import warnings  # temporary, to suppress deprecation warnings from shapely
# NOTE: no category or module is given, so this silences *all* warnings for the
# rest of the session, not only the shapely ones.
warnings.filterwarnings('ignore')

## Create polygons of static obstacles
In the previous notebook, we ran a change detection algorithm that calculated the M3C2 distance for each point in the point cloud. Points whose M3C2 distance lies between the negative and positive threshold values (i.e. whose absolute distance is below the threshold) are considered static. We then cluster these static points into individual obstacles and create a bounding polygon for each.

In [None]:
### SETTINGS ###

# Coordinate reference system passed to every GeoDataFrame that is created or read below.
CRS = 'epsg:28992'

# Folder layout: <base_folder>/<stadsdeel>/<sub folder>. The sub folder names keep
# their trailing slash because they are also used as string prefixes for file names.
base_folder = '../../datasets/Stadsdelen/'
pc_data_folder = 'pointclouds/'
bgt_folder = 'bgt/'
out_folder = 'output/'

# Output file
# (path relative to each stadsdeel folder; one GeoPackage is written per stadsdeel)
output_file = f'{out_folder}obstacles.gpkg'

# Which stadsdelen to include
stadsdelen = ['centrum', 'haven', 'nieuw_west', 'noord', 'oost', 'west', 'zuid', 'zuid_oost']

# Allow resume by saving intermediate output
# With resume enabled, stadsdelen whose output file already exists are skipped and
# tiles already present in the intermediate pickle are not processed again.
resume = True
# The intermediate pickle is rewritten every `resume_batch_size` tiles (per stadsdeel).
resume_batch_size = 100
tmp_file = f'{out_folder}obst_tmp.pkl'

# Distance threshold for static obstacles.
# Points with |M3C2 distance| below this value are kept as static.
# (presumably in metres, the unit of the point cloud coordinates; confirm)
m3c2_threshold = 0.2

# Convert 3D Obstacle blobs to 2D polygons using a clustering algorithm.
# Set use_concave=False to use the faster convex hull.
# Change alpha to determine the 'concaveness' of the concave hull, with 0 being convex.
c2p = Cluster2Polygon(min_component_size=100, grid_size=0.2, use_concave=True, concave_min_area=2.5, alpha=0.5)

# Create output folder if it doesn't exist
# (one output folder per stadsdeel; existing folders are left untouched)
for stdsdl in stadsdelen:
    new_path = pathlib.Path(base_folder) / stdsdl / out_folder
    new_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Build the work list: one row per (stadsdeel, tilecode) that still has to be processed.
todo_stadsdelen = []
todo_tilecodes = []

for stdsdl in stadsdelen:
    stdsdl_dir = pathlib.Path(base_folder) / stdsdl

    # A stadsdeel whose GeoPackage already exists is considered finished.
    if resume and os.path.exists(stdsdl_dir / output_file):
        continue

    m3c2_dir = stdsdl_dir / pc_data_folder / 'm3c2'
    pending = las_utils.get_tilecodes_from_folder(m3c2_dir, las_prefix='m3c2')

    # Drop the tiles that are already covered by the intermediate output, if any.
    tmp_path = stdsdl_dir / tmp_file
    if resume and os.path.exists(tmp_path):
        with open(tmp_path, 'rb') as f:
            done = set(pickle.load(f)['tilecode'])
        pending = pending - done

    # Sort so that the processing order is reproducible between runs.
    pending = sorted(pending)
    todo_stadsdelen.extend([stdsdl] * len(pending))
    todo_tilecodes.extend(pending)

all_tiles = pd.DataFrame({'stadsdeel': todo_stadsdelen,
                          'tilecode': todo_tilecodes})

In [None]:
# Extract static obstacle polygons tile by tile and write one GeoPackage per stadsdeel.
tile_tqdm = tqdm(total=len(all_tiles), unit='tile', smoothing=0)

# Group on the column *name* (a scalar, not a one-element list) so that every group
# key is a plain string. With a list-like key, recent pandas versions yield 1-tuples,
# which cannot be joined onto a pathlib.Path.
for stdsdl, part_df in all_tiles.groupby('stadsdeel'):
    # Continue from the intermediate output of an interrupted run, if there is one.
    tmp_path = pathlib.Path(base_folder) / stdsdl / tmp_file
    if resume and os.path.exists(tmp_path):
        # NOTE: pickle must only be used on files written by this notebook itself.
        with open(tmp_path, 'rb') as f:
            static_obstacles = pickle.load(f)
    else:
        static_obstacles = {'tilecode': [],
                            'type': [],
                            'geometry': []}

    # TODO: parallelize this inner loop
    for i, tilecode in enumerate(part_df['tilecode']):
        tile_tqdm.set_postfix_str(f'{stdsdl}/{tilecode}')

        # Read point cloud with M3C2 distances
        in_file = pathlib.Path(base_folder) / stdsdl / pc_data_folder / 'm3c2' / f'm3c2_{tilecode}.laz'
        points, m3c2_distance = sw_utils.read_las(in_file, extra_val='M3C2_distance', extra_val_dtype='float32')

        # Filter for static points
        mask = np.abs(m3c2_distance) < m3c2_threshold

        if np.count_nonzero(mask) > 0:
            # Get the polygons
            polygons, types = c2p.get_obstacle_polygons(points[mask])
            static_obstacles['tilecode'].extend([tilecode]*len(polygons))
            static_obstacles['type'].extend(types)
            static_obstacles['geometry'].extend(polygons)

        # Periodically persist the results so that an interrupted run can be resumed.
        if i % resume_batch_size == 0:
            with open(tmp_path, 'wb') as f:
                pickle.dump(static_obstacles, f)

        tile_tqdm.update(1)

    static_obstacles_gdf = gpd.GeoDataFrame(static_obstacles, geometry='geometry', crs=CRS)
    # Release the (potentially large) dict now that the GeoDataFrame holds the data.
    static_obstacles = None

    # Fix invalid polygons
    static_obstacles_gdf['geometry'] = static_obstacles_gdf['geometry'].apply(poly_utils.fix_invalid)

    # Save the obstacle GeoDataFrame.
    # No file is written when no obstacles were found for this stadsdeel.
    out_path = pathlib.Path(base_folder) / stdsdl / output_file
    if len(static_obstacles_gdf) > 0:
        static_obstacles_gdf.to_file(out_path, driver='GPKG')
        static_obstacles_gdf = None

    # Delete intermediate output
    if os.path.exists(tmp_path):
        os.remove(tmp_path)


tile_tqdm.close()

## Merge sidewalk polygons with obstacles
The obstacle polygons found in the previous step are merged into the sidewalk polygons as interiors (holes).

In [None]:
### SETTINGS ###

# Merged sidewalk + obstacle output, as a path relative to each stadsdeel folder.
merged_output_file = f'{out_folder}sidewalks_with_obstacles.gpkg'

# The three distances below are passed to buffer() and are therefore expressed in
# CRS units (presumably metres for epsg:28992; confirm).

# Buffer width around the sidewalk to preserve its shape. Set to '0' to ignore this.
sw_buffer = 0.01

# Add padding around obstacles.
# (applied to the static obstacles extracted from the point cloud)
obstacle_padding = 0.05

# Add a buffer around BGT tree locations
# (applied to BGT obstacle rows whose 'naam' equals 'boom')
tree_buffer = 0.75

In [None]:
def merge_obstacles(row, static_obstacles_gdf, bgt_obst_gdf, obstacle_padding=obstacle_padding, sw_buffer=sw_buffer):
    """Return the sidewalk geometry of `row` with all overlapping obstacles cut out.

    Parameters
    ----------
    row : object with a `geometry` attribute holding the sidewalk polygon.
    static_obstacles_gdf : GeoDataFrame of static obstacle polygons; those that
        intersect the sidewalk are padded by `obstacle_padding` before merging.
    bgt_obst_gdf : GeoDataFrame of BGT obstacle shapes; used without extra padding.
    obstacle_padding : buffer distance applied to the static obstacles.
    sw_buffer : buffer distance applied to the sidewalk before the subtraction.

    Returns
    -------
    The buffered sidewalk geometry minus the part of the sidewalk covered by
    obstacles or by the sidewalk's own interior.
    """
    # presumably returns the polygon without holes plus its holes (or None); confirm in poly_utils
    sw_ext, sw_int = poly_utils.extract_interior(row.geometry)

    # Static obstacles touching this sidewalk, padded and dissolved into one shape.
    static_hits = static_obstacles_gdf.intersects(sw_ext)
    padded_obstacles = static_obstacles_gdf[static_hits].buffer(obstacle_padding)
    obst_poly = padded_obstacles.unary_union

    # TODO buffer also for terrace shapes?
    bgt_hits = bgt_obst_gdf.intersects(sw_ext)
    bgt_obst_poly = bgt_obst_gdf[bgt_hits].unary_union

    # Dissolve everything that has to be removed, skipping the parts that are absent.
    to_remove = [geom for geom in (obst_poly, bgt_obst_poly, sw_int) if geom is not None]
    merged_poly = so.unary_union(to_remove)

    # TODO: do something with obstacle type?
    if merged_poly is None:
        return sw_ext
    return sw_ext.buffer(sw_buffer) - sw_ext.intersection(merged_poly)

In [None]:
# For every stadsdeel: load sidewalks and obstacles, cut the obstacles out of the
# sidewalk polygons and save the result as a GeoPackage.
for stdsdl in stadsdelen:
    # Scraped sidewalk and terras data for the area (see Notebook 1c)
    sidewalk_data = pathlib.Path(base_folder) / stdsdl / bgt_folder / 'bgt_voetpad.gpkg'
    terras_data = pathlib.Path(base_folder) / stdsdl / bgt_folder / 'terras_data.gpkg'
    obstacle_data = pathlib.Path(base_folder) / stdsdl / bgt_folder / 'obstacle_data.gpkg'

    out_path = pathlib.Path(base_folder) / stdsdl / output_file
    merged_path = pathlib.Path(base_folder) / stdsdl / merged_output_file

    # Load the sidewalk data
    sidewalk_gdf = gpd.read_file(sidewalk_data, crs=CRS).set_index('ogc_fid')

    # Load the "terras" data
    if terras_data.is_file():
        bgt_obst_gdf = gpd.read_file(terras_data, crs=CRS).set_index('id')
    else:
        bgt_obst_gdf = gpd.GeoDataFrame({'geometry': []}, geometry='geometry', crs=CRS)

    # Load the BGT obstacle data
    # (checks the obstacle file itself; checking the terras file here would skip or
    # crash on the obstacle data depending on which of the two files exists)
    if obstacle_data.is_file():
        bgt_obst_gdf = pd.concat([bgt_obst_gdf,
                                  gpd.read_file(obstacle_data, crs=CRS).set_index('ogc_fid')])

    # Load the static obstacle data
    # The extraction step writes no file when it found no obstacles, so a missing
    # file is treated as "no static obstacles" instead of raising.
    if out_path.is_file():
        static_obstacles_gdf = gpd.read_file(out_path, crs=CRS)
    else:
        print(f'No static obstacle file found for {stdsdl}; merging BGT obstacles only.')
        static_obstacles_gdf = gpd.GeoDataFrame({'geometry': []}, geometry='geometry', crs=CRS)

    # Prepare (inflate) BGT obstacle shapes.
    # The 'naam' column is absent when neither BGT file was loaded.
    if 'naam' in bgt_obst_gdf.columns:
        is_tree = bgt_obst_gdf['naam'] == 'boom'
        bgt_obst_gdf.loc[is_tree, 'geometry'] = bgt_obst_gdf[is_tree].buffer(tree_buffer)

    # Do the merge.
    sw_merged_gdf = sidewalk_gdf.copy()
    sw_merged_gdf['geometry'] = sidewalk_gdf.progress_apply(lambda row: merge_obstacles(row, static_obstacles_gdf, bgt_obst_gdf), axis=1)

    # Save the merged sidewalk data.
    sw_merged_gdf.to_file(merged_path, driver='GPKG')