# Automatic labeling of ground and road using data fusion

In [1]:
# Load the local package rather than the pip-installed version by adding the
# project src to the path. Comment out the import below to use the pip-installed version.
import set_path

In [2]:
# Import modules.
import logging
import os
import requests

import upcp.fusion as fusion
from upcp.pipeline import Pipeline
import upcp.region_growing as growing
import upcp.utils.ahn_utils as ahn_utils
import upcp.utils.bgt_utils as bgt_utils
import upcp.utils.las_utils as las_utils
import upcp.utils.log_utils as log_utils
import upcp.utils.csv_utils as csv_utils
from upcp.labels import Labels
import upcp.scrapers.ams_bgt_scraper as ams_bgt_scraper

import seaborn as sns
import matplotlib.pyplot as plt
import functions as f
import geopandas as gpd
import numpy as np
from tqdm import tqdm

# Messages of level DEBUG and higher will be printed to console (the level
# passed to add_console_logger below is logging.DEBUG, not INFO).
log_utils.reset_logger()
log_utils.add_console_logger(level=logging.DEBUG)

QSocketNotifier: Can only be used with threads started with QThread
  def groupby_count(xyz, indices, out):
  def groupby_sum(xyz, indices, N, out):
  def groupby_max(xyz, indices, N, out):


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


### Select area to label point clouds for

In [3]:
# Pull the polygon of the area for which to gather and label point clouds.

# Select area granularity (buurten/wijken/ggwgebieden/stadsdelen, https://api.data.amsterdam.nl/v1/docs/datasets/gebieden.html)
area_granularity = 'wijken'

# SELECT AREA
# district_name = 'Centrum'
# my_area = 'Nieuwmarkt/Lastage'
district_name = 'Nieuw-West'
my_area = 'Osdorp-Midden'

# Get area data. The timeout keeps the cell from hanging forever when the API
# does not respond.
area_url = "https://api.data.amsterdam.nl/v1/gebieden/" + area_granularity
response_area = requests.get(url=area_url, params={"_pageSize": 1000}, timeout=60)
print(response_area.status_code)

# Fail with a clear HTTP error before trying to parse the body; otherwise an
# error response only shows up as an obscure KeyError/JSON error below.
response_area.raise_for_status()
raw_data_area = response_area.json()['_embedded']

# Create a proper GeoDataFrame with the area data
gdf_area = f.create_area_gdf(raw_data_area)
# gdf_area = gdf_area.to_crs("EPSG:4326")
gdf_area_sel = gdf_area[gdf_area['naam'] == my_area]

# Without this check a misspelled area name fails with a bare IndexError on
# the .iloc[0] lookup below.
if gdf_area_sel.empty:
    raise ValueError(
        "No area named '{}' found for granularity '{}'.".format(my_area, area_granularity))
area_polygon = gdf_area_sel.iloc[0]['geometry']

# Plot all area boundaries and highlight the selected area.
sns.set()
ax = gdf_area.boundary.plot()
ax = gdf_area_sel.plot(ax=ax)
plt.title('Selected: {}'.format(my_area))
plt.show()

### Collect point clouds in area

In [None]:
# Mount Azure base folder to access point cloud and ahn data
mount_status = os.system('sudo blobfuse /home/azureuser/cloudfiles/code/blobfuse/ovl --tmp-path=/mnt/resource/blobfusetmp --config-file=/home/azureuser/cloudfiles/code/blobfuse/fuse_connection_ovl.cfg -o attr_timeout=3600 -o entry_timeout=3600 -o negative_timeout=3600 -o allow_other -o nonempty')
if mount_status != 0:
    # os.system never raises, so report a failed mount instead of ignoring it.
    # Only a warning: the cell still continues exactly as before.
    print('Warning: blobfuse mount command exited with status {}.'.format(mount_status))
base_folder = "/home/azureuser/cloudfiles/code/blobfuse/ovl/"

# Select district
# NOTE: despite its name, this dict maps a district *name* to its district
# code; the code is used as a folder name in the paths below.
code_to_name_dict = {
    'Oost': 'nl-amsv-201001-7415-laz',
    'Westpoort': 'nl-amsd-200823-7415-laz',
    'West': 'nl-amsd-200824-7415-laz',
    'Zuid': 'nl-amsd-200903-7415-laz',
    'Ring': 'nl-amsd-200904-7415-laz',
    'Noord': 'nl-amsd-200918-7415-laz',
    'Nieuw-West': 'nl-amsd-200920-7415-laz',
    'Centrum': 'nl-amsd-200921-7415-laz',
    'Zuidoost': 'nl-amsd-200923-7415-laz',
}

# Data folder and file for the fusers.
district_code = code_to_name_dict[district_name]

ahn_in_folder = base_folder + 'ahn/Amsterdam/ahn4_npz/'
# Area names may contain '/', which is not allowed in a folder name.
bgt_in_folder = base_folder + 'bgt/bgt_roads/{}/'.format(my_area.replace('/', '-'))
in_folder_point_clouds = base_folder + 'pointcloud/Unlabeled/Amsterdam/{}/las_processor_bundled_out/'.format(district_code)
out_folder_point_clouds = base_folder + 'pointcloud/Labeled/{}/ground_and_road/'.format(district_code)

# Create the folders this notebook writes to; existing folders are left as is.
for path in [bgt_in_folder, out_folder_point_clouds]:
    os.makedirs(path, exist_ok=True)

In [None]:
# Collect all tiles in polygon area.
# Match on the file extension: a substring test would also pick up names that
# merely contain '.laz' (e.g. 'tile.laz.tmp').
all_pc_filenames = np.array([in_folder_point_clouds + name
                             for name in os.listdir(in_folder_point_clouds)
                             if name.endswith('.laz')])
all_pc_tilecodes = np.array([las_utils.get_tilecode_from_filename(filename) for filename in all_pc_filenames])
all_pc_bboxes = np.array([las_utils.get_bbox_from_tile_code(tilecode) for tilecode in all_pc_tilecodes])

all_pc_polygons = [las_utils.get_polygon_from_tile_code(tilecode) for tilecode in all_pc_tilecodes]
gdf_pc_polygons = gpd.GeoDataFrame(geometry=all_pc_polygons)

# Positions of the tiles that overlap the selected area. The 'intersects'
# predicate selects the same tiles as "intersection is not empty", without
# building the intersection geometries first.
in_area_mask = gdf_pc_polygons.intersects(area_polygon).to_numpy()
pc_idxs_in_area_polygon = np.flatnonzero(in_area_mask).tolist()

# filenames, tilecodes and bounding boxes of point clouds in selected area
pc_filenames = all_pc_filenames[pc_idxs_in_area_polygon]
pc_tilecodes = all_pc_tilecodes[pc_idxs_in_area_polygon]
pc_bboxes = all_pc_bboxes[pc_idxs_in_area_polygon]

# Plot the boundaries of all available tiles together with the selected area.
sns.set()
ax = gdf_pc_polygons.boundary.plot()
ax = gdf_area_sel.boundary.plot(ax=ax, color='black')
plt.title('Point clouds in selected area')
plt.show()

### Ground and road fuser using pre-processed BGT and AHN data

In [None]:
# Make sure a BGT csv file exists for every tile in the selected area.
# TODO: this cell can fail when too many requests are sent to the API. Current
# workaround: restart the kernel and run all cells again, so that only the
# still-missing BGT files are fetched.

# Column names of the BGT csv files.
csv_headers = ['bgt_name', 'polygon', 'x_min', 'y_max', 'x_max', 'y_min']

# Road and parking spots layers in BGT
bgt_layers = ['BGT_WGL_rijbaan_lokale_weg', 'BGT_WGL_rijbaan_regionale_weg',
              'BGT_WGL_rijbaan_autoweg', 'BGT_WGL_rijbaan_autosnelweg',
              'BGT_WGL_parkeervlak', 'BGT_WGL_ov-baan', 'BGT_WGL_fietspad']

for pc_tilecode, pc_bbox in tqdm(zip(pc_tilecodes, pc_bboxes), total=len(pc_tilecodes)):

    # Output file for the BGT fuser; tiles that already have one are skipped.
    bgt_data_file = '{}{}.csv'.format(bgt_in_folder, pc_tilecode)
    if os.path.isfile(bgt_data_file):
        continue

    # Gather the parsed polygons of every layer within the tile's bounding box.
    bgt_road_polygons_csv = []
    for layer in bgt_layers:
        # Scrape the layer from the Amsterdam WFS and parse the json response.
        layer_json = ams_bgt_scraper.scrape_amsterdam_bgt(layer, bbox=pc_bbox)
        bgt_road_polygons_csv.extend(ams_bgt_scraper.parse_polygons(layer_json))

    # Write the csv
    csv_utils.write_csv(bgt_data_file, bgt_road_polygons_csv, csv_headers)

In [None]:
# Reader for the AHN .npz data, shared by all tiles.
npz_reader = ahn_utils.NPZReader(ahn_in_folder)

# Label every point cloud in the selected area that has no labeled output yet.
for tilecode, filename in tqdm(zip(pc_tilecodes, pc_filenames), total=len(pc_tilecodes)):

    # Input and output locations for this tile.
    pc_in_file = filename
    pc_out_file = out_folder_point_clouds + 'road_ground_labeled_' + tilecode + '.laz'
    bgt_road_file = bgt_in_folder + tilecode + '.csv'

    # Skip tiles whose labeled output file already exists.
    if os.path.isfile(pc_out_file):
        continue

    # Ground fuser, driven by the AHN data.
    params = {'bottom': 0., 'buffer': 0.02}
    npz_ground_fuser = fusion.AHNFuser(
        Labels.GROUND, ahn_reader=npz_reader,
        target='ground', epsilon=0.2, params=params)

    # Road fuser, driven by the BGT road part polygons of this tile.
    bgt_road_reader = bgt_utils.BGTPolyReader(bgt_file=bgt_road_file)
    road_part_fuser = fusion.BGTRoadFuser(Labels.ROAD, bgt_reader=bgt_road_reader)

    # Run ground first, then road, and write the labeled point cloud.
    process_sequence = (npz_ground_fuser, road_part_fuser)
    pipeline = Pipeline(processors=process_sequence, caching=False)
    pipeline.process_file(pc_in_file, out_file=pc_out_file)