# Scrape and parse reference data

This notebook scrapes the necessary reference data (mainly BGT-like data in the form of points and polygons) and saves it as GeoPackage files. To download and pre-process AHN elevation data in .npz format, please follow [this link](https://github.com/Amsterdam-AI-Team/Urban_PointCloud_Processing/blob/main/notebooks/1.%20AHN%20preprocessing.ipynb).

In [None]:
# Add project src to path.
import set_path

import pathlib
import pandas as pd
import geopandas as gpd

from upcp.utils import las_utils

import upc_sw.poly_utils as poly_utils
import upc_sw.scraping_utils as scraping

In [None]:
### SETTINGS ###

# Tiles to scrape reference data for, given as a list of tile codes.
tiles = ['2386_9702']

# Folder with point cloud files. It is passed to the `process_folder` cells
# further down, and can also be used to derive the tile list automatically
# by uncommenting the next line.
pc_folder = '../datasets/pointcloud/'
# tiles = las_utils.get_tilecodes_from_folder(pc_folder)

# Output folder: all scraped GeoPackage files in this notebook are written here.
bgt_folder = '../datasets/bgt/'

# Create the output folder (including missing parents) if it does not exist yet.
pathlib.Path(bgt_folder).mkdir(parents=True, exist_ok=True)

## 1. Sidewalk polygons

We first scrape sidewalk ("voetpad") polygons from the Amsterdam BGT API. The documentation of this data source can be found at:
- https://www.amsterdam.nl/stelselpedia/bgt-index/producten-bgt/prodspec-bgt-dgn-imgeo

In [None]:
# Output GeoPackage for the scraped sidewalk polygons. The path is joined
# with pathlib so it is correct whether or not `bgt_folder` ends with a
# slash; it is kept as a plain string, as before.
bgt_data_file = str(pathlib.Path(bgt_folder) / 'bgt_voetpad.gpkg')

# BGT layer(s) to request: sidewalk ("voetpad") polygons.
bgt_layers = ['BGT_WGL_voetpad']

In [None]:
# Scrape the layers in `bgt_layers` for the tile(s) listed in `tiles`.
# Run either this cell or the folder-based cell that follows; both assign `gdf`.
gdf = scraping.process_tiles(tiles, bgt_layers)

In [None]:
# Alternative to the tile-based cell: scrape the area covered by all files
# in `pc_folder`. Running this cell overwrites any previous `gdf`.
gdf = scraping.process_folder(pc_folder, bgt_layers)

In [None]:
# Write the sidewalk polygons in `gdf` to `bgt_data_file` in GeoPackage format.
gdf.to_file(bgt_data_file, driver='GPKG')

## 2. "Terras" polygons

We use these to mark "horeca" (hotel, restaurant, and café) terraces as static obstacles.

See https://data.amsterdam.nl/datasets/GsY50tEkoJKCGw/

In [None]:
# Output GeoPackage for the scraped terrace polygons. The path is joined
# with pathlib so it is correct whether or not `bgt_folder` ends with a
# slash; it is kept as a plain string, as before.
terras_data_file = str(pathlib.Path(bgt_folder) / 'terras_data.gpkg')

In [None]:
# Scrape terrace data for the tile(s) listed in `tiles`. The layers argument
# is None here; the terrace-specific scraper function is passed instead.
# Run either this cell or the folder-based cell that follows; both assign `gdf`.
gdf = scraping.process_tiles(tiles, None, scraper=scraping.get_terras_data_for_bbox)

In [None]:
# Alternative to the tile-based cell: scrape terrace data for the area
# covered by all files in `pc_folder`. Running this cell overwrites any
# previous `gdf`.
gdf = scraping.process_folder(pc_folder, None, scraper=scraping.get_terras_data_for_bbox)

In [None]:
# Write the terrace polygons in `gdf` to `terras_data_file` in GeoPackage format.
gdf.to_file(terras_data_file, driver='GPKG')

## 3. BGT obstacle points

For now, only trees are scraped (BGT layer `BGTPLUS_VGT_boom`).

In [None]:
# Output GeoPackage for the scraped obstacle points. The path is joined
# with pathlib so it is correct whether or not `bgt_folder` ends with a
# slash; it is kept as a plain string, as before.
obstacle_data_file = str(pathlib.Path(bgt_folder) / 'obstacle_data.gpkg')

# BGT layer(s) to request: tree ("boom") points. This replaces the value
# of `bgt_layers` set in the sidewalk section.
bgt_layers = ['BGTPLUS_VGT_boom']

In [None]:
# Scrape the obstacle layer(s) in `bgt_layers` for the tile(s) listed in `tiles`.
# Run either this cell or the folder-based cell that follows; both assign `gdf`.
gdf = scraping.process_tiles(tiles, bgt_layers)

In [None]:
# Alternative to the tile-based cell: scrape the obstacle layer(s) for the
# area covered by all files in `pc_folder`. Running this cell overwrites any
# previous `gdf`.
gdf = scraping.process_folder(pc_folder, bgt_layers)

In [None]:
# Write the obstacle points in `gdf` to `obstacle_data_file` in GeoPackage format.
gdf.to_file(obstacle_data_file, driver='GPKG')