# Scrape and parse reference data from BGT
This notebook scrapes sidewalk ("voetpad") polygons from the Amsterdam BGT API. The documentation of this data source can be found at:
- https://www.amsterdam.nl/stelselpedia/bgt-index/producten-bgt/prodspec-bgt-dgn-imgeo

In [None]:
from tqdm.notebook import tqdm

import upcp.scrapers.ams_bgt_scraper as ams_bgt_scraper
import upcp.utils.las_utils as las_utils
import upcp.utils.csv_utils as csv_utils

In [None]:
# Output file for the BGT fuser; the scraped rows are written here as csv.
bgt_data_file = '../datasets/bgt/bgt_voetpad.csv'
# Column headers handed to csv_utils.write_csv together with the scraped rows.
# NOTE(review): presumably this order mirrors the fields produced by
# ams_bgt_scraper.parse_polygons (note y_max precedes y_min) -- confirm
# against the parser before reordering.
csv_headers = ['bgt_name', 'polygon', 'x_min', 'y_max', 'x_max', 'y_min']

# Sidewalk ("voetpad") layer in BGT
bgt_layers = ['BGT_WGL_voetpad']

In [None]:
def get_data_for_bbox(bbox, layers):
    """Scrape and parse the polygons of several BGT layers for one area.

    For every layer name the Amsterdam WFS is queried through
    ``ams_bgt_scraper.scrape_amsterdam_bgt`` (which returns a json response)
    and that response is parsed with ``ams_bgt_scraper.parse_polygons``.

    Parameters
    ----------
    bbox
        Bounding box forwarded unchanged as the ``bbox`` keyword of the
        scraper.
    layers
        Iterable of BGT layer names to scrape, processed in order.

    Returns
    -------
    list
        The parsed entries of all layers, concatenated in layer order.
    """
    return [
        parsed_entry
        for layer_name in layers
        # One request per layer; its json response is parsed right away.
        for parsed_entry in ams_bgt_scraper.parse_polygons(
            ams_bgt_scraper.scrape_amsterdam_bgt(layer_name, bbox=bbox)
        )
    ]

## Scrape a single tile or a list of tiles

In [None]:
# Option 1: list the tile codes to scrape explicitly.
tiles = ['2386_9702']

# Option 2: take the tile codes from the files in a folder instead.
# tiles = las_utils.get_tilecodes_from_folder('../datasets/pointclouds/run1/')

# Progress bar over the tiles; its postfix shows the tile being processed.
tile_tqdm = tqdm(tiles, unit='tile', smoothing=0)

# Collect the parsed BGT entries of every tile in one flat list.
bgt_data = []
for tilecode in tile_tqdm:
    tile_tqdm.set_postfix_str(tilecode)

    # Each tile is scraped using its own bounding box, without padding.
    bbox = las_utils.get_bbox_from_tile_code(tilecode, padding=0)
    bgt_data += get_data_for_bbox(bbox, bgt_layers)

# Store the combined result as csv.
csv_utils.write_csv(bgt_data_file, bgt_data, csv_headers)

## Scrape an area based on all files in a folder

In [None]:
# Folder whose files determine the area to scrape.
folder = '../datasets/pointclouds/run1/'
# One bounding box for the whole folder, without padding.
# NOTE(review): presumably this spans all LAS files found in the folder --
# confirm against las_utils.get_bbox_from_las_folder.
bbox = las_utils.get_bbox_from_las_folder(folder, padding=0)
# Scrape and parse every configured BGT layer for that bounding box.
bgt_data = get_data_for_bbox(bbox, bgt_layers)

# Write the csv (same bgt_data_file as used by the per-tile cell)
csv_utils.write_csv(bgt_data_file, bgt_data, csv_headers)