# Scrape and parse reference data from public data sources
This notebook scrapes data from the Amsterdam BGT API and the National Road Traffic Data Portal
(NDW). Documentation for both data sources can be found at:
- https://www.amsterdam.nl/stelselpedia/bgt-index/producten-bgt/prodspec-bgt-dgn-imgeo/
- https://docs.ndw.nu/api/trafficsigns/nl/index.html

In [None]:
# Load the local package rather than the pip-installed version by adding the
# project src to the path. Comment out the import below to use the
# pip-installed version instead.
import set_path

In [None]:
import upcp.scrapers.ams_bgt_scraper as ams_bgt_scraper
import upcp.scrapers.ndw_scraper as ndw_scraper
import upcp.utils.las_utils as las_utils
import upcp.utils.csv_utils as csv_utils

## Specify the bounding box of the area we want to scrape

In [None]:
# Derive the bounding box from the tile code of a single point cloud file.
# Swap in the commented-out tile code below to target a different tile.
tilecode = '2386_9702'
# tilecode = '2397_9705'
bbox = las_utils.get_bbox_from_tile_code(
    tilecode,
    padding=5,  # NOTE(review): padding units are defined by las_utils -- confirm.
)

In [None]:
# Alternative: derive the bounding box from all files in a folder.
# Running this cell rebinds `bbox`, replacing the value computed for a single
# tile, so execute only the variant you need.
bbox = las_utils.get_bbox_from_las_folder(
    '../datasets/pointcloud/',
    padding=5,
)

## Scrape building footprint data from BGT

In [None]:
# Settings for the building footprint scrape.
# BGT layer to query.
bgt_layer = 'BGT_PND_pand'

# Destination csv (input for the BGT fuser) and its column names.
bgt_data_file = '../datasets/bgt/bgt_buildings_demo.csv'
csv_headers = [
    'bgt_type', 'polygon',
    'x_min', 'y_max', 'x_max', 'y_min',
]

In [None]:
# Download the configured layer from the Amsterdam WFS (the scraper returns a
# json response) and parse the polygons it contains.
buildings_json = ams_bgt_scraper.scrape_amsterdam_bgt(bgt_layer, bbox=bbox)
parsed_buildings = ams_bgt_scraper.parse_polygons(buildings_json)

# Store the parsed content in the output csv under the configured headers.
csv_utils.write_csv(bgt_data_file, parsed_buildings, csv_headers)

## Scrape road information from BGT

In [None]:
# Settings for the road scrape.
# Road and parking spot layers in BGT, queried in this order.
bgt_layers = [
    'BGT_WGL_rijbaan_lokale_weg',
    'BGT_WGL_parkeervlak',
    'BGT_WGL_rijbaan_autoweg',
    'BGT_WGL_rijbaan_autosnelweg',
    'BGT_WGL_rijbaan_regionale_weg',
    'BGT_WGL_ov-baan',
    'BGT_WGL_fietspad',
]

# Destination csv (input for the BGT fuser) and its column names.
bgt_data_file = '../datasets/bgt/bgt_roads_demo.csv'
csv_headers = [
    'bgt_type', 'polygon',
    'x_min', 'y_max', 'x_max', 'y_min',
]

In [None]:
# Query the Amsterdam WFS once per road layer and gather the parsed polygons
# of all layers in a single list.
bgt_road_polygons_csv = []
for road_layer in bgt_layers:
    # The scraper returns the json response for this layer.
    layer_json = ams_bgt_scraper.scrape_amsterdam_bgt(road_layer, bbox=bbox)

    # Append everything parsed from this layer to the combined result.
    bgt_road_polygons_csv.extend(ams_bgt_scraper.parse_polygons(layer_json))

# Store the combined content in the output csv under the configured headers.
csv_utils.write_csv(bgt_data_file, bgt_road_polygons_csv, csv_headers)

## Scrape pole-like object locations from BGT and NDW

In [None]:
# Settings for the pole-like object scrape.
# BGT layers to query; the traffic sign layer is deliberately left out.
bgt_layers = [
    'BGTPLUS_PAL_lichtmast',
    'BGTPLUS_VGT_boom',
]

# Destination csv (input for the BGT fuser) and its column names.
bgt_data_file = '../datasets/bgt/custom_poles_demo.csv'
csv_headers = [
    'bgt_type',
    'x',
    'y',
]

In [None]:
# Collect tree and lamp post points from BGT, one WFS request per layer.
bgt_point_objects_csv = []
for point_layer in bgt_layers:
    # The scraper returns the json response for this layer.
    layer_json = ams_bgt_scraper.scrape_amsterdam_bgt(point_layer, bbox=bbox)

    # Append everything parsed from this layer to the combined result.
    bgt_point_objects_csv.extend(
        ams_bgt_scraper.parse_points_bgtplus(layer_json)
    )

# Add traffic signs from NDW. The municipality code of Amsterdam is GM0363;
# the bounding box is handed to the parser rather than to the scraper.
ndw_json = ndw_scraper.scrape_ndw(town_code='GM0363')
bgt_point_objects_csv.extend(ndw_scraper.parse_traffic_signs(ndw_json, bbox))

# Store the combined BGT and NDW content in the output csv.
csv_utils.write_csv(bgt_data_file, bgt_point_objects_csv, csv_headers)

## Scrape street furniture object locations from BGT
Please note: `(x, y)` coordinates for the BGT layers "BGTPLUS_BAK_afvalbak" and "BGTPLUS_SMR_bank" are not available for the example point cloud tiles. Therefore, manually generated data is provided in this repository for demonstration purposes.

In [None]:
# Settings for the street furniture scrape.
# BGT layers to query.
bgt_layers = [
    'BGTPLUS_BAK_afvalbak',
    'BGTPLUS_SMR_bank',
]

# Destination csv (input for the BGT fuser) and its column names.
bgt_data_file = '../datasets/bgt/bgt_street_furniture_demo.csv'
csv_headers = [
    'bgt_type',
    'x',
    'y',
]

In [None]:
# Collect street furniture points from BGT, one WFS request per layer.
bgt_point_objects_csv = []
for furniture_layer in bgt_layers:
    # The scraper returns the json response for this layer.
    layer_json = ams_bgt_scraper.scrape_amsterdam_bgt(furniture_layer, bbox=bbox)

    # Append everything parsed from this layer to the combined result.
    bgt_point_objects_csv.extend(
        ams_bgt_scraper.parse_points_bgtplus(layer_json)
    )

# The write step is intentionally disabled; uncomment it once data is
# available so that the output csv is only written with scraped content.
#csv_utils.write_csv(bgt_data_file, bgt_point_objects_csv, csv_headers)