In [1]:
import requests
import pandas as pd

import IPython.display
import lxml.etree
import scrapy

In [2]:
# Base URL of the data service and the location of its WADL description.
wadl_url = 'https://www.vaarweginformatie.nl/wfswms/dataservice'
wadl_path = wadl_url + '/application.wadl'

# Query-service endpoint for bridges (version 1.4).
# NOTE(review): `url` is reassigned by the pagination loop further down.
url = 'https://www.vaarweginformatie.nl/wfswms/queryservice/1.4/current/bridge'

# Bare last expression so the notebook shows the resolved WADL location.
wadl_path

'https://www.vaarweginformatie.nl/wfswms/dataservice/application.wadl'

In [3]:
# Location of the WADL description of the query service.
# NOTE(review): not referenced by any of the cells visible in this part of the notebook.
query_url = 'https://www.vaarweginformatie.nl/wfswms/queryservice/application.wadl'

In [4]:
# Download the WADL document that describes the data service and parse it
# into an XML element tree.
# - timeout: without one, requests waits indefinitely on an unresponsive server
#   (30 seconds is an arbitrary but generous limit for this small document).
# - raise_for_status: surface HTTP errors here instead of handing an error
#   page to the XML parser.
resp = requests.get(wadl_path, timeout=30)
resp.raise_for_status()
wadl = lxml.etree.fromstring(resp.content)

In [5]:
# Locate the <resources> container of the WADL document. Its last child is
# used as the root of the service version to talk to (the saved output shows
# its path is '1.4').
# NOTE(review): assumes the last child is always the newest version — confirm.
resources = wadl.find('.//{http://wadl.dev.java.net/2009/02}resources')
if resources is None or len(resources) == 0:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an opaque TypeError/IndexError on the indexing below.
    raise ValueError(f"No <resources> entries found in the WADL at {wadl_path}")
root = resources[-1]

# The 'path' attribute of that element is the version string; the versioned
# base URL is the data service URL followed by it.
version = root.attrib['path']
version_path = f"{wadl_url}/{version}"

# Bare last expression so the notebook displays the detected version.
version

'1.4'

In [6]:
# Gather every <resource> element nested anywhere below the version root.
# From here on `resources` is a plain list of elements.
resources = list(root.iterfind('.//{http://wadl.dev.java.net/2009/02}resource'))

# Bare last expression so the notebook displays the collected elements.
resources

[<Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0efa40>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0efd80>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0efdc0>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0efe00>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0efe40>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0efe80>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0efec0>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0eff00>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0eff40>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0eff80>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0d1b80>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c0effc0>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c200040>,
 <Element {http://wadl.dev.java.net/2009/02}resource at 0x16c200080>,
 <Element {http://wa

In [7]:
# Keep only the WADL resources whose path attribute is exactly "geogeneration".
geogenerations = [
    candidate
    for candidate in resources
    if candidate.attrib.get("path") == "geogeneration"
]

# Use the first match (raises IndexError when there is none).
geogeneration = geogenerations[0]

In [27]:
# Absolute URL of the geogeneration endpoint: the versioned base URL joined
# with the path attribute of the matching WADL resource.
geogeneration_path = f"{version_path}/{geogeneration.attrib['path']}"
# Bare last expression so the notebook displays the URL.
geogeneration_path

'https://www.vaarweginformatie.nl/wfswms/dataservice/1.4/geogeneration'

In [9]:
# Fetch the geogeneration document from the service.
# - timeout: avoid hanging forever on an unresponsive server (30 s is arbitrary).
# - raise_for_status: fail on HTTP errors instead of trying to decode an error body.
geogeneration_response = requests.get(geogeneration_path, timeout=30)
geogeneration_response.raise_for_status()
geogeneration_json = geogeneration_response.json()

# The "GeoGeneration" number is used later to build the per-generation data URLs.
geo_gen_id = geogeneration_json["GeoGeneration"]

# Bare last expression so the notebook displays the full response.
geogeneration_json

{'GeoType': 'geogeneration',
 'GeoGeneration': 3510,
 'PublicationDate': '2024-11-29T12:10:35.199Z',
 'Active': True}

In [28]:
# Pick the WADL resource whose path attribute is "geotype" and build its URL.
geotypes = [resource for resource in resources if resource.attrib.get("path") == "geotype"]
geotype = geotypes[0]
geotype_path = f"{version_path}/{geotype.attrib['path']}"

# Download the geotype listing.
# - timeout: avoid hanging forever on an unresponsive server (30 s is arbitrary).
# - raise_for_status: fail on HTTP errors instead of trying to decode an error body.
geotype_response = requests.get(geotype_path, timeout=30)
geotype_response.raise_for_status()
geotype_json = geotype_response.json()

# Only the last expression of a cell is displayed, so the former stray
# `geotype_json` line in the middle of the cell had no effect and was dropped.
geotype_path

'https://www.vaarweginformatie.nl/wfswms/dataservice/1.4/geotype'

In [11]:
# Download all records of the selected geo types for the current geo generation.
#
# The service returns results in pages. Each response is read for:
#   'Result'     - the records of this page
#   'Offset'     - the offset this page starts at
#   'Count'      - presumably the number of records in this page (verify against the API docs)
#   'TotalCount' - presumably the total number of records available
# `records` maps each geo type to the concatenation of all its pages.
records = {}
for geo_type in ["ferrylandingpoint", "ferry"]:
    next_page = True
    offset = 0
    results = []
    while next_page:
        # `url` stays a notebook-level variable so it can be inspected afterwards.
        url = f"{version_path}/{geo_gen_id}/{geo_type}?offset={offset}"
        # timeout: avoid hanging forever on an unresponsive server (30 s is arbitrary);
        # raise_for_status: fail on HTTP errors instead of decoding an error body.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        result = response.json()
        results.extend(result['Result'])

        next_offset = result['Offset'] + result['Count']
        # Also stop on an empty page: with Count == 0 the offset would never
        # advance and the loop would request the same page forever.
        next_page = result['Count'] > 0 and next_offset < result['TotalCount']
        if next_page:
            offset = next_offset
    records[geo_type] = results


In [29]:
# Display the current value of `url`; in the saved output this is the last URL
# requested by the pagination loop.
url

'https://www.vaarweginformatie.nl/wfswms/dataservice/1.4/3510/ferry?offset=100'

In [12]:
# Show the downloaded "ferrylandingpoint" records as a table (one row per record).
pd.DataFrame(records["ferrylandingpoint"])

Unnamed: 0,Id,GeoType,GeoGeneration,Name,Geometry,VinCode,RouteId,RouteKmBegin,RouteKmEnd,AdministrationId,...,BerthId,CanBeUsedByRoro,NumberOfLandingPoints,NumberOfFerryLandingPoints,NumberOfQuays,FerryId,NwbHarbourId,VinHarbourId,Condition,IsrsId
0,17127,ferrylandingpoint,1686,Veersteiger ViN(31433),POINT (4.99726402755186 52.1894496876556),31433,20199,23.400,23.400,9122.0,...,24406.0,False,0.0,0.0,1.0,8880.0,,,,
1,17287,ferrylandingpoint,1686,Veersteiger ViN(63954),POINT (6.00702166557312 51.2584082708073),63954,41037,57.300,57.300,,...,58662.0,False,,1.0,,33215.0,,,,
2,17351,ferrylandingpoint,1686,Veersteiger ViN(62009),"LINESTRING (6.93606218459887 53.3278539516416,...",62009,47883,0.000,0.620,48595.0,...,,True,1.0,0.0,0.0,3298.0,43235.0,,,
3,17376,ferrylandingpoint,1686,Veersteiger ViN(31332),POINT (5.96094131469726 51.9370875426895),31332,55181,10.000,10.000,9122.0,...,4633.0,False,0.0,2.0,0.0,49377.0,,,,
4,17586,ferrylandingpoint,1686,Veersteiger ViN(31712),POINT (5.04959106445312 51.7104835074799),31712,41037,202.000,202.000,9122.0,...,55035.0,False,0.0,2.0,0.0,35411.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,3782,ferrylandingpoint,2252,Veersteiger ViN(31688),POINT (5.54603576660156 51.818609495492),31688,41037,158.000,158.000,9122.0,...,5027.0,False,0.0,1.0,0.0,21356.0,,,,
384,4103,ferrylandingpoint,2754,Veersteiger ViN(45021),POINT (6.20286008426512 53.4680173405809),45021,4871,7.817,7.817,11304.0,...,34792.0,False,1.0,0.0,0.0,56164.0,,,,
385,4395,ferrylandingpoint,1686,Veersteiger ViN(59612),POINT (5.95572393079484 52.8332662333904),59612,49027,1.942,1.942,,...,46696.0,False,0.0,0.0,1.0,50048.0,,,,
386,4412,ferrylandingpoint,1686,Veersteiger ViN(49554),POINT (5.66877024670822 52.99858277782),49554,28575,5.181,5.181,10588.0,...,42927.0,False,1.0,0.0,1.0,57248.0,,,,


In [26]:
# Download the schema description of every geo type, keyed by geo type.
# NOTE(review): assumes iterating `geotype_json` yields geo type names usable
# as URL path segments — confirm against the geotype response.
schemas = {}
for geo_type in geotype_json:
    # Reuse the versioned base URL built earlier instead of hard-coding the host;
    # it resolves to the same '<data service>/<version>' prefix.
    # timeout: avoid hanging forever on an unresponsive server (30 s is arbitrary);
    # raise_for_status: fail on HTTP errors instead of storing/decoding an error body.
    schema_response = requests.get(f"{version_path}/schema/{geo_type}", timeout=30)
    schema_response.raise_for_status()
    schema = schema_response.json()
    schemas[geo_type] = schema

In [14]:
# Reference: scrapy-jsonschema plugin — https://github.com/scrapy-plugins/scrapy-jsonschema
# Example schema endpoint (bridge) — https://www.vaarweginformatie.nl/wfswms/dataservice/1.4/schema/bridge

In [32]:
# Inspect the downloaded schema of the 'headoffice' geo type.
schemas['headoffice']

{'name': 'HeadOffice',
 'properties': {'Id': {'type': 'number',
   'required': True,
   'description': 'The given Id for the object'},
  'GeoType': {'type': 'string',
   'required': True,
   'description': 'The type of object'},
  'GeoGeneration': {'type': 'number',
   'required': True,
   'description': 'The GeoGeneration in which this object was last modified or added'},
  'Name': {'type': 'string', 'description': 'The given name for the object'},
  'Geometry': {'type': 'string',
   'description': 'The Geometry of the object in the well-known text format'},
  'ForeignCode': {'type': 'string',
   'description': 'The foreign code given to the Object'},
  'VinCode': {'type': 'string',
   'description': 'The vin code given to the Object'},
  'AdministrationId': {'type': 'number',
   'description': 'The AdministrationId of the HeadOffice'},
  'WorkingAreaId': {'type': 'number',
   'description': 'The WorkingAreaId of the HeadOffice'},
  'VhfChannel': {'type': 'string',
   'description': '