In [1]:
import pathlib
import re

import geopandas as gpd
import pandas as pd

In [2]:
data_dir = pathlib.Path('~/data/euris').expanduser()
version = 'v0.1.0'

In [3]:
node_paths = list(data_dir.glob('Node_*.geojson'))
sections_paths = list(data_dir.glob('FairwaySection_*.geojson'))

node_path_re = re.compile(r'Node_(?P<countrycode>[A-Z]+)_\d+.geojson')
sections_path_re = re.compile(r'FairwaySection_(?P<countrycode>[A-Z]+)_\d+.geojson')


In [4]:
node_gdfs = []
for node_path in node_paths:
    node_gdf = gpd.read_file(node_path)
    # add file that data came from (includes version)
    node_gdf['path'] = node_path.name
    node_gdfs.append(node_gdf)
node_gdf = pd.concat(node_gdfs)

In [5]:
section_gdfs = []
for section_path in sections_paths:
    section_gdf = gpd.read_file(section_path)
    # add file that data came from (includes version)
    section_gdf['path'] = section_path.name
    section_gdfs.append(section_gdf)
section_gdf = pd.concat(section_gdfs)

  section_gdf = pd.concat(section_gdfs)


In [6]:
uniq_columns = set(node_gdf.columns) - {'path'}
n_nodes_duplicated = node_gdf.duplicated(subset=uniq_columns).sum()
node_gdf = node_gdf.drop_duplicates(subset=uniq_columns)
uniq_columns = set(section_gdf.columns) - {'path'}
n_sections_duplicated = section_gdf.duplicated(subset=uniq_columns).sum()
section_gdf = section_gdf.drop_duplicates(subset=uniq_columns)
print(f'removed {n_nodes_duplicated} duplicated nodes, {n_sections_duplicated} duplicated sections.')




node_gdf['countrycode_locode'] = node_gdf['locode'].apply(
    lambda x: x[:2]
)
node_gdf['countrycode_path'] = node_gdf['path'].apply(
    lambda x: node_path_re.match(x).group('countrycode')
)
node_gdf['countrycode'] = node_gdf['countrycode_locode']

node_gdf['node_id'] = node_gdf.apply(lambda row: f"{row['countrycode']}_{row['objectcode']}", axis=1)

# TODO: add columns with section_id (for consistent naming)

removed 291 duplicated nodes, 138 duplicated sections.


In [7]:
node_gdf.to_file(data_dir / f'nodes-{version}.geojson')
section_gdf.to_file(data_dir / f'sections-{version}.geojson')

In [8]:
node_gdf.shape[0], section_gdf.shape[0] * 2

(13592, 13592)

In [9]:
node_gdf[node_gdf['countrycode_locode'] != node_gdf['countrycode_path']]

Unnamed: 0,objectcode_cb,hectom_cb,sectionref_cb,locode_cb,function,ww_name,ww_name_cb,rt_name,rt_name_cb,wwauthorit,...,objectname,sectionref,hectom,vplnpoint,geometry,path,countrycode_locode,countrycode_path,countrycode,node_id
0,,00056,NL0150C00530,NLSVW0150CJ352400056,junction,Gemeenschappelijke Maas,Maas,Gemeenschappelijke Maas,Maas van Wessem naar Ohe en Laak,De Vlaamse Waterweg nv,...,Jachthaven Stevensweert,BE0440400000,00624,0.0,POINT (5.8444 51.13834),Node_NL_20250521.geojson,BE,NL,BE,BE_F5570
1,,00059,NL0150C00530,NLSVW0150CJ696500059,junction,Gemeenschappelijke Maas,Maas,Gemeenschappelijke Maas,Maas van Wessem naar Ohe en Laak,De Vlaamse Waterweg nv,...,Aansluiting Grensmaas Visakker,BE0440400000,00620,0.0,POINT (5.84338 51.13535),Node_NL_20250521.geojson,BE,NL,BE,BE_F5569
2,,00059,NL0150C00000,NLSVW0150CJ696500059,junction,Gemeenschappelijke Maas,Maas,Gemeenschappelijke Maas,Maas van Wessem naar Ohe en Laak,De Vlaamse Waterweg nv,...,Aansluiting Grensmaas Visakker,BE0440300000,00620,0.0,POINT (5.84338 51.13535),Node_NL_20250521.geojson,BE,NL,BE,BE_F5569
3,,00111,NL0150C00000,NLSVW0150CJ245100111,junction,Gemeenschappelijke Maas,Maas,Gemeenschappelijke Maas,Maas van Wessem naar Ohe en Laak,De Vlaamse Waterweg nv,...,BEMSK04403F556900568,BE0440300000,00568,1.0,POINT (5.82349 51.10814),Node_NL_20250521.geojson,BE,NL,BE,BE_F5199
4,,00046,NL0150C00564,NLMSB0150CJ477000046,junction,Gemeenschappelijke Maas,Maas,Gemeenschappelijke Maas,Maas van Wessem naar Ohe en Laak,De Vlaamse Waterweg nv,...,Grenspunt Kinrooi Gemeenschappelijke Maas (Ned...,BE0440500000,00636,1.0,POINT (5.85573 51.1446),Node_NL_20250521.geojson,BE,NL,BE,BE_F5572
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1252,J3356,03137,DE0390100735,DEXXX03901J335603137,junction,Rhin,Rhein,Rhin canalise,Rhein,VNF,...,Rhin canalise - Port autonome de Strasbourg,FRVN25300038,3137,0.0,POINT (7.9562 48.71581),Node_DE_20250616.geojson,FR,DE,FR,FR_J1228
1253,J3356,03137,DE0390100736,DEXXX03901J335603137,junction,Rhin,Rhein,Rhin canalise,Rhein,VNF,...,Rhin canalise - Port autonome de Strasbourg,FRVN25300039,3137,0.0,POINT (7.9562 48.71581),Node_DE_20250616.geojson,FR,DE,FR,FR_J1228
1254,J3355,03319,DE0390100736,DEXXX03901J335503319,junction,Rhin,Rhein,Rhin canalise,Rhein,VNF,...,FRXXXVN2530000003319,FRVN25300039,3319,0.0,POINT (8.10109 48.81525),Node_DE_20250616.geojson,FR,DE,FR,FR_N3208
1255,J3355,03319,DE0390100737,DEXXX03901J335503319,junction,Rhin,Rhein,Rhin canalise,Rhein,VNF,...,FRXXXVN2530000003319,FRVN25300040,3319,0.0,POINT (8.10109 48.81525),Node_DE_20250616.geojson,FR,DE,FR,FR_N3208
