In [1]:
import pathlib
import re

import geopandas as gpd
import pandas as pd

In [2]:
data_dir = pathlib.Path('~/data/euris').expanduser()
version = 'v0.1.0'

In [3]:
node_paths = list(data_dir.glob('Node_*.geojson'))
sections_paths = list(data_dir.glob('FairwaySection_*.geojson'))

node_path_re = re.compile(r'Node_(?P<countrycode>[A-Z]+)_\d+.geojson')
sections_path_re = re.compile(r'FairwaySection_(?P<countrycode>[A-Z]+)_\d+.geojson')


In [4]:
node_gdfs = []
for node_path in node_paths:
    node_gdf = gpd.read_file(node_path)
    # add file that data came from (includes version)
    node_gdf['path'] = node_path.name
    node_gdfs.append(node_gdf)
node_gdf = pd.concat(node_gdfs)

In [5]:
section_gdfs = []
for section_path in sections_paths:
    section_gdf = gpd.read_file(section_path)
    # add file that data came from (includes version)
    section_gdf['path'] = section_path.name
    section_gdfs.append(section_gdf)
section_gdf = pd.concat(section_gdfs)

  section_gdf = pd.concat(section_gdfs)


In [6]:
uniq_columns = set(node_gdf.columns) - {'path'}
n_nodes_duplicated = node_gdf.duplicated(subset=uniq_columns).sum()
node_gdf = node_gdf.drop_duplicates(subset=uniq_columns)
uniq_columns = set(section_gdf.columns) - {'path'}
n_sections_duplicated = section_gdf.duplicated(subset=uniq_columns).sum()
section_gdf = section_gdf.drop_duplicates(subset=uniq_columns)
print(f'removed {n_nodes_duplicated} duplicated nodes, {n_sections_duplicated} duplicated sections.')




node_gdf['countrycode_locode'] = node_gdf['locode'].apply(
    lambda x: x[:2]
)
node_gdf['countrycode_path'] = node_gdf['path'].apply(
    lambda x: node_path_re.match(x).group('countrycode')
)
node_gdf['countrycode'] = node_gdf['countrycode_locode']

node_gdf['node_id'] = node_gdf.apply(lambda row: f"{row['countrycode']}_{row['objectcode']}", axis=1)


removed 291 duplicated nodes, 135 duplicated sections.


In [7]:
node_gdf.to_file(data_dir / f'nodes-{version}.geojson')
section_gdf.to_file(data_dir / f'sections-{version}.geojson')

In [10]:
node_gdf.shape[0], section_gdf.shape[0] * 2

(13472, 13478)

In [11]:
node_gdf[node_gdf['countrycode_locode'] != node_gdf['countrycode_path']]

Unnamed: 0,objectcode_cb,hectom_cb,sectionref_cb,locode_cb,function,ww_name,ww_name_cb,rt_name,rt_name_cb,wwauthorit,...,objectname,sectionref,hectom,vplnpoint,geometry,path,countrycode_locode,countrycode_path,countrycode,node_id
0,J0023,22018,AT0000100002,ATXXX00001J002322014,junction,Donau,Donau,,Donau,WSV,...,DEXXX00401J006022018,DE0040100659,22018,1.0,POINT (13.727 48.51292),Node_AT_20250512.geojson,DE,AT,DE,DE_J0060
1,J0099,22232,AT0000100001,ATXXX00001J009922231,junction,Donau,Donau,,Donau,WSV,...,DEXXX00401J282122232,DE0040100383,22232,,POINT (13.50104 48.58098),Node_AT_20250512.geojson,DE,AT,DE,DE_J2821
2,J0098,22031,AT0000100001,ATXXX00001J009822031,junction,Donau,Donau,,Donau,WSV,...,DEXXX00401J287222031,DE0040100383,22031,,POINT (13.71066 48.51825),Node_AT_20250512.geojson,DE,AT,DE,DE_J2872
3,J0098,22031,AT0000100002,ATXXX00001J009822031,junction,Donau,Donau,,Donau,WSV,...,DEXXX00401J287222031,DE0040100659,22031,,POINT (13.71066 48.51825),Node_AT_20250512.geojson,DE,AT,DE,DE_J2872
4,J0010,18802,AT0000100033,ATXXX00001J001018802,junction,Dunaj,Donau,Dunaj,Donau,Dopravný úrad,...,Ústie Moravy - Štátna hranica AT-SK,SK0000100001,18802,1.0,POINT (16.97613 48.1722),Node_AT_20250512.geojson,SK,AT,SK,SK_J0001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,N1683,00618,FRVN21900005,FRXXXVN219LOCKS00618,junction,Grensleie,Lys,Grensleie,Lys,De Vlaamse Waterweg nv,...,BEMEN18510F190800150,BE1851000000,00150,1.0,POINT (3.10208 50.78134),Node_FR_20241106.geojson,BE,FR,BE,BE_F1908
4,N1683,00618,FRVN21900006,FRXXXVN219LOCKS00618,junction,Grensleie,Lys,Grensleie,Lys,De Vlaamse Waterweg nv,...,BEMEN18511F190800150,BE1851100000,00150,1.0,POINT (3.10208 50.78134),Node_FR_20241106.geojson,BE,FR,BE,BE_F1908
5,N1684,00625,FRVN21900006,FRXXXVN2190000000625,junction,Grensleie,Lys,Grensleie,Lys,De Vlaamse Waterweg nv,...,BEMEN18511F194700157,BE1851100000,00157,1.0,POINT (3.10772 50.78534),Node_FR_20241106.geojson,BE,FR,BE,BE_F1947
6,N1684,00625,FRVN21900007,FRXXXVN2190000000625,junction,Grensleie,Lys,Grensleie,Lys,De Vlaamse Waterweg nv,...,BEMEN18512F194700157,BE1851200000,00157,1.0,POINT (3.10772 50.78534),Node_FR_20241106.geojson,BE,FR,BE,BE_F1947
