In [1]:
import pathlib
import geopandas as gpd
import numpy as np
import pandas as pd

import shapely

In [2]:
# Input locations: the geocoded IVS dataset sits under the user's home
# directory, the hub polygons are read relative to the working directory.
data_dir = pathlib.Path('~/data/ivs').expanduser()
dataset_filename = data_dir.joinpath('ivs-2024-geocoded.gpkg')
hubs_filename = 'hubs.geojson'

# Hub layer; later cells rely on its `name` and `geometry` columns.
hubs = gpd.read_file(hubs_filename)

In [3]:
# Load the geocoded IVS dataset from the GeoPackage at `dataset_filename`.
ivs_gdf = gpd.read_file(dataset_filename)


In [4]:
# (rows, columns) of the dataset as loaded, before any filtering.
ivs_gdf.shape

(2504165, 17)

In [5]:
def get_src_point(line):
    """Return the first vertex of ``line`` as a ``shapely.Point``.

    ``line`` must expose a non-empty ``coords`` sequence; an empty one
    raises ``IndexError``.
    """
    first_vertex = line.coords[0]
    return shapely.Point(first_vertex)

def get_target_point(line):
    """Return the last vertex of ``line`` as a ``shapely.Point``.

    ``line`` must expose a non-empty ``coords`` sequence; an empty one
    raises ``IndexError``.
    """
    last_vertex = line.coords[-1]
    return shapely.Point(last_vertex)

In [6]:
# Study window used for the route analysis (ISO week numbers within one year).
STUDY_YEAR = 2022
STUDY_WEEKS = [35, 36, 37, 38, 39, 40]

# Select rows first so the per-row work below only touches the study window
# instead of the whole dataset.
in_study_window = ivs_gdf['Weeknr'].isin(STUDY_WEEKS) & (ivs_gdf['Jaar'] == STUDY_YEAR)
# A trip without a usable line has no endpoints to extract: drop missing
# geometries and empty ones (an empty line has no first/last coordinate).
has_line = ivs_gdf['geometry'].notna() & ~ivs_gdf['geometry'].is_empty
# .copy() makes the subset an independent frame, so the column assignments
# below never write into a slice of the full dataset and the cell can be
# re-run safely.
ivs_gdf = ivs_gdf[in_study_window & has_line].copy()

# Derived columns: parsed event start time and the first/last vertex of each
# trip line as point geometries.
ivs_gdf['time'] = pd.to_datetime(ivs_gdf['v05_06_begindt_evenement_iso'], format='ISO8601')
ivs_gdf['src_geometry'] = ivs_gdf['geometry'].apply(get_src_point)
ivs_gdf['target_geometry'] = ivs_gdf['geometry'].apply(get_target_point)

  ivs_gdf['time'] = pd.to_datetime(ivs_gdf['v05_06_begindt_evenement_iso'], format='ISO8601')


In [7]:
# Inspect the filtered frame, including the derived time and endpoint columns.
ivs_gdf

Unnamed: 0,Jaarmaand,Jaar,Maand,Weeknr,v05_06_begindt_evenement_iso,v05_06_Begindt_evenement,UNLO_herkomst,UNLO_bestemming,v15_1_Scheepstype_RWS,SK_CODE,v18_Laadvermogen,v28_Beladingscode,v38_Vervoerd_gewicht,v30_4_Containers_TEU_S,nstr_nw,nst2007_nw,geometry,time,src_geometry,target_geometry
256,2210,2022,10,39,2022-10-01T17:00:00+02:00,01 oktober 2022 17:00:00 uur,NLRTM,NLRTM,2,M8,3190.0,3,0.0,0.0,3,7.2,"LINESTRING (4.5 51.91667, 4.5 51.91667)",2022-10-01 17:00:00+02:00,POINT (4.5 51.91667),POINT (4.5 51.91667)
257,2210,2022,10,40,2022-10-03T12:00:00+02:00,03 oktober 2022 12:00:00 uur,NLWKD,BEKOU,1,M8,3380.0,1,,0.0,,,"LINESTRING (4.89319 51.80768, 4.28333 51.25)",2022-10-03 12:00:00+02:00,POINT (4.89319 51.80768),POINT (4.28333 51.25)
258,2210,2022,10,40,2022-10-03T16:00:00+02:00,03 oktober 2022 16:00:00 uur,NLNIJ,NLDRU,1,M8,2900.0,7,2000000.0,0.0,6,3.5,"LINESTRING (5.8386 51.83089, 5.60339 51.88715)",2022-10-03 16:00:00+02:00,POINT (5.8386 51.83089),POINT (5.60339 51.88715)
259,2210,2022,10,40,2022-10-06T15:00:00+02:00,06 oktober 2022 15:00:00 uur,NLDST,NLOOS,1,M6,1190.0,7,1191000.0,0.0,4,3.2,"LINESTRING (5.66667 51.88333, 4.88333 51.65)",2022-10-06 15:00:00+02:00,POINT (5.66667 51.88333),POINT (4.88333 51.65)
260,2210,2022,10,40,2022-10-08T07:00:00+02:00,08 oktober 2022 07:00:00 uur,NLNLG,NLRTM,1,M8,3240.0,1,,0.0,,,"LINESTRING (5.72343 52.64085, 4.5 51.91667)",2022-10-08 07:00:00+02:00,POINT (5.72343 52.64085),POINT (4.5 51.91667)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2502798,2209,2022,9,39,2022-09-28T10:00:00+02:00,28 september 2022 10:00:00 uur,BELGG,NLMST,1,M1,380.0,7,998000.0,0.0,5,10.1,"LINESTRING (5.56667 50.63333, 5.69347 50.84843)",2022-09-28 10:00:00+02:00,POINT (5.56667 50.63333),POINT (5.69347 50.84843)
2502799,2209,2022,9,39,2022-09-28T10:00:00+02:00,28 september 2022 10:00:00 uur,NLAMS,DEMHG,1,M8,3210.0,7,1700000.0,0.0,2,7.1,"LINESTRING (4.81667 52.4, 8.45 49.48333)",2022-09-28 10:00:00+02:00,POINT (4.81667 52.4),POINT (8.45 49.48333)
2502800,2209,2022,9,39,2022-09-28T22:00:00+02:00,28 september 2022 22:00:00 uur,NLVLI,BEGNE,2,M8,3370.0,7,2250000.0,0.0,3,7.2,"LINESTRING (3.7 51.45, 3.71667 51.05)",2022-09-28 22:00:00+02:00,POINT (3.7 51.45),POINT (3.71667 51.05)
2502864,2209,2022,9,39,2022-09-29T01:00:00+02:00,29 september 2022 01:00:00 uur,DEMHG,BEANR,2,M9,3370.0,3,0.0,0.0,3,7.2,"LINESTRING (8.45 49.48333, 4.41667 51.21667)",2022-09-29 01:00:00+02:00,POINT (8.45 49.48333),POINT (4.41667 51.21667)


In [8]:
# Every ordered (source, target) pair of distinct hubs: cross-join the hub
# table with itself, then discard the pairs that join a hub to itself.
hub_pairs = pd.merge(hubs, hubs, how='cross', suffixes=('_x', '_y'))
routes = hub_pairs.loc[hub_pairs['name_x'].ne(hub_pairs['name_y'])]
routes

Unnamed: 0,name_x,geometry_x,name_y,geometry_y
1,Nijmegen,"POLYGON ((5.70063 51.93251, 5.64461 51.75291, ...",Rotterdam,"POLYGON ((4.56209 51.91397, 4.48988 51.9333, 4..."
2,Nijmegen,"POLYGON ((5.70063 51.93251, 5.64461 51.75291, ...",Duisburg,"POLYGON ((6.8501 51.45693, 6.7234 51.53244, 6...."
3,Rotterdam,"POLYGON ((4.56209 51.91397, 4.48988 51.9333, 4...",Nijmegen,"POLYGON ((5.70063 51.93251, 5.64461 51.75291, ..."
5,Rotterdam,"POLYGON ((4.56209 51.91397, 4.48988 51.9333, 4...",Duisburg,"POLYGON ((6.8501 51.45693, 6.7234 51.53244, 6...."
6,Duisburg,"POLYGON ((6.8501 51.45693, 6.7234 51.53244, 6....",Nijmegen,"POLYGON ((5.70063 51.93251, 5.64461 51.75291, ..."
7,Duisburg,"POLYGON ((6.8501 51.45693, 6.7234 51.53244, 6....",Rotterdam,"POLYGON ((4.56209 51.91397, 4.48988 51.9333, 4..."


In [9]:
# Tag every trip with the hub-to-hub route it serves. A trip belongs to route
# "A-B" when its first vertex intersects hub A's polygon and its last vertex
# intersects hub B's polygon.
# The endpoint columns are pulled out once as arrays so that each hub test is
# a single vectorized shapely call rather than a Python lambda per trip.
src_points = ivs_gdf['src_geometry'].to_numpy()
target_points = ivs_gdf['target_geometry'].to_numpy()

route_dfs = []

for i, row in routes.iterrows():
    # Boolean mask over the trips: starts in the source hub AND ends in the
    # target hub.
    index = (
        shapely.intersects(src_points, row['geometry_x'])
        & shapely.intersects(target_points, row['geometry_y'])
    )
    route_df = ivs_gdf[index].copy()
    route = f"{row['name_x']}-{row['name_y']}"
    route_df["route"] = route
    route_df["source"] = row["name_x"]
    route_df["target"] = row["name_y"]
    route_dfs.append(route_df)

# One frame with all tagged trips; rows keep their index labels from ivs_gdf.
all_routes_gdf = pd.concat(route_dfs)

In [10]:
# Persist the tagged trips with all columns, including time and the endpoint geometries.
# NOTE(review): the file name says week 38, but the filter cell keeps weeks 35-40 — confirm the name is intended.
all_routes_gdf.to_parquet("week_38_routes.parquet")

In [11]:
# GeoPackage export of the same trips without the derived endpoint and time columns.
# NOTE(review): presumably these are dropped because the file driver cannot store them — confirm.
all_routes_gdf.drop(columns=['src_geometry', 'target_geometry', 'time']).to_file('week_38_routes.gpkg')

In [12]:
# Convenience shell call on the working directory, where the exports above were written.
# NOTE(review): `open` is presumably the macOS launcher, so this cell is not portable — consider removing it before sharing.
!open .

In [15]:
# Number of trips per directed route.
all_routes_gdf['route'].value_counts()

route
Rotterdam-Duisburg    810
Duisburg-Rotterdam    794
Rotterdam-Nijmegen     67
Nijmegen-Rotterdam     59
Duisburg-Nijmegen       8
Nijmegen-Duisburg       7
Name: count, dtype: int64

In [18]:
# Total of the v30_4_Containers_TEU_S column per directed route.
all_routes_gdf.groupby('route')['v30_4_Containers_TEU_S'].sum()

route
Duisburg-Nijmegen       537.0
Duisburg-Rotterdam    18583.0
Nijmegen-Duisburg       473.0
Nijmegen-Rotterdam     5274.0
Rotterdam-Duisburg    23334.0
Rotterdam-Nijmegen     6413.0
Name: v30_4_Containers_TEU_S, dtype: float64