### Import packages and set paths

In [None]:
import os

import dask.dataframe as dd
import dask_gateway
import dask.distributed

import dotenv
import warnings
warnings.filterwarnings('ignore')

import copy
import datetime
import geopandas as gpd
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import movingpandas as mpd
import networkx as nx
import numpy as np
import pandas as pd
import pathlib
import pickle
import pyarrow as pa
import pyproj
import pytz
from shapely import wkt
from shapely.geometry import Polygon, LineString, Point
from shapely.ops import transform
import sys
import time

In [None]:
# Remote location (blob storage) of the pre-processed AIS parquet data
folder_name = '2022_PoR'
path_name = 'abfs://ais/parquet/' + folder_name

# Root folder for the other local data: everything in the working directory
# that precedes the notebook's own sub-folder (Windows-style separators)
current_directory = os.getcwd()
path = current_directory.partition("\\01_Data_Analysis\\02_AIS_data")[0]

### Load the access token (a SAS token is used to protect the data)

In [None]:
# Secrets are provided through environment variables (requires python-dotenv).
# Copy the .env.example file and rename it to .env (one directory up from the notebooks).
#
# IPython magics: load the dotenv and line_profiler extensions
%load_ext dotenv
%load_ext line_profiler
# Load the environment variables from the .env file (verbose output)
%dotenv -v

In [None]:
# Read the SAS token from the .env file; raises KeyError when the
# AZURE_BLOB_SAS_TOKEN entry is missing
sas_token = dotenv.dotenv_values()['AZURE_BLOB_SAS_TOKEN']

### Creation of the cluster with high worker memory

In [None]:
# Connect to the dask-gateway server and fetch the options it exposes
gateway = dask_gateway.Gateway()
cluster_options = gateway.cluster_options()
# Request high-memory workers (presumably the unit is GiB — TODO confirm against the gateway configuration)
cluster_options.worker_memory = 64
cluster = gateway.new_cluster(cluster_options)
# Scale adaptively between 1 and 100 workers
cluster.adapt(minimum=1, maximum=100)
# Last expression of the cell: shown as the cell output
cluster

In [None]:
# Attach a distributed client to the cluster; it is used below to ship code to the workers
client = dask.distributed.Client(cluster)
# Last expression of the cell: shown as the cell output
client

In [None]:
def worker_setup(dask_worker: dask.distributed.Worker):
    """
    Function that installs movingpandas on a dask worker

    The package is installed with the interpreter that runs the worker, so it
    ends up in the environment that executes the tasks. A failing installation
    is logged instead of being silently ignored.

    Parameters
    ----------
    dask_worker: the dask worker on which the callback is executed

    :returns: None
    """
    # Local imports: the function is serialised and executed on the workers
    import logging
    import subprocess
    import sys

    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "movingpandas"],
        check=False,
    )
    if result.returncode != 0:
        logging.getLogger("worker_setup").warning(
            "pip install movingpandas failed on worker (exit code %s)", result.returncode
        )

client.register_worker_callbacks(worker_setup)

In [None]:
# Ship the local fis_network module to the workers; the functions mapped over the partitions import it
client.upload_file('ais_to_fis/fis_network.py')

In [None]:
# Put the ais_to_fis folder first on the import path of this (client) process
# NOTE(review): the absolute path is specific to one environment — confirm before running elsewhere
src_path = pathlib.Path('/home/jovyan/AIS/Rotterdam/ais_to_fis').resolve()
sys.path.insert(0, str(src_path)) 

### Import data

In [None]:
# Load the two pickled graphs: FG is used for map matching of the AIS tracks, FG_model for the figure
# NOTE: pickle.load can execute arbitrary code — only load files from a trusted source
with open(path+"\\00_Input_data\\01_Geospatial_data\\FG.pickle", 'rb') as f:
    FG = pickle.load(f)
    
with open(path+"\\03_Simulation\\00_Input_data\\01_Geospatial_data\\network\\PoR_graph_with_information.pickle", 'rb') as f:
    FG_model = pickle.load(f)

# Geospatial layers (GeoJSON) read into GeoDataFrames
anchorage_areas = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\anchorage_areas.geojson")
separation_zones = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\separation_zones.geojson")
separation_boundaries = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\separation_boundaries.geojson")
turning_basins = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\turning_basins.geojson")
waterways = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\water.geojson")
coastline = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\coastline.geojson")
berths = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\Berths.geojson")
liquid_bulk_terminals = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\liquid_bulk_terminals.geojson")
container_terminals = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\container_terminals.geojson")
dry_bulk_terminals = gpd.read_file(path+"\\00_Input_data\\01_Geospatial_data\\dry_bulk_terminals.geojson")

# Index the berths by the 'index' column (renamed to 'Name') and the anchorage areas by their seamark name
berths = berths.set_index('index')
berths.index.names = ['Name']
anchorage_areas['name'] = anchorage_areas['seamark:name']
anchorage_areas = anchorage_areas.set_index('name')
anchorage_areas = anchorage_areas[['geometry']]

# Rebuild these geometries as shapely Polygons
# (presumably they are stored as closed line strings — TODO confirm against the GeoJSON files)
separation_zones['geometry'] = [Polygon(geom) for geom in separation_zones['geometry']] 
anchorage_areas['geometry'] = [Polygon(geom) for geom in anchorage_areas['geometry']] 
turning_basins['geometry'] = [Polygon(geom) for geom in turning_basins['geometry']] 

# Default plotting colours
water_color = 'lightblue'
boundary_color = 'k'
liquid_bulk_color = 'dodgerblue'
container_color = 'blue'
dry_bulk_color = 'midnightblue'
terminal_color = 'lightgrey'

# Liquid bulk terminals whose name contains 'Koole Tankstorage Botlek' (non-string names are skipped)
Koole = liquid_bulk_terminals[['Koole Tankstorage Botlek' in name if isinstance(name,str) else False for name in liquid_bulk_terminals.name]]

In [None]:
# Corrections to the model graph: nodes and edges to be removed or ignored
# The offshore and anchorage nodes are removed unconditionally (raises if one of them is absent)
for node in ['offshore1','offshore2','offshore3','offshore4','offshore5','offshore6','anchorage']:
    FG_model.remove_node(node)
    
# Edges that are removed together with both of their end nodes
remove_edge_and_nodes = [['8863594', 'B33440_B'],
                         ['B33440_B', 'B33440_A'],
                         ['8866170', 'B49209_A'],
                         ['B45863_B','B45863_A'],
                         ['22161426', 'B45863_B'],
                         ['8866170', 'B45863_A'],
                         ['B49209_A','B49209_B'],
                         ['B15339_B','B15339_A'],
                         ['8864161', 'B15339_A'],
                         ['8864161', '8867024'],
                         ['8867008', '8866992'],
                         ['8861414', '8867008'],
                         ['8861414', '8867154'],
                         ['8863786', '8865027'],
                         ['8867154', '8863786'],
                         ['8867154', '8867029'],
                         ['8867029', 'S38127_B'],
                         ['S38127_A', 'S38127_B'],
                         ['B44072_B','B44072_A'],
                         ['S38127_A','B44072_B'],
                         ['8863200', 'B44072_A'],
                         ['8860947', '8867066'],
                         ['8863139', '8863914'],
                         ['8862288', '8862714'],
                         ['B57361_B','B57361_A'],
                         ['B57361_A', 'B7951_B'],
                         ['8860647', '8864217']]

# Additional individual nodes to remove
remove_nodes = ['8868450','22161408','8862288','8863139','8864345','8865732','8867066','8868418','B44869_A','B14087_A','B7951_B','B7951_A','8866992']

# Edges that stay in the graph but are skipped when selecting the edges of interest for the figure
ignore_edges = [['8867547', '8862973'],['8860596', '8864988'],['8868239', '8867363'],['8861764', '8863206'],['8861718', '8866182'],['8867449', 'B5729_B'],['8862930', '8866083'],
                ['8864805', '8867639'],['8864288', '8864805'],['8864288', '8865822']]

# Add the reversed direction of every ignored edge and convert all pairs to tuples,
# so that membership tests against (node, node) tuples work for both directions
add_ignore_edges = []
for edge in ignore_edges:
    add_ignore_edges.append([edge[1],edge[0]])
ignore_edges.extend(add_ignore_edges)
ignore_edges = [tuple(edge) for edge in ignore_edges]

# Nodes that are not drawn as route nodes in the figure ('8867547' is listed twice, which is harmless)
ignore_nodes = ['8862973','8864988','8867363','8861764','8861718','B5729_B','8866083','8864805', '8867639','8867547','8862214','8862925','S14716_B','S14716_A',
                '8867547','8862585','8866260','8867341','8860701','B17838816_A','8862143','B42792_B','8867784']

# Drop every listed edge together with both of its end nodes; anything already absent is skipped
for start_node, end_node in remove_edge_and_nodes:
    if [start_node, end_node] in FG_model.edges:
        FG_model.remove_edge(start_node, end_node)
    for endpoint in (start_node, end_node):
        if endpoint in FG_model.nodes:
            FG_model.remove_node(endpoint)

# Drop the remaining individual nodes (nodes that are not in the graph are silently ignored)
FG_model.remove_nodes_from(remove_nodes)

# Nodes with exactly two incident edges, i.e. nodes that are not an intersection
non_intersection_nodes = [candidate for candidate in FG_model.nodes if len(FG_model.edges(candidate)) == 2]

### Set parameters

In [None]:
# Colour scale for the speed figure: values between 0 and 14 mapped onto the 'magma' colour map
norm = mpl.colors.Normalize(vmin=0, vmax=14)
cmap = plt.get_cmap('magma')

# Conversion factor: one knot expressed in metres per second
knots = 0.514444444

# Coordinate reference systems and the (x, y)-ordered transformations between them
utm = pyproj.CRS('EPSG:32631')
wgs84_ = pyproj.CRS('EPSG:4326')
utm_to_wgs84 = pyproj.Transformer.from_crs(utm, wgs84_, always_xy=True).transform
wgs84_to_utm = pyproj.Transformer.from_crs(wgs84_, utm, always_xy=True).transform

## Functions

In [None]:
def make_rgb_transparent(rgb, bg_rgb,alpha):
    """ 
    Function that blends a colour with a background colour into an opaque colour
    
    Parameters
    ----------
    rgb: foreground colour as an iterable of channel values from 0.0 to 1.0
    bg_rgb: background colour as an iterable of channel values from 0.0 to 1.0
    alpha: weight of the foreground colour from 0.0 to 1.0 as float
    
    :returns: list with the blended channel values
    """
    
    blended = []
    for foreground, background in zip(rgb, bg_rgb):
        # weighted average of the two channels
        blended.append(alpha * foreground + (1 - alpha) * background)
    return blended

def dataframe_preparation(df):
    """ 
    Function that prepares the voyage dataframe for further analysis
    
    Every voyage (one row with lists of samples) is expanded into one row per
    AIS sample.
    
    Parameters
    ----------
    df: voyage dataframe with the columns 'name', 'coordinates' (points with
        .x and .y), 'times' (numpy datetime64 values), 'draught', 'sog' and 'cog'
    
    :returns: dataframe with one row per sample and the columns
              name, trip_id, traj_id, time, longitude, latitude, draught, sog, cog
    """
    columns = ['name','trip_id','traj_id','time','longitude','latitude','draught','sog','cog']
    epoch = np.datetime64("1970-01-01")
    one_second = np.timedelta64(1,'s')

    # Collect the per-voyage frames and concatenate once (avoids quadratic copying)
    frames = []
    for loc,row_info in df.iterrows():
        n_samples = len(row_info.coordinates)
        # seconds since the epoch -> datetime (interpreted in the local timezone of the machine)
        times = [datetime.datetime.fromtimestamp((stamp-epoch)/one_second) for stamp in row_info.times]
        frames.append(pd.DataFrame({'name':[row_info['name']]*n_samples,
                                    'trip_id':[loc]*n_samples,
                                    'traj_id':[row_info['name']+'_'+str(loc)]*n_samples,
                                    'time':times,
                                    'longitude':[coord.x for coord in row_info.coordinates],
                                    'latitude':[coord.y for coord in row_info.coordinates],
                                    'draught':row_info.draught,
                                    'sog':row_info.sog,
                                    'cog':row_info.cog}))

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames).reset_index(drop=True)

def reset_index(df):
    """ 
    Function that replaces the index by a default integer index
    
    The old index is kept as a regular column.
    
    Parameters
    ----------
    df: pandas dataframe
    
    :returns: pandas dataframe with the old index as column
    """
    return df.reset_index(drop=False)

def reorder_columns(df,columns):
    """ 
    Function that selects the columns of a pandas dataframe in a given order
    
    Parameters
    ----------
    df: pandas dataframe
    columns: column labels in the desired order
    
    :returns: pandas dataframe with the columns in the given order
    """
    reordered = df.loc[:, columns]
    return reordered

def calculate_trajectory_information(df):
    """ 
    Function that calculates the time, distance, speed, and direction information of a trajectory dataframe
    
    Parameters
    ----------
    df: pandas dataframe with at least the columns 'traj_id', 'time', 'longitude' and 'latitude'
    
    :returns: pandas dataframe with the original columns plus
              'distance', 'timedelta', 'speed', 'direction' and 'directiondelta'
    """
    # Local import: the function is executed on the dask workers
    import movingpandas as mpd
    columns = list(df.columns) + ['distance','timedelta','speed','direction','directiondelta']
    new_df = pd.DataFrame(columns=columns)
    gdf = gpd.GeoDataFrame(df,columns=columns,crs="EPSG:4326",geometry=gpd.points_from_xy(df.longitude, df.latitude))
    trajectories = mpd.TrajectoryCollection(gdf,traj_id_col='traj_id',t='time',x='longitude',y='latitude',crs="EPSG:4326")
    for trajectory in trajectories:
        trajectory.add_distance(overwrite=True)
        trajectory.add_timedelta(overwrite=True)
        trajectory.add_speed(overwrite=True)
        trajectory.add_direction(overwrite=True)
    if trajectories:
        new_df = trajectories.to_point_gdf()
    new_df = new_df.reset_index(drop=False)

    # Nothing to derive when no trajectory could be built
    if new_df.empty:
        return new_df[columns]

    # Absolute change of heading between consecutive samples. A change of more
    # than 180 degrees is the same turn measured the other way around
    # (e.g. 350 -> 10 degrees is a turn of 20 degrees, not 340).
    headings = new_df.direction.to_numpy(dtype=float)
    turns = np.abs(np.diff(headings))
    turns = np.where(turns > 180, 360 - turns, turns)
    new_df['directiondelta'] = np.concatenate(([0.0], turns))

    # The very first sample has no predecessor
    new_df.at[0,'timedelta'] = pd.Timedelta(0,'s')
    new_df = new_df[columns]
    return new_df

def add_checkpoints(df, thr_m=25, thr_dist=1000, thr_course=30):
    """ 
    Function that marks checkpoints along the trajectories
    
    A checkpoint is set at the first and last sample, at both sides of a change
    of trajectory, and wherever the distance or course change accumulated since
    the previous checkpoint exceeds its threshold. The column 'checkpt' is also
    added to the dataframe that is passed in.
    
    Parameters
    ----------
    df: pandas dataframe with the columns 'distance', 'directiondelta' and 'traj_id'
    thr_m: time threshold in minutes (currently not used)
    thr_dist: distance threshold in meters
    thr_course: course threshold in degrees
    
    :returns: pandas dataframe with a fresh integer index and the column 'checkpt' (0 or 1)
    """
    df['checkpt'] = 0
    df = df.reset_index(drop = True)

    travelled = 0
    turned = 0
    for pos in df.index[1:]:
        travelled += df.at[pos, 'distance']
        turned += df.at[pos, 'directiondelta']

        new_trip = df.at[pos, 'traj_id'] != df.at[pos-1, 'traj_id']
        exceeded = (travelled > thr_dist) | (abs(turned) > thr_course)
        if not (new_trip or exceeded):
            continue

        # a new trip also closes the previous one with a checkpoint
        if new_trip:
            df.at[pos-1, 'checkpt'] = 1
        df.at[pos, 'checkpt'] = 1
        travelled = 0
        turned = 0

    # the first and last sample are always checkpoints
    for boundary in (0, df.index[-1]):
        df.at[boundary, 'checkpt'] = 1

    return df

def get_neighbors(nbs, G): 
    """ 
    Function that extends a list of nodes with the direct neighbours of each of these nodes
    
    Parameters
    ----------
    nbs: list of nodes
    G: networkx graph
    
    :returns: list with the given nodes followed by their neighbours (duplicates are kept)
    """
    
    return nbs + [neighbor for node in nbs for neighbor in G.neighbors(node)]


def get_x_deg_sg(nodes, x, G): 
    """ 
    Function that creates the subgraph around one or more nodes
    
    The node list is extended x times with the neighbours of all nodes collected so far.
    
    Parameters
    ----------
    nodes: node or list of nodes to start from
    x: number of neighbour expansions
    G: networkx graph
    
    :returns: subgraph of G induced by the collected nodes
    """
    # a single node is wrapped in a list
    collected = nodes if type(nodes) == list else [nodes]

    for _ in range(0, x):
        collected = get_neighbors(collected, G)

    return G.subgraph(collected)

def create_edge_paths(df, G): 
    """ 
    Function that creates paths of edges over the trajectory
    
    For every checkpoint the closest node and edge of the network are
    determined. Between consecutive anchor samples a route over the network is
    calculated and stored in the column 'path' for all samples in between.
    
    Parameters
    ----------
    df: pandas dataframe with the columns 'longitude', 'latitude', 'traj_id' and 'checkpt'
    G: networkx graph
    
    :returns: pandas dataframe with the added columns 'ch_cl_node', 'ch_cl_edge' and 'path'
    :raises ValueError: if fewer than two anchor samples are available to route between
    """
    # Local import: the function is executed on the dask workers
    import fis_network
    
    df['ch_cl_node'] = ''
    df['ch_cl_edge'] = ''
    df['path'] = ''
    df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['longitude'], df['latitude']), crs="EPSG:4326")
    
    # create a graph that covers the AIS track
    SG_ais = fis_network.reduce_FG_to_AIS_area(G, df)
    
    ind_cps = df[df['checkpt'] == 1].index 

    for i_cp, idx_cp in enumerate(ind_cps):
        first_of_trip = (i_cp == 0) or (df.at[ind_cps[i_cp-1], 'traj_id'] != df.at[idx_cp, 'traj_id'])
        if first_of_trip:
            # the first checkpoint of a trip is searched in the graph covering the whole track
            SG = SG_ais
        else:
            # later checkpoints are searched in the neighbourhood (4 expansions) of the previous node
            prev_node = df.at[ind_cps[i_cp-1], 'ch_cl_node']
            SG = get_x_deg_sg(prev_node, 4, G)

        # determine closest edge and node to the checkpoint
        curr_edge, curr_node = fis_network.find_closest_edge_and_node(SG, df.at[idx_cp, 'geometry'])
        df.at[idx_cp, 'ch_cl_node'] = curr_node
        df.at[idx_cp, 'ch_cl_edge'] = curr_edge
        
    # anchors: the first sample at which each distinct closest node occurs, plus the first and last sample
    cl_nodes = list(df.ch_cl_node.to_numpy())
    keep_node = list(dict.fromkeys(cl_nodes.index(node) for node in cl_nodes if node != ''))
    if 0 not in keep_node:
        keep_node.insert(0,0)
    if len(df)-1 not in keep_node:
        keep_node.append(len(df)-1)
        
    paths = [edge for idx,edge in df['ch_cl_edge'].items() if idx in keep_node]
    if len(paths) < 2:
        raise ValueError("create_edge_paths needs at least two anchor samples to derive a route")

    routes = []
    for idx,(path1,path2) in enumerate(zip(paths[:-1],paths[1:])):
        # shortest routes between all combinations of the end nodes of the two edges;
        # the candidate with the most nodes is kept
        all_routes = [nx.dijkstra_path(G,start,end) for start in (path1[0],path1[1]) for end in (path2[0],path2[1])]
        route_index = np.argmax([len(route) for route in all_routes])
        routes[keep_node[idx]:keep_node[idx+1]] = [all_routes[route_index]]*(keep_node[idx+1]-keep_node[idx])
    # the last sample gets the route of the last segment
    routes.append(all_routes[route_index])
    df['path'] = routes

    df.drop(columns=['geometry'], inplace=True)

    return df

def df_with_closest_edges(df, G): 
    """ 
    Function that determines for every sample the closest edge on its path
    
    The closest edge is stored in the column 'cl_edge' as a tuple of two node
    names, ordered in the direction of the path. The helper columns
    'ch_cl_edge', 'ch_cl_node', 'checkpt', 'path' and 'geometry' are removed.
    
    Parameters
    ----------
    df: pandas dataframe with the columns 'longitude', 'latitude' and 'path'
    G: networkx graph
    
    :returns: pandas dataframe
    """
    # Local import: the function is executed on the dask workers
    import fis_network
    df['cl_edge'] = ''
    
    # add geometry to dataframe
    sample_points = gpd.points_from_xy(df['longitude'], df['latitude'])
    df = gpd.GeoDataFrame(df, geometry=sample_points, crs="EPSG:4326")
    
    for row_label,sample in df.iterrows():
        # search only among the edges of the path of this sample
        closest, _ = fis_network.find_closest_edge(G.subgraph(sample.path), point=Point([sample.longitude,sample.latitude]))
        start_node,end_node = closest[0],closest[1]
        path_nodes = list(sample.path)
        # order the edge in the travel direction of the path
        if path_nodes.index(start_node) > path_nodes.index(end_node):
            start_node,end_node = end_node,start_node
        df.at[row_label, 'cl_edge'] = (str(start_node),str(end_node))
    
    return df.drop(columns=['ch_cl_edge', 'ch_cl_node', 'checkpt','path','geometry'])

def create_row_edge_crossing(df_ais, edge_ixs, colnames):
    """ 
    Function that creates a row for the edge dataframe
    
    The row summarises the consecutive samples of one vessel on one edge.
    
    Parameters
    ----------
    df_ais: pandas dataframe with the columns 'cl_edge', 'name', 'traj_id', 'time',
            'draught', 'timedelta', 'distance', 'latitude' and 'longitude'
    edge_ixs: index labels of the samples in df_ais that belong to the edge crossing
    colnames: column names as a list of strings
    
    :returns: pandas dataframe with a single row (index 0)
    """
    first_ix = edge_ixs[0]
    # The trip identifier is stored in 'traj_id' when that column is requested;
    # 'tripnr' is kept for callers that still use the old column name
    trip_column = 'traj_id' if 'traj_id' in colnames else 'tripnr'

    df_row = pd.DataFrame(columns=colnames, index=[0])
    df_row.loc[0, 'edgeA'] = df_ais.loc[first_ix, 'cl_edge'][0]
    df_row.loc[0, 'edgeB'] = df_ais.loc[first_ix, 'cl_edge'][1]
    df_row.loc[0, 'vesselname'] = df_ais.loc[first_ix, 'name']
    df_row.loc[0, trip_column] = df_ais.loc[first_ix, 'traj_id']
    
    df_row.loc[0, 't_start'] = df_ais.loc[first_ix, 'time']
    df_row.loc[0, 'draught'] = df_ais['draught'].loc[edge_ixs].max()
    df_row.loc[0, 'edge_duration'] = df_ais['timedelta'].loc[edge_ixs].sum()
    df_row.loc[0, 'edge_dist'] = df_ais['distance'].loc[edge_ixs].sum()
    # average speed over the edge; 0 when the duration is missing or not positive
    if not pd.isnull(df_row.loc[0, 'edge_duration']) and df_row.loc[0, 'edge_duration'] > pd.Timedelta(0,'s'):
        df_row.loc[0, 'edge_speed'] = df_row.loc[0, 'edge_dist'] / df_row.loc[0, 'edge_duration'].total_seconds()
    else:
        df_row.loc[0, 'edge_speed'] = 0

    # lists are wrapped in a one-element series so that they are stored in a single cell
    df_row.loc[[0], 'dtime'] = pd.Series([list(df_ais.loc[edge_ixs, 'timedelta'])],index=[0])
    df_row.loc[[0], 'ddist'] = pd.Series([list(df_ais.loc[edge_ixs, 'distance'])],index=[0])
    df_row.loc[[0], 'latlon'] = pd.Series([[list(df_ais.loc[edge_ixs, 'latitude']),
                               list(df_ais.loc[edge_ixs, 'longitude'])]],index=[0])

    return df_row

def create_df_edges(df):
    """ 
    Function that creates an edge dataframe
    
    Consecutive samples that lie on the same edge and belong to the same
    trajectory are combined into one row (one edge crossing).
    
    Parameters
    ----------
    df: pandas dataframe with one row per sample, including the columns 'cl_edge' and 'traj_id'
    
    :returns: pandas dataframe with one row per edge crossing
    """
    
    cols = ['edgeA', 'edgeB', 'vesselname', 'traj_id', 'draught', 't_start', 'edge_duration', 'edge_dist', 'edge_speed', 'dtime', 'ddist', 'latlon', 'clus']

    if len(df) == 0:
        return pd.DataFrame(columns=cols)

    labels = list(df.index)
    # (edge, trajectory) key per sample; the edge is rebuilt as a tuple so that the comparison gives a single boolean
    keys = [((edge[0], edge[1]), trip) for edge, trip in zip(df['cl_edge'], df['traj_id'])]

    # Collect the rows and concatenate once (avoids quadratic copying)
    rows = [pd.DataFrame(columns=cols)]
    edge_ixs = [labels[0]]
    for next_label, current_key, next_key in zip(labels[1:], keys, keys[1:]):
        if current_key == next_key:
            edge_ixs.append(next_label)
        else:
            # the edge or trajectory changes: close the current edge crossing
            rows.append(create_row_edge_crossing(df, edge_ixs, cols))
            edge_ixs = [next_label]

    # close the last edge crossing
    rows.append(create_row_edge_crossing(df, edge_ixs, cols))
    return pd.concat(rows).reset_index(drop=True)

## Import the results of the AIS data analysis and the open geospatial data

In [None]:
# Read the ship dataframe from blob storage and materialise it as a pandas dataframe
ddf = dd.read_parquet(path_name+'/ship_dataframe',storage_options={"account_name": "rwsais", "sas_token": sas_token})
ship_dataframe = ddf.compute()

In [None]:
# Read the voyage dataframe from blob storage
ddf = dd.read_parquet(path_name+'/voyage_dataframe',storage_options={"account_name": "rwsais", "sas_token": sas_token})
geometry_columns = ['geometry_anchorage','geometry_entry','geometry_departure','coordinates']
ddf_i = ddf.partitions[:]
# NOTE(review): renumber_index and convert_string_geometry_to_shapely_geometry are not defined
# or imported in the visible part of this notebook — confirm where they come from
ddf_i = ddf_i.map_partitions(renumber_index)
ddf_i = ddf_i.map_partitions(convert_string_geometry_to_shapely_geometry,geometry_columns)
voyage_dataframe = ddf_i.compute()
voyage_dataframe = voyage_dataframe.rename(columns={'trip_number':'trip_id'})
# Keep only the voyages that call at one of the known berths
voyage_dataframe = voyage_dataframe[voyage_dataframe.berth_of_call.isin(berths.index)]
voyage_dataframe = voyage_dataframe.reset_index(drop=True)
# From here on voyage_dataframe holds one row per AIS sample instead of one row per voyage
voyage_dataframe = dataframe_preparation(voyage_dataframe)

In [None]:
# Write the prepared AIS tracks to blob storage, indexed by trajectory:
# as many partitions are requested as there are distinct trajectories
ddf = dask.dataframe.from_pandas(voyage_dataframe,npartitions=1)
ddf = ddf.set_index('traj_id',npartitions=len(set(voyage_dataframe.traj_id)))
scheme_information = {'trip_id': pa.int64()}
# set_index moved 'traj_id' into the index: turn it back into a column and restore the original column order
ddf = ddf.map_partitions(reset_index)
ddf = ddf.map_partitions(reorder_columns,columns=voyage_dataframe.columns)
ddf.to_parquet(path_name+'/ais_tracks', storage_options={"account_name": "rwsais", "sas_token": sas_token},schema=scheme_information,engine='pyarrow',write_index=False)

In [None]:
# Lazily read the AIS tracks back from blob storage (nothing is computed yet)
ddf = dd.read_parquet(path_name+'/ais_tracks', storage_options={"account_name": "rwsais", "sas_token": sas_token})

In [None]:
# Map-matching pipeline, applied per partition. Every step gets an empty dataframe as 'meta'
# that describes the columns of its output.
ddf_i = ddf.partitions[:]
# Step 1: distance, time step, speed and direction per sample
df_v = pd.DataFrame(columns=['name','trip_id','traj_id','time','longitude','latitude','draught','sog','cog','distance','timedelta','speed','direction','directiondelta'])
ddf_i = ddf_i.map_partitions(calculate_trajectory_information,meta=df_v)
# Step 2: checkpoints along the trajectories
df_cp = pd.DataFrame(columns=['name','trip_id','traj_id','time','longitude','latitude','draught','sog','cog','distance','timedelta','speed','direction','directiondelta','checkpt'])
ddf_i = ddf_i.map_partitions(add_checkpoints,meta=df_cp)
# Step 3: closest node/edge per checkpoint and the path over the network
df_p = pd.DataFrame(columns=['name','trip_id','traj_id','time','longitude','latitude','draught','sog','cog','distance','timedelta','speed','direction','directiondelta','checkpt','ch_cl_node','ch_cl_edge','path'])
ddf_i = ddf_i.map_partitions(create_edge_paths,G=FG,meta=df_p)
# Step 4: closest edge on the path for every sample
df_ais = pd.DataFrame(columns=['name','trip_id','traj_id','time','longitude','latitude','draught','sog','cog','distance','timedelta','speed','direction','directiondelta','cl_edge'])
ddf_i = ddf_i.map_partitions(df_with_closest_edges,G=FG,meta=df_ais)
# Arrow schema of the output; the columns are cast to the matching pandas dtypes before writing
scheme_information = {'name': pa.string(),
                      'trip_id':pa.int64(),
                      'traj_id': pa.string(),
                      'time': pa.timestamp('ns'),
                      'longitude': pa.float64(),
                      'latitude': pa.float64(),
                      'draught':pa.float64(),
                      'sog': pa.float64(),
                      'cog': pa.float64(),
                      'distance': pa.float64(),
                      'timedelta': pa.duration('ns'),
                      'speed': pa.float64(),
                      'direction': pa.float64(),
                      'directiondelta': pa.float64(),
                      'cl_edge': pa.list_(pa.string())}
for datacolumn,datatype in scheme_information.items(): 
    ddf_i[datacolumn] = ddf_i[datacolumn].astype(datatype.to_pandas_dtype())
# Collapse to a single partition and write the result to blob storage
ddf_i = ddf_i.repartition(npartitions=1)
ddf_i.to_parquet(path_name+'/edge_dataframe', storage_options={"account_name": "rwsais", "sas_token": sas_token},schema=scheme_information,engine='pyarrow',write_index=False)

In [None]:
# Read the map-matched samples back from blob storage as a pandas dataframe with a fresh integer index
ddf = dd.read_parquet(path_name+'/edge_dataframe', storage_options={"account_name": "rwsais", "sas_token": sas_token})
df_ais = ddf.compute()
df_ais = df_ais.reset_index(drop=True)

In [None]:
# Combine the consecutive samples on the same edge into one row per edge crossing
df_edges = create_df_edges(df_ais)

### Group the data by edge name

In [None]:
# Identify every edge crossing by the (edgeA, edgeB) tuple of its end nodes
df_edges['edge'] = [(node_a, node_b) for node_a, node_b in zip(df_edges['edgeA'], df_edges['edgeB'])]

In [None]:
# Per edge: the vessel names, durations, distances and speeds of all crossings, each collected in a list
df_edge = df_edges.groupby(['edge']).agg(dict.fromkeys(['vesselname', 'edge_duration', 'edge_dist', 'edge_speed'], list))

## Figure 06

In [None]:
# Figure set-up: map extent (degrees longitude/latitude) and colours
fig,ax = plt.subplots(figsize=[32,18])
lon_min = 3.9
lon_max = 4.385
lat_min = 51.835
lat_max = 52.015
ax.set_facecolor('lightgrey')
water_color = 'lightblue'
boundary_color = 'k'
color_1 = (135/255,135/255,135/255) #'dodgerblue'
color_2 = (135/255,135/255,135/255) #'blue'
color_3 = (135/255,135/255,135/255) #'midnightblue'
anchorage_color = 'limegreen'
seperation_color = 'violet'
terminal_color = 'darkgrey'

color_calandlijn = (226/255,33/255,18/255)
color_erasmuslijn = (2/255,58/255,141/255)

# Water layer; the degenerate fill at (0,0) only creates the legend entry
waterways.plot(ax=ax,facecolor=water_color,edgecolor='none',linewidth=2,zorder=100)
ax.fill([0,0,0,0],[0,0,0,0],color=water_color,label='water')

# Construct the outline of the virtual anchorage area from the positions of nodes
# '8866969' and '8866305' (shifted by 0.004 in y), rotated by 0.05*pi
x0 = FG.nodes['8866969']['geometry'].x
y0 = FG.nodes['8866969']['geometry'].y+0.004
x2 = np.mean([FG.nodes['8866969']['geometry'].x,FG.nodes['8866305']['geometry'].x])
y2 = np.mean([FG.nodes['8866969']['geometry'].y,FG.nodes['8866305']['geometry'].y])+0.004
x1 = x0-(x2-x0)
y1 = y0-(y2-y0)
angle = np.arctan2(x2-x0,y2-y0)+0.05*np.pi
dist = np.sqrt((x2-x0)**2+(y2-y0)**2)/1.25
x3 = x2+np.sin(angle-0.5*np.pi)*dist
y3 = y2+np.cos(angle-0.5*np.pi)*dist
x4 = x1+np.sin(angle-0.5*np.pi)*dist
y4 = y1+np.cos(angle-0.5*np.pi)*dist
virtual_anchorage_area = Polygon([Point(x1,y1),Point(x2,y2),Point(x3,y3),Point(x4,y4)]).exterior
ax.fill([coord[0] for coord in virtual_anchorage_area.coords],
        [coord[1] for coord in virtual_anchorage_area.coords],
        facecolor='none',edgecolor=anchorage_color,linestyle='--',label='anchorage area',linewidth=3,zorder=100)

# Terminal layer (the loop copies the geometries one by one into the copy without changing them)
Koole = Koole.reset_index(drop=True)
new_Koole = Koole.copy()
for loc,info in Koole.iterrows():
    new_Koole.loc[loc,'geometry'] = info['geometry']
new_Koole.plot(ax=ax,color=terminal_color,edgecolor='none',linewidth=5,zorder=99)

# turning_basins.plot(ax=ax,facecolor='none',edgecolor='k',linewidth=2,linestyle='--',zorder=100)
# ax.plot([0,0], [0,0], marker='$\u25CC$', markerfacecolor='k', markeredgecolor='none', markersize=32, linestyle='none',linewidth=1,label='turning basin')

# Full model network in a faded colour as background
for node in FG_model.nodes:
    coords = FG_model.nodes[node]['geometry'].coords.xy
    ax.scatter(coords[0],coords[1],s=30,linewidth=3,color=make_rgb_transparent(color_erasmuslijn,mpl.colors.to_rgb(water_color),alpha=0.25),zorder=102)
for edge in FG_model.edges:
    coords = FG_model.edges[edge]['geometry'].coords.xy
    ax.plot(*coords,linewidth=4,color=make_rgb_transparent(color_erasmuslijn,mpl.colors.to_rgb(water_color),alpha=0.25),zorder=102)

# Edges with AIS observations that are part of the model network and not explicitly ignored
edges_of_interest = pd.DataFrame(columns = df_edge.columns)
for edge in df_edge.index:
    if edge in FG_model.edges and edge not in ignore_edges:
        edges_of_interest = pd.concat([edges_of_interest,df_edge.loc[[edge]]])
        
# End nodes of the edges of interest, without duplicates and in order of first appearance
nodes_of_interest = []
for edge in edges_of_interest.index:
    if edge[0] not in nodes_of_interest:
        nodes_of_interest.append(edge[0])
    if edge[1] not in nodes_of_interest:
        nodes_of_interest.append(edge[1])
        
# Draw every edge of interest as a line offset by 100 m from the edge geometry,
# coloured by the mean observed speed (converted to knots, scaled to the 0-14 range)
for edge in edges_of_interest.index:
    geom = FG_model.edges[edge]['geometry']
    geom_p1 = FG_model.nodes[edge[0]]['geometry']
    geom_p2 = FG_model.nodes[edge[1]]['geometry']
    first = Point(geom.coords[0])
    second = Point(geom.coords[-1])
    # NOTE(review): this angle (in degrees) is computed but not used in this loop
    angle = math.atan2((second.x-first.x),(second.y-first.y))
    angle = angle/(2*np.pi)*360
    # work in UTM so that the offset below is in metres
    geom = transform(wgs84_to_utm,geom)

    if not edge[0] in list(FG_model.nodes) or not edge[1] in list(FG_model.nodes):
        continue
    # Determine the sailing direction from the route between the edge and node '8866969'
    # NOTE(review): the 'offshore*' nodes are removed from FG_model earlier in this notebook, so the
    # 'offshore6' tests below cannot match; 'bound' keeps its value from the previous iteration
    # (or is undefined on the first one) when none of the branches assigns it — confirm intent
    route = nx.dijkstra_path(FG_model,edge[0],'8866969')
    if edge[0] == '8866969':
        if edge[1] == 'offshore6':
            bound = 'outbound'
        elif edge[1] == '8866305':
            bound = 'inbound'
            
    if 'offshore6' in route:
        if edge[1] in route and list(route).index(edge[1]) > list(route).index(edge[0]):
            bound = 'inbound'
        else:
            bound = 'outbound'
    elif '8866305' in route:
        if edge[1] in route and list(route).index(edge[1]) < list(route).index(edge[0]):
            bound = 'outbound'
        else:
            bound = 'inbound'

    # The offset side depends on whether the geometry starts at the first node of the edge
    # NOTE(review): the 'inbound' and 'outbound' branches are identical, so 'bound' currently
    # has no effect on the result — confirm whether different offsets were intended
    if bound == 'inbound':
        if np.round(first.x,3) == np.round(geom_p1.x,3) and np.round(first.y,3) == np.round(geom_p1.y,3):
            geom_final = geom.offset_curve(-100)
        else:
            geom_final = geom.offset_curve(100)
    else:
        if np.round(first.x,3) == np.round(geom_p1.x,3) and np.round(first.y,3) == np.round(geom_p1.y,3):
            geom_final = geom.offset_curve(-100)
        else:
            geom_final = geom.offset_curve(100)

    # back to WGS84 for plotting
    geom = transform(utm_to_wgs84,geom_final)
    coords = geom.coords.xy
    color = cmap(np.mean(edges_of_interest[edges_of_interest.index == (edge[0],edge[1])].edge_speed.iloc[0])/knots/14)
    ax.plot(coords[0],coords[1],linewidth=6,color=color,zorder=102)

# Degenerate lines at (0,0) that only create the legend entries for the network and the routes
ax.plot([0,0],[0,0],color=make_rgb_transparent(color_erasmuslijn,mpl.colors.to_rgb(water_color),alpha=0.25),linewidth=4,marker='o',markersize=8,label='Network')    
ax.plot([0,0],[0,0],color=color_erasmuslijn,linewidth=8,marker='o',markerfacecolor='white',markeredgecolor='k',markersize=14,label='Routes',markeredgewidth=4)
        
# Route nodes (end nodes of the edges of interest that are not ignored) as white markers
nodes_x = []
nodes_y = []
for node in nodes_of_interest:
    if node not in ignore_nodes:
        coords = FG_model.nodes[node]['geometry'].coords.xy
        nodes_x.append(coords[0])
        nodes_y.append(coords[1])
ax.scatter(nodes_x,nodes_y,s=160,linewidth=4,edgecolor='k',facecolor='white',zorder=102)    
ax.set_xlim([lon_min,lon_max])
ax.set_ylim([lat_min,lat_max]);

# Legend: existing handles are combined with manually created patches for land and terminal
# (the handle positions rely on the order in which the labelled artists were added above)
legend_entities,labels = ax.get_legend_handles_labels()
patches = [legend_entities[0],
           mpl.patches.Patch(facecolor='lightgrey',edgecolor='none',linewidth=2),
           mpl.patches.Patch(facecolor=color_1,edgecolor='none',linewidth=5),
           legend_entities[1],
           legend_entities[-2],
           legend_entities[-1]]
labels = ['Water','Land','Liquid bulk terminal','Virtual anchorage area','Network','Routes']
ax.legend(patches,labels,loc='upper right',prop={'size': 25})

# Hide the axes frame and ticks
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(bottom=False, labelbottom=False,
               left=False, labelleft=False)
# Colour bar for the speed scale
cbar = plt.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap),shrink=0.86,ax=ax,anchor=(-0.325,0.5025));
cbar.outline.set_linewidth(2)
cbar.ax.tick_params(labelsize=27)
cbar.ax.set_ylabel('Speed [kn]',size=27);
# Save the figure as EPS in the output folder
fig.savefig(path+'\\04_Output_data\\02_Figures\\Figure_06_Vessel_Speed_Network.eps', format='eps', dpi=1000,bbox_inches='tight',facecolor='none'); 

## Create vessel speed data

In [None]:
# Mean of the observed speeds per edge; only this column is kept for the vessel speed data
average_speeds = []
for speeds in df_edge['edge_speed'].to_numpy():
    average_speeds.append(np.mean(speeds))
df_edge['average_speed'] = average_speeds
del average_speeds
vessel_speed_dataframe = df_edge[['average_speed']]

In [None]:
# Store the average speed per edge as input for the simulation
with open(path+'\\03_Simulation\\01_Input_data\\03_Vessels\\vessel_speed_dataframe.pickle', 'wb') as handle:
    pickle.dump(vessel_speed_dataframe, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Origin-Destination Matrix

In [None]:
# Select the voyage columns needed for the origin-destination matrix
# NOTE(review): earlier in this notebook 'trip_number' is renamed to 'trip_id' and voyage_dataframe is
# replaced by the output of dataframe_preparation (one row per sample) — confirm that the columns
# selected here still exist when the notebook is run from top to bottom
df_origin_destination = voyage_dataframe[['name','trip_number','berth_of_call','coordinates','arrival_at_port','arrival_at_anchorage_at_arrival','departure_from_anchorage_at_arrival','arrival_at_port_entrance','departure_from_port_entrance','arrival_at_berth','departure_from_berth','arrival_at_turning_basin','departure_from_turning_basin','draught_at_arrival','draught_at_departure']]

In [None]:
# Arrival time per voyage: the arrival at the port when there is no port-entrance time (stored as a
# float NaN), otherwise the arrival at the port entrance minus the waiting time in the anchorage.
# The waiting time is derived here from the anchorage arrival/departure times, because the
# 'waiting_time_in_anchorage' column does not exist yet when this cell runs.
arrival_times = []
for loc,info in df_origin_destination.iterrows():
    entrance_time = info.arrival_at_port_entrance
    if isinstance(entrance_time, float):
        arrival_times.append(info.arrival_at_port)
        continue

    anchorage_arrival = info.arrival_at_anchorage_at_arrival
    if isinstance(anchorage_arrival, float):
        # no anchorage visit: no waiting time
        waiting_time = pd.Timedelta(0,'s')
    else:
        waiting_time = info.departure_from_anchorage_at_arrival-anchorage_arrival

    if pd.isnull(waiting_time):
        arrival_times.append(entrance_time)
    else:
        arrival_times.append(entrance_time-waiting_time)

In [None]:
# fis_network is only imported inside the functions above; it is needed at module level here
import fis_network

# Origin and destination: first and last coordinate of the voyage, matched to the closest network node
df_origin_destination['origin'] = [coordinates[0] for coordinates in df_origin_destination.coordinates.to_numpy()]
df_origin_destination['destination'] = [coordinates[-1] for coordinates in df_origin_destination.coordinates.to_numpy()]
# Missing times are stored as float NaN; in that case the duration is set to zero
df_origin_destination['waiting_time_in_anchorage'] = [time2-time1 if not type(time1) == float else pd.Timedelta(0,'s') for time1,time2 in zip(df_origin_destination.arrival_at_anchorage_at_arrival,df_origin_destination.departure_from_anchorage_at_arrival)]
df_origin_destination['origin_node'] = [fis_network.find_closest_edge_and_node(FG,origin)[1] for origin in df_origin_destination.origin.to_numpy()]
df_origin_destination['destination_node'] = [fis_network.find_closest_edge_and_node(FG,destination)[1] for destination in df_origin_destination.destination.to_numpy()]
df_origin_destination['berth_node'] = '8866999'
# Voyages that pass the port entrance start/end at node '8866969'
df_origin_destination['origin_node'] = ['8866969' if not type(arrival) == float else origin for origin,arrival in zip(df_origin_destination.origin_node.to_numpy(),df_origin_destination.arrival_at_port_entrance.to_numpy())]
df_origin_destination['destination_node'] = ['8866969' if not type(departure) == float else destination for destination,departure in zip(df_origin_destination.destination_node.to_numpy(),df_origin_destination.departure_from_port_entrance.to_numpy())]
df_origin_destination['turning_time'] = [time2-time1 if not type(time1) == float else pd.Timedelta(0,'s') for time1,time2 in zip(df_origin_destination.arrival_at_turning_basin,df_origin_destination.departure_from_turning_basin)]
# Arrival times without timezone are localised to UTC
df_origin_destination['arrival'] = [time.tz_localize(pytz.timezone('UTC')) if not time.tz else time for time in arrival_times]
df_origin_destination['draught'] = df_origin_destination['draught_at_arrival'] 
df_origin_destination['(un)loading'] = df_origin_destination['draught_at_arrival']-df_origin_destination['draught_at_departure'] 
df_origin_destination['length'] = [ship_dataframe.loc[name].length for name in df_origin_destination.name.to_numpy()]
df_origin_destination['width'] = [ship_dataframe.loc[name].width for name in df_origin_destination.name.to_numpy()]
# Time at the berth; the departure is taken from the same dataframe as the arrival
# NOTE(review): a missing departure is replaced by 2020-01-01 UTC — confirm this fallback
df_origin_destination['(un)loading time'] = [time2-time1 if type(time2) != float else pd.Timestamp('2020-01-01',tz='UTC')-time1 for time1,time2 in zip(df_origin_destination['arrival_at_berth'].to_numpy(),df_origin_destination['departure_from_berth'].to_numpy())]
df_origin_destination['trip_id'] = df_origin_destination['name']+'_'+df_origin_destination['trip_number'].astype(str)
df_origin_destination = df_origin_destination[['name','trip_id','length','width','draught','(un)loading','berth_of_call','arrival','waiting_time_in_anchorage','turning_time','(un)loading time','origin_node','berth_node','destination_node']]
# Rebuild the arrival timestamps from their string representation with second resolution, in UTC
df_origin_destination['arrival'] = [pd.Timestamp(time.strftime('%Y-%m-%d %X'),unit='s',tz=pytz.timezone('UTC')) for time in df_origin_destination.arrival.to_numpy()]

In [None]:
# Store the origin-destination dataframe as input for the simulation
with open(path+'\\03_Simulation\\01_Input_data\\03_Vessels\\origin_destination_PoR.pickle', 'wb') as handle:
    pickle.dump(df_origin_destination, handle, protocol=pickle.HIGHEST_PROTOCOL)