Import libraries

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from datetime import datetime

# Silence pandas' chained-assignment (SettingWithCopy) warning notebook-wide
pd.set_option('mode.chained_assignment', None)

# Load the berth visits table from disk
berth_visits = pd.read_csv(filepath_or_buffer='data/berth_visits.csv')

Gather unique locations in berth_visits

In [None]:
# Map every distinct location_id in berth_visits to an empty placeholder string
locations = dict.fromkeys(set(berth_visits['location_id']), '')
locations

Gather these same locations using the PortMaps API

In [None]:
import requests
import json

# ~ 1400 locations so two calls needed (one per OBJECTID range)
url1 = "https://api.portofrotterdam.com/v1/datascience/148/query?where=OBJECTID<=1000&outFields=*&f=geojson"
url2 = "https://api.portofrotterdam.com/v1/datascience/148/query?where=OBJECTID>1000&outFields=*&f=geojson"
headers = {'apikey': '<API_KEY>'} # ask Bram for this key if needed

# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors (e.g. a rejected API key)
# directly instead of as a confusing KeyError on 'features' further down.

# Call one
req1 = requests.get(url1, headers=headers, timeout=60)
req1.raise_for_status()
response1 = json.loads(req1.text)

# Call two
req2 = requests.get(url2, headers=headers, timeout=60)
req2.raise_for_status()
response2 = json.loads(req2.text)

# Select the list of GeoJSON features from each payload
response1, response2 = response1['features'], response2['features']

# Merge both pages into one list of features
response = response1 + response2

# Retrieve only relevant information: one [ZZ_CODE, ZZLIGPLG, ZZHVNAAM] triple
# per feature, taken from the feature's 'properties' mapping
port_locations = [
    [props['ZZ_CODE'], props['ZZLIGPLG'], props['ZZHVNAAM']]
    for props in (feature['properties'] for feature in response)
]
port_locations

In [None]:
# Tabulate the [code, location 1, location 2] triples collected from the API
codes = pd.DataFrame(
    data=port_locations,
    columns=['Code', 'Locatie 1', 'Locatie 2'],
)
codes

Export (the conversion is done by hand)

In [None]:
# Distinct 'Locatie 2' names, one per row. Sorting (instead of iterating a
# set, whose order changes between Python sessions) makes the exported
# workbook identical on every run; missing values, if any, sort last.
unieke_locaties = (
    codes['Locatie 2']
    .drop_duplicates()
    .sort_values()
    .reset_index(drop=True)
    .to_frame(name='Locatie PORTMAPS')
)

# Empty column to be filled in by hand with the matching Evides name
unieke_locaties['Locatie EVIDES'] = ''

# Export to Excel
unieke_locaties.to_excel('data/locaties.xlsx')
unieke_locaties

Import

In [None]:
# Read the hand-completed mapping workbook back in
unieke_locaties = pd.read_excel('data/locaties_aangevuld.xlsx')

# Discard the three unnamed columns that came along with the workbook
unieke_locaties = unieke_locaties.drop(
    labels=['Unnamed: 0', 'Unnamed: 3', 'Unnamed: 4'],
    axis='columns',
)
unieke_locaties

In [None]:
# Add (or reset) the Evides location column with empty strings
codes = codes.assign(**{'Locatie EVIDES': ''})
codes

Add Evides location information

In [None]:
# Build a PortMaps-name -> Evides-name lookup once, then translate the whole
# 'Locatie 2' column in a single pass. This replaces a full scan of
# unieke_locaties for every row of codes, and a name without an entry in the
# mapping becomes NaN instead of aborting the cell with an IndexError.
evides_by_portmaps = (
    unieke_locaties
    .dropna(subset=['Locatie PORTMAPS'])
    .drop_duplicates(subset='Locatie PORTMAPS')  # first entry wins, as with .values[0]
    .set_index('Locatie PORTMAPS')['Locatie EVIDES']
)
codes['Locatie EVIDES'] = codes['Locatie 2'].map(evides_by_portmaps)
codes

Merge with berth_visits

In [None]:
# Give the key column the same name it has in berth_visits
codes = codes.rename({"Code": "location_id"}, axis='columns')

# Left join: every berth visit is kept, location names are attached where known
berth_visits_merged = berth_visits.merge(codes, how="left", on='location_id')
berth_visits_merged

Great, now we can start extracting AIS points. Let's import the Evides data. 

In [None]:
# Load the cleaned Evides data and discard the 'Unnamed: 0' column
evides = (
    pd.read_csv('../1. Exploratory Analysis/Data/Cleaned data/evides_cleaned2.csv')
    .drop('Unnamed: 0', axis=1)
)
evides

We'll create two new columns: longitude and latitude. 

In [None]:
# Start both coordinate columns out as empty strings
evides['Latitude'] = ''
evides['Longitude'] = ''
evides

Now, let's go through the steps to fill the latitude and longitude columns. We start with one shipment.

In [None]:
# Show the shipment with index label 8000 (all columns)
evides.loc[8000, :]

We first try to find a match based on date, ENI and location together.

In [None]:
# Berth visits of the same ship at the same harbour whose
# start_date..end_date window contains the shipment date (shipment 8000)
subset = berth_visits_merged.loc[
    berth_visits_merged['eni'].eq(evides.loc[8000, 'ENI'])
    & berth_visits_merged['Locatie EVIDES'].eq(evides.loc[8000, 'Haven'])
    & berth_visits_merged['start_date'].le(evides.loc[8000, 'Datum'])
    & berth_visits_merged['end_date'].ge(evides.loc[8000, 'Datum'])
]
subset

In case this is not possible, we try to find a match based on ENI and location.

In [None]:
sample_date = '2022-07-05'
sample_day = datetime.strptime(sample_date, '%Y-%m-%d')

# Berth visits of the same ship at the same harbour (shipment 1), any date
subset = berth_visits_merged.loc[
    berth_visits_merged['eni'].eq(evides.loc[1, 'ENI'])
    & berth_visits_merged['Locatie EVIDES'].eq(evides.loc[1, 'Haven'])
]

# Absolute distance in whole days between the sample date and each visit start
subset['day_diff'] = [
    abs((sample_day - datetime.strptime(start, '%Y-%m-%dT%H:%M:%S.%fZ')).days)
    for start in subset['start_date']
]

# Closest visit first
subset = subset.sort_values(by='day_diff')

subset

From here we select the entry closest in date. If this is also not possible, we select valid berth coordinates based on the location. 

In [None]:
# Every berth visit at the harbour of shipment 0, regardless of ship or date
subset = berth_visits_merged.loc[
    berth_visits_merged['Locatie EVIDES'].eq(evides.loc[0, 'Haven'])
]
subset

But first, we need to make sure all records in our Evides dataset can be converted. We've already merged based on all existing names that we received through the PortMaps API. This does not include the following names, listed with the corresponding number of rows:

In [None]:
# Harbour names that berth_visits_merged knows about. Built once up front
# rather than being rebuilt for every Evides harbour name inside the loop.
known_harbours = set(berth_visits_merged['Locatie EVIDES'])

for i in set(evides['Haven']):
    if i not in known_harbours:
        # Unmatched harbour name and the number of Evides rows that carry it
        print(i, len(evides[evides['Haven']==i]))

Fortunately, not many rows are affected by this, but we should take care of it nevertheless. We will convert these names to the nearest harbour that does have a matching location in the berth_visits dataset. 

In [None]:
# Manual mapping from harbour names that have no match in berth_visits to the
# harbour name they should be replaced with. 'REMOVE' marks rows to drop.
name_changes = {
'Westerkade':'Wilhelminakade',
'Maassluis':'Nieuwe Maas',
'Pelgrimskade':'REMOVE',
'Stena Line':'Rijnhaven',
'Jobskade/Jobshaven':'Boompjes',
'Pionier':'Hartelkanaal',
'Kemira':'Botlek',
'zuiddiep':'REMOVE',
'Maaskade':'Rijnhaven',
'Radio Holland':'Schiehaven',
'Wilhelminakade':'Rijnhaven',
'krimpen a/d IJssel':'REMOVE'
}

# Apply the renames one at a time, in dictionary order, with a single .loc
# assignment each. The chained form evides['Haven'][mask] = ... writes through
# an intermediate object and is silently ignored under pandas Copy-on-Write.
# The renames deliberately stay sequential: later entries see the result of
# earlier ones, so 'Westerkade' -> 'Wilhelminakade' -> 'Rijnhaven'. A one-shot
# Series.replace(name_changes) would not chain like this.
for i in name_changes:
    evides.loc[evides['Haven'] == i, 'Haven'] = name_changes[i]

# Drop observations with value 'REMOVE'; .copy() makes the result an
# independent frame so later assignments do not target a slice of the old one
evides = evides[evides['Haven'] != 'REMOVE'].copy()

Now, finally, let's extract the AIS coordinates!

In [None]:
# Fill evides['Latitude'] / evides['Longitude'] for every shipment, trying
# three progressively looser matches against berth_visits_merged:
#   Method 1 - same ship (ENI), same harbour, shipment date inside the visit
#   Method 2 - same ship and harbour, visit whose start is closest in date
#   Method 3 - any visit at the same harbour
# An empty string in both coordinate columns means "not filled yet".
#
# Values are written with evides.loc[i, col] = ...; the chained form
# evides[col].loc[i] = ... is unreliable on a filtered frame and is a silent
# no-op under pandas Copy-on-Write.

# Timestamp layout of berth_visits_merged['start_date']
start_date_format = '%Y-%m-%dT%H:%M:%S.%fZ'

for i in evides.index:
    shipment_date = evides.loc[i, 'Datum']
    same_ship = berth_visits_merged['eni'] == evides.loc[i, 'ENI']
    same_harbour = berth_visits_merged['Locatie EVIDES'] == evides.loc[i, 'Haven']

    # Attempt to find a perfect match
    # NOTE(review): start_date/end_date are compared with Datum exactly as
    # stored; if these are strings the comparison is lexicographic - confirm
    # that this is the intended behaviour for visits starting on the same day.
    subset = berth_visits_merged[same_ship
                                 & same_harbour
                                 & (berth_visits_merged['start_date'] <= shipment_date)
                                 & (berth_visits_merged['end_date'] >= shipment_date)]

    if len(subset) > 0:
        # Extract latitude and longitude of the first matching visit
        evides.loc[i, 'Latitude'] = subset['latitude_enter'].iloc[0]
        evides.loc[i, 'Longitude'] = subset['longitude_enter'].iloc[0]

        print("Index", str(i), "Method 1")

    # Then attempt to find a match based on ENI and location, if necessary.
    if evides.loc[i, 'Latitude'] == '' and evides.loc[i, 'Longitude'] == '':
        subset = berth_visits_merged[same_ship & same_harbour]

        if len(subset) > 0:
            # Rank the candidate visits by distance in whole days to THIS
            # shipment's date (previously every shipment was compared with
            # the fixed sample_date left over from the exploration cell).
            # Assumes Datum is in a format pd.to_datetime parses unambiguously
            # (e.g. YYYY-MM-DD) - TODO confirm.
            shipment_day = pd.to_datetime(shipment_date)
            if shipment_day.tzinfo is not None:
                # start_date is parsed as naive, so compare naive with naive
                shipment_day = shipment_day.tz_localize(None)

            day_diff = [
                abs((shipment_day - datetime.strptime(start, start_date_format)).days)
                for start in subset['start_date']
            ]
            # assign() returns a new frame instead of writing into a slice
            subset = subset.assign(day_diff=day_diff).sort_values(by='day_diff')

            # Extract latitude and longitude of the closest visit
            evides.loc[i, 'Latitude'] = subset['latitude_enter'].iloc[0]
            evides.loc[i, 'Longitude'] = subset['longitude_enter'].iloc[0]

            print("Index", str(i), "Method 2")

    # Then attempt to find a match based only on location, if necessary.
    if evides.loc[i, 'Latitude'] == '' and evides.loc[i, 'Longitude'] == '':
        subset = berth_visits_merged[same_harbour]

        if len(subset) > 0:
            # Extract latitude and longitude of the first visit at this harbour
            evides.loc[i, 'Latitude'] = subset['latitude_enter'].iloc[0]
            evides.loc[i, 'Longitude'] = subset['longitude_enter'].iloc[0]

            print("Index", str(i), "Method 3")

Save for now

In [None]:
# Persist the Evides data, now including the coordinate columns
evides.to_csv(path_or_buf='evides_withAIS.csv')