In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

In [2]:
# Load the geofenced AIS records.
# For a quick run against the small fixture, use './assets/test/geofenced_ais.csv' instead.
geofenced_ais_csv = './data/ais_with_vessel_berth/geofenced_ais.csv'
df_vessel_terminal = pd.read_csv(geofenced_ais_csv)

In [3]:
# Show (rows, columns) of the loaded frame as a baseline for later checks.
df_vessel_terminal.shape 

(3381018, 21)

In [4]:
def check_berthing(row):
    """Tell whether `row` is the record on which a vessel starts berthing.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'at_terminal' and 'at_terminal_before', the latter being
        the 'at_terminal' value of the preceding record.

    Returns
    -------
    bool
        True only when the vessel is at a terminal on this record and was not
        at one on the preceding record. A missing 'at_terminal_before' (the
        NaN that `shift(1)` leaves on a vessel's first record) always yields
        False, because there is no earlier record to transition from.
    """
    previous = row['at_terminal_before']
    if pd.isna(previous):
        return False
    just_arrived = (row['at_terminal'] == True) and (previous == False)
    return True if just_arrived else False

In [5]:
def get_berth_time(row):
    """Return the berth time for a port-entry record, or None.

    A port-entry record ('is_entering_port' is True) whose following record is
    a berthing record ('is_berthing_next' is True) forms an entry/berth pair;
    for such a pair the following record's timestamp ('time_seen_next') is the
    berth time. Any other row, including one whose 'is_berthing_next' is
    missing, yields None.
    """
    berthing_next = row['is_berthing_next']
    if pd.isna(berthing_next):
        return None
    is_pair = (row['is_entering_port'] == True) and (berthing_next == True)
    return row['time_seen_next'] if is_pair else None

In [6]:
def get_terminal_name(row):
    """Return the terminal a port-entry record leads to, or None.

    Uses the same pairing rule as the berth-time lookup: only a row with
    'is_entering_port' True whose following record has 'is_berthing_next' True
    gets a value, namely that following record's 'terminal_name_next'.
    A missing 'is_berthing_next' or any non-pair row yields None.
    """
    berthing_next = row['is_berthing_next']
    if pd.isna(berthing_next):
        return None
    # Bail out unless this is a port entry immediately followed by a berthing.
    if not (row['is_entering_port'] == True) or not (berthing_next == True):
        return None
    return row['terminal_name_next']

In [7]:
# Pair every port-entry record with the berthing record that directly follows
# it (per vessel), then attach the berth time and target terminal to the
# port-entry row of the full data set.
#
# NOTE(review): the shift-based pairing assumes each vessel's rows are already
# in chronological `time_seen` order in the input file -- confirm upstream.
vessels = df_vessel_terminal['imo'].unique().tolist()
all_vessels = []

# groupby splits the frame once instead of re-scanning every row per vessel.
# sort=False keeps vessels in first-appearance order; rows with a missing
# `imo` are skipped, just as an `== vsl` equality filter never matches NaN.
for vsl, df_vsl in df_vessel_terminal.groupby('imo', sort=False):
    # Explicit copy: the assignments below must not target a slice of the
    # source frame (the global warnings filter would hide the resulting
    # SettingWithCopyWarning).
    df_vsl = df_vsl.copy()

    # A berthing record is the first record at a terminal: at a terminal now,
    # not at one on the previous record. The vessel's first record has no
    # predecessor (NaN after shift) and therefore never counts as berthing.
    df_vsl['at_terminal_before'] = df_vsl['at_terminal'].shift(1)
    df_vsl['is_berthing'] = (
        df_vsl['at_terminal'].eq(True) & df_vsl['at_terminal_before'].eq(False)
    )

    # Keep only the "events" (port entries and berthings); consecutive events
    # are the candidate entry -> berth pairs.
    is_event = df_vsl['is_entering_port'].eq(True) | df_vsl['is_berthing']
    df_vsl_berth_time = df_vsl[is_event].copy()
    # A vessel may have neither a detected port entry nor a berthing.
    if df_vsl_berth_time.empty:
        continue

    df_vsl_berth_time['time_seen_next'] = df_vsl_berth_time['time_seen'].shift(-1)
    df_vsl_berth_time['is_berthing_next'] = df_vsl_berth_time['is_berthing'].shift(-1)
    df_vsl_berth_time['terminal_name_next'] = df_vsl_berth_time['terminal_name'].shift(-1)

    # A port entry immediately followed by a berthing is a pair; the next
    # event's timestamp / terminal become this row's berth time / target.
    # The last event has no successor (NaN) and compares as not-a-pair.
    # Rows that are not a pair are left missing (NaN).
    is_pair = (
        df_vsl_berth_time['is_entering_port'].eq(True)
        & df_vsl_berth_time['is_berthing_next'].eq(True)
    )
    df_vsl_berth_time['berth_time'] = df_vsl_berth_time['time_seen_next'].where(is_pair)
    df_vsl_berth_time['target_terminal'] = df_vsl_berth_time['terminal_name_next'].where(is_pair)

    all_vessels.append(df_vsl_berth_time)

if all_vessels:
    df_all_vessels = pd.concat(all_vessels)
else:
    # pd.concat raises on an empty list; fall back to an empty frame that
    # still carries the key and result columns so the merge below works.
    df_all_vessels = df_vessel_terminal.head(0).assign(
        is_berthing=False, target_terminal=None, berth_time=None
    )

# Left join back onto the full data set. validate='many_to_one' makes the
# merge fail loudly if an (imo, time_seen) key is repeated among the event
# rows, which would otherwise silently duplicate rows in the result.
df_all_vessels_berth_time = pd.merge(
    df_vessel_terminal,
    df_all_vessels[['imo', 'time_seen', 'is_berthing', 'target_terminal', 'berth_time']],
    how='left',
    on=['imo', 'time_seen'],
    validate='many_to_one',
)


In [8]:
# Dwell time in hours, rounded to one decimal: time elapsed between a record's
# `time_seen` and its paired `berth_time`. Rows without a berth_time get NaN.
berth_ts = pd.to_datetime(df_all_vessels_berth_time['berth_time'])
seen_ts = pd.to_datetime(df_all_vessels_berth_time['time_seen'])
df_all_vessels_berth_time['dwell_in_hr'] = ((berth_ts - seen_ts) / np.timedelta64(1, 'h')).round(1)

In [9]:
# Shape after the merge and dwell-time step; the row count should equal that
# of df_vessel_terminal, since a left join must not add or drop rows.
df_all_vessels_berth_time.shape

(3381018, 25)

In [10]:
# Shape of the input frame, for comparison with the merged result.
df_vessel_terminal.shape

(3381018, 21)

In [11]:
# Peek at the first rows, including the newly added berth/dwell columns.
df_all_vessels_berth_time.head(3)

Unnamed: 0,mmsi,time_seen,lat,lon,sog,cog,heading,vessel_name,imo,call_sign,...,cargo,transceiver_class,date_seen,is_entering_port,at_terminal,terminal_name,is_berthing,target_terminal,berth_time,dwell_in_hr
0,565807000.0,2020-01-01 00:00:00,32.31175,-117.53433,14.7,162.1,163.0,NYK CLARA,IMO9355408,9VFW9,...,72.0,A,2020-01-01,False,False,,,,,
1,565807000.0,2020-01-01 00:31:18,32.18897,-117.48776,14.7,160.0,158.0,NYK CLARA,IMO9355408,9VFW9,...,72.0,A,,False,False,,,,,
2,565807000.0,2020-01-01 01:00:00,32.08009,-117.43979,14.5,161.7,163.0,NYK CLARA,IMO9355408,9VFW9,...,72.0,A,,False,False,,,,,


In [12]:
# Persist the full result (all records plus berth/dwell columns), without the index.
# For a quick run against the small fixture, write to './assets/test/all_vessels_dwell_time.csv' instead.
all_vessels_dwell_time_csv = './data/ais_with_vessel_berth/all_vessels_dwell_time.csv'
df_all_vessels_berth_time.to_csv(all_vessels_dwell_time_csv, index=False)

The cells below are for verifying the records.

In [13]:
# Verification subset: for every vessel seen at a terminal at least once, keep
# only its port-entry and berthing records, ordered by vessel and time.
seen_at_terminal = df_all_vessels_berth_time['at_terminal'] == True
check_berth_vessels = df_all_vessels_berth_time.loc[seen_at_terminal, 'vessel_name'].unique().tolist()

df_check = df_all_vessels_berth_time[df_all_vessels_berth_time['vessel_name'].isin(check_berth_vessels)]
is_event = (df_check['is_entering_port'] == True) | (df_check['is_berthing'] == True)
df_check = df_check[is_event].sort_values(by=['vessel_name', 'time_seen'], ascending=True)

In [14]:
# Save the verification subset for manual inspection, without the index.
# For a quick run against the small fixture, write to './assets/test/check_vsl_dwell_time.csv' instead.
check_vsl_dwell_time_csv = './data/ais_with_vessel_berth/check_vsl_dwell_time.csv'
df_check.to_csv(check_vsl_dwell_time_csv, index=False)

In [15]:
# Count missing values per column of the final result.
df_all_vessels_berth_time.isnull().sum()

mmsi                       0
time_seen                  0
lat                        0
lon                        0
sog                        0
cog                        0
heading                    0
vessel_name                0
imo                        0
call_sign                  0
vessel_type                0
status                  1764
length                 75373
width                 183542
draft                 230601
cargo                 624345
transceiver_class          0
date_seen            3293619
is_entering_port           0
at_terminal                0
terminal_name        3105117
is_berthing          3365974
target_terminal      3379216
berth_time           3379216
dwell_in_hr          3379216
dtype: int64

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=6b18b33d-3a56-4f49-ad6e-71ecea9f0183' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>