In [7]:
import numpy as np
import pandas as pd
import geopandas as gpd
from features import Feature

In [8]:
dynamic = pd.read_csv('Datasets\dynamic.csv', parse_dates=['timestamp'])
dynamic['geometry'] = gpd.GeoSeries.from_wkt(dynamic['geometry'], crs='EPSG:4326')
dynamic = gpd.GeoDataFrame(dynamic)

gaps = pd.read_csv('Datasets\gaps.csv')
gaps['overall_rate'] = np.zeros(len(gaps))
gaps.head()

Unnamed: 0,mmsi,disappearid,disappeartime,disappearlocation,reappearid,reappeartime,reappearlocation,geometry,darkduration,darkdistance,darkspeed,overall_rate
0,228336000,23914,2015-10-02 03:29:19,POINT (-4.747795 48.185574),36073,2015-10-03 04:33:14,POINT (-4.7795234 48.36025),"LINESTRING (-4.747795 48.185574, -4.7795234 48...",90235.0,10.633473,0.424231,0.0
1,228336000,38897,2015-10-03 09:48:27,POINT (-5.1333485 48.16744),42831,2015-10-03 15:38:24,POINT (-5.1669436 48.598495),"LINESTRING (-5.1333485 48.16744, -5.1669436 48...",20997.0,25.883674,4.437835,0.0
2,228336000,42832,2015-10-03 15:38:34,POINT (-5.1665335 48.598743),42886,2015-10-03 17:32:43,POINT (-4.907183 48.45989),"LINESTRING (-5.1665335 48.598743, -4.907183 48...",6849.0,17.576539,9.238654,0.0
3,228336000,43161,2015-10-03 18:33:45,POINT (-4.7796965 48.360165),49482,2015-10-05 00:43:11,POINT (-4.7795634 48.360226),"LINESTRING (-4.7796965 48.360165, -4.7795634 4...",108566.0,0.008747,0.00029,0.0
4,228336000,53273,2015-10-05 08:38:37,POINT (-4.7800317 48.3596),64511,2015-10-06 00:38:49,POINT (-4.779775 48.360493),"LINESTRING (-4.7800317 48.3596, -4.779775 48.3...",57612.0,0.055643,0.003477,0.0


# The overallrate feature
The overall rate of a ship is the number of reports from this vessel that are part of a gap, divided by the numbers of reports of this ship. To calculate this rate we extract the trajectory of each individual ship in the gaps DataFrame. The trajectory is stored in the temp DataFrame. We then count all the 'disappear', 'reappear' and 'lone' labels from the edge column and divide this number by the length of the trajectory.

In [9]:
%%time
feat = pd.DataFrame(columns = ['overall rate'])

for mmsi in gaps.mmsi.unique() :
    temp = dynamic.loc[dynamic.sourcemmsi == mmsi].edge
    rate = 1-(temp.value_counts().loc['none']/len(temp))
    feat.loc[mmsi] = rate

CPU times: total: 2.11 s
Wall time: 2.17 s


In [10]:
def get_rate(index) :
    for ind in index :
        gaps.at[ind, 'overall_rate'] = feat['overall rate'].loc[gaps.at[ind, 'mmsi']]

In [11]:
%%time
get_rate(gaps.index)
gaps.head()

CPU times: total: 641 ms
Wall time: 664 ms


Unnamed: 0,mmsi,disappearid,disappeartime,disappearlocation,reappearid,reappeartime,reappearlocation,geometry,darkduration,darkdistance,darkspeed,overall_rate
0,228336000,23914,2015-10-02 03:29:19,POINT (-4.747795 48.185574),36073,2015-10-03 04:33:14,POINT (-4.7795234 48.36025),"LINESTRING (-4.747795 48.185574, -4.7795234 48...",90235.0,10.633473,0.424231,0.002008
1,228336000,38897,2015-10-03 09:48:27,POINT (-5.1333485 48.16744),42831,2015-10-03 15:38:24,POINT (-5.1669436 48.598495),"LINESTRING (-5.1333485 48.16744, -5.1669436 48...",20997.0,25.883674,4.437835,0.002008
2,228336000,42832,2015-10-03 15:38:34,POINT (-5.1665335 48.598743),42886,2015-10-03 17:32:43,POINT (-4.907183 48.45989),"LINESTRING (-5.1665335 48.598743, -4.907183 48...",6849.0,17.576539,9.238654,0.002008
3,228336000,43161,2015-10-03 18:33:45,POINT (-4.7796965 48.360165),49482,2015-10-05 00:43:11,POINT (-4.7795634 48.360226),"LINESTRING (-4.7796965 48.360165, -4.7795634 4...",108566.0,0.008747,0.00029,0.002008
4,228336000,53273,2015-10-05 08:38:37,POINT (-4.7800317 48.3596),64511,2015-10-06 00:38:49,POINT (-4.779775 48.360493),"LINESTRING (-4.7800317 48.3596, -4.779775 48.3...",57612.0,0.055643,0.003477,0.002008


In [12]:
Feature('overallrate', gaps.overall_rate).save()

# The closerate feature
The close rate is the number of reports labelled 'disappear', 'reapppear' and 'lone' i.e. part of a gap, in the 20 preceding and succeeding reports of a gap. In order to calculate this rate we extract the trajectory of each individual ship in the gaps DataFrame. The trajectory is stored in the traj DataFrame. Then we create the dgaps DataFrame which gives us the index of the gaps in the trajectory DataFrame in order to extract their neighbors. The mgaps dataset gives us the index of the gap for which we are calculating the close rate in order to insert the close rate value in the gaps DataFrame.

In [13]:
def get_close_rate(gaps) :
    ''' 
    The close rate is the number of reports labelled 'disappear', 'reapppear' and 'lone' i.e. part of a gap 
    in the 20 preceding and succeeding reports of a gap.
    In order to calculate this rate we extract the trajectory of each ship in the gaps dataset. Then we create dgaps 
    Dataframe which gives us the index of the gaps in the current trajectory
    '''
    test = np.array(['none']*40)
    for mmsi in gaps.mmsi.unique() :
        traj = dynamic.loc[dynamic.sourcemmsi == mmsi]
        traj.reset_index(drop = True, inplace = True)
        le = len(traj)
        mgaps = gaps.loc[gaps.mmsi == mmsi]
        dgaps = traj[(traj.edge == 'disappear')|(traj.edge == 'lone') ]
        for i in zip(dgaps.index, mgaps.index) :    
            neighbors = pd.concat([traj.edge.iloc[max(i[0]-20, 0):i[0]] ,traj.edge.iloc[i[0]+2:min(i[0]+22, le)]]).to_numpy()
            neighbors = np.where(neighbors==test[:len(neighbors)], 1, 0)
            gaps.at[i[1], 'close_rate'] = np.sum(neighbors)/len(neighbors)      

In [15]:
%%time
gaps['close_rate'] = np.zeros(len(gaps))
get_close_rate(gaps)
gaps.close_rate = 1-gaps.close_rate  # we counted the number of 'none' in the get_close_rate function
gaps.head()

CPU times: total: 5.27 s
Wall time: 5.35 s


Unnamed: 0,mmsi,disappearid,disappeartime,disappearlocation,reappearid,reappeartime,reappearlocation,geometry,darkduration,darkdistance,darkspeed,overall_rate,close_rate
0,228336000,23914,2015-10-02 03:29:19,POINT (-4.747795 48.185574),36073,2015-10-03 04:33:14,POINT (-4.7795234 48.36025),"LINESTRING (-4.747795 48.185574, -4.7795234 48...",90235.0,10.633473,0.424231,0.002008,0.0
1,228336000,38897,2015-10-03 09:48:27,POINT (-5.1333485 48.16744),42831,2015-10-03 15:38:24,POINT (-5.1669436 48.598495),"LINESTRING (-5.1333485 48.16744, -5.1669436 48...",20997.0,25.883674,4.437835,0.002008,0.05
2,228336000,42832,2015-10-03 15:38:34,POINT (-5.1665335 48.598743),42886,2015-10-03 17:32:43,POINT (-4.907183 48.45989),"LINESTRING (-5.1665335 48.598743, -4.907183 48...",6849.0,17.576539,9.238654,0.002008,0.05
3,228336000,43161,2015-10-03 18:33:45,POINT (-4.7796965 48.360165),49482,2015-10-05 00:43:11,POINT (-4.7795634 48.360226),"LINESTRING (-4.7796965 48.360165, -4.7795634 4...",108566.0,0.008747,0.00029,0.002008,0.0
4,228336000,53273,2015-10-05 08:38:37,POINT (-4.7800317 48.3596),64511,2015-10-06 00:38:49,POINT (-4.779775 48.360493),"LINESTRING (-4.7800317 48.3596, -4.779775 48.3...",57612.0,0.055643,0.003477,0.002008,0.0


In [16]:
Feature('localrate', gaps.close_rate).save()