## Importing the LonelyBoy Library (github.com/insert-generic-name-here/lonelyboy)

In [4]:
import os, sys
sys.path.append(os.path.join(os.path.expanduser('~'), 'Documents/Insert-Generic-Name-Here/'))
# sys.path

In [5]:
from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
from lonelyboy.geospatial import group_patterns as gsgp

## Importing all other Essential Libraries
#### (DO NOT FORGET TO EXECUTE THE FUNCTIONS IN THE BOTTOM CELLS)

In [6]:
import psycopg2
import numpy as np
import configparser
import pandas as pd
import geopandas as gpd
import contextily as ctx
from random import choice
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN, MeanShift
from sklearn.preprocessing import MinMaxScaler
from shapely.geometry import Point, LineString, shape
from haversine import haversine

In [7]:
from multiprocessing import cpu_count, Pool
from functools import partial
import datetime

## Import Libraries for Visualizations

In [8]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [9]:
import PyQt5
import matplotlib.pyplot as plt
from matplotlib import style;  style.use('ggplot')
get_ipython().magic('matplotlib qt')

In [10]:
from tqdm import tqdm, tqdm_notebook

## Importing the Server Credentials 

In [11]:
properties = configparser.ConfigParser()
properties.read(os.path.join('.','sql_server.ini'))
properties = properties['SERVER']

['./sql_server.ini']

## Connectiing to Server and Fetch 48hrs of Trajectory Data

In [12]:
host    = properties['host']
db_name = properties['db_name']
uname   = properties['uname']
pw      = properties['pw']
port    = properties['port']

traj_sql = 'SELECT * FROM ais_data.dynamic_ships WHERE ts>1456802710 AND ts<1456975510  '
ports_sql = 'SELECT * FROM ports.ports_of_brittany'

con = psycopg2.connect(database=db_name, user=uname, password=pw, host=host, port = port)

traj = gpd.GeoDataFrame.from_postgis(traj_sql, con, geom_col='geom' )

ports = gpd.GeoDataFrame.from_postgis(ports_sql, con, geom_col='geom' )
ports.geom = ports.geom.apply(lambda x: x[0])

print(f'Fetched {sizeof_fmt(traj.memory_usage().sum())}')
print(f'Fetched {sizeof_fmt(ports.memory_usage().sum())}')

con.close()

Fetched 38.1MiB
Fetched 14.0KiB


In [13]:
ports.head(2)
traj.head(2)

Unnamed: 0,gid,gml_id,por_id,libelle_po,insee_comm,por_x,por_y,geom
0,1,port.1,1,Le Vivier-sur-Mer,35361,297025.0,2408370.0,POINT (-1.771798868659233 48.60274269672541)
1,2,port.10,10,Saint-Samson sur Rance,22327,279335.0,2396060.0,POINT (-2.001990119062326 48.48369993456267)


Unnamed: 0,id,mmsi,status,turn,speed,course,heading,lon,lat,ts,geom
0,17515086,227300000,7.0,-126.0,2.8,34.2,346,-4.631805,48.11133,1456802713,POINT (-4.631805 48.11133)
1,17515114,227300000,7.0,-126.0,1.1,36.4,309,-4.631512,48.11188,1456802793,POINT (-4.6315117 48.11188)


## (Hopefully) Doing something Useful

* ### Select some mmsi's
* ### Denoise them (per mmsi)
* ### Resample them (per mmsi)

In [14]:
### SELECT SOME MMSI'S
# mmsis = np.array([228186700, 477115900, 227002330, 227270000, 227369960, 227298110,\
#                    228190600, 227408710, 228849000, 227730220, 228762000, 227612860,\
#                    227592820, 227590030, 227654220, 227578460, 220364000, 636092323,\
#                    227322690, 227702670, 228021700, 219118000, 227312180, 273348830,\
#                    275457000, 226084000, 244976000, 224130870, 228203800, 228167900,\
#                    227327000, 228144000, 636016457, 215477000, 226318000, 314207000,\
#                    247087700, 563187000, 477612300, 227588930, 258316000, 228919000])
mmsis = traj.mmsi.unique()

In [15]:
### INSTEAD OF THIS CELL, EXECUTE THE ONE BELOW
# mmsis = np.array([227590030, 305476000, 235005980, 226084000, 227088590])
# sample_trajectories = gspp.pick_random_group(traj, 'mmsi', group_size=50)
# len(sample_trajectories)
# sample_trajectories

In [16]:
### DENOISE THEM
#### DROP TIMESTAMP DUPLICATES PER MMSI
sample_trajectories = traj.loc[traj.mmsi.isin(mmsis)]
sample_trajectories = sample_trajectories.drop_duplicates(subset=['mmsi', 'ts']).sort_values('ts').reset_index(drop=True)
sample_trajectories.head()

Unnamed: 0,id,mmsi,status,turn,speed,course,heading,lon,lat,ts,geom
0,12576917,227705102,15.0,-127.0,0.0,261.8,511,-4.496568,48.382435,1456802711,POINT (-4.496568 48.382435)
1,12576916,227941000,7.0,0.0,0.0,285.0,8,-4.327213,48.100086,1456802711,POINT (-4.3272133 48.100086)
2,17515086,227300000,7.0,-126.0,2.8,34.2,346,-4.631805,48.11133,1456802713,POINT (-4.631805 48.11133)
3,12576920,227574020,15.0,-127.0,0.0,241.7,511,-4.496673,48.382454,1456802713,POINT (-4.496673 48.382454)
4,12576919,227008170,0.0,0.0,0.0,135.0,144,-4.486115,48.381565,1456802713,POINT (-4.486115 48.381565)


In [17]:
### DROP OUTLIERS IN SAMPLE_TRAJECTORIES BASED ON TIMESTAMP ---- WIP; IDK IF I'LL APPLY THIS OR NOT (SO FAR: NOT APPLIED)
for mmsi in tqdm_notebook(sample_trajectories.mmsi.unique()):
    mmsi_ts_outliers = gspp.get_outliers(sample_trajectories.loc[sample_trajectories.mmsi == mmsi].ts, alpha=1.5)
#     sample_trajectories = sample_trajectories.drop(mmsi_ts_outliers)
    print (mmsi_ts_outliers)

HBox(children=(IntProgress(value=0, max=197), HTML(value='')))

Int64Index([], dtype='int64')
Int64Index([], dtype='int64')
Int64Index([117415, 117421, 117430, 117440, 117458, 117465, 117482, 117500,
            119040, 119056, 119074, 119512, 119534, 119966, 119977, 119982,
            120011, 120028, 120044, 120057, 120063, 129542, 174651, 179446,
            179467, 179505, 181826, 181859, 181882, 182176, 182184, 182206,
            182456, 183058, 183064, 183095, 183118, 183351, 183359, 183371,
            183382, 183410, 185720, 187276, 187284, 189801, 189813, 190085,
            190091, 192715, 192725, 192743, 192779, 192797, 193054, 193062,
            193081, 193089, 193344, 193396, 193691, 193697, 193717, 193726,
            193734, 193742, 193745, 193752, 194052, 194059, 194065, 194075,
            194083, 194090, 194103, 194105, 194117, 194421, 194439, 194444,
            194451, 195989, 195999, 196026, 196032, 196040, 196220, 196234,
            196243, 196272, 196277, 196501, 196512, 196557],
           dtype='int64')
Int64Index([], dt

Int64Index([ 45666,  45690,  45703,  45730,  45742,  65769,  65906,  65931,
             67419,  69949,  69966,  69982, 156720, 156743, 156782, 157433,
            157447, 157466, 157501, 157537, 157658, 157776, 157796, 158040,
            158117, 158564, 158594, 158879, 158973, 159183, 159194, 159216,
            159259, 159308, 159446, 159461, 159925, 159937, 160024, 160257,
            160368, 160833, 161358, 161544, 161881, 162097, 162263, 162276,
            162626, 162728, 163144, 163155, 163586, 163779, 165239, 165261,
            165757, 165890, 166097, 166258, 166551, 166765, 168278, 169588,
            170482, 170814, 171190, 182789, 182800, 182827, 182836],
           dtype='int64')
Int64Index([], dtype='int64')
Int64Index([], dtype='int64')
Int64Index([], dtype='int64')
Int64Index([], dtype='int64')
Int64Index([], dtype='int64')
Int64Index([67653, 67834, 68148, 68166], dtype='int64')
Int64Index([], dtype='int64')
Int64Index([], dtype='int64')
Int64Index([], dtype='int64')
I

In [18]:
### DROP OUTLIERS IN SAMPLE_TRAJECTORIES BASED ON VELOCITY (POTENTIAL-AREA-OF-ACTIVITY)
sample_trajectories['velocity'] = np.nan

for mmsi in tqdm_notebook(sample_trajectories.mmsi.unique()):
    try:
        sample_trajectories.loc[sample_trajectories.mmsi == mmsi] = gspp.calculate_velocity(sample_trajectories.loc[sample_trajectories.mmsi == mmsi], smoothing=True, window=5, center=True)
#         mmsi_vel_outliers = gspp.get_outliers(sample_trajectories.loc[sample_trajectories.mmsi == mmsi].velocity, alpha=3)
        mmsi_vel_outliers = sample_trajectories.iloc[(sample_trajectories.mmsi == mmsi) & (sample_trajectories.velocity >= 102.2)]
        print(mmsi_vel_outliers)
        sample_trajectories = sample_trajectories.drop(mmsi_vel_outliers)
    except:
        continue

sample_trajectories = sample_trajectories.reset_index(drop=True)
sample_trajectories = sample_trajectories.dropna(subset=['id', 'mmsi'])
sample_trajectories = sample_trajectories.fillna(0)

HBox(children=(IntProgress(value=0, max=197), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  gdf['current_loc'] = gdf.geom.apply(lambda x: (x.x,x.y))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  gdf['next_loc'] = gdf.geom.shift(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  gdf['next_loc'] = gdf.next_loc.apply(lambda x : (x.x,x.y))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_index




In [19]:
sample_trajectories

Unnamed: 0,id,mmsi,status,turn,speed,course,heading,lon,lat,ts,geom,velocity
0,12576917.0,227705102.0,15.0,-127.0,0.0,261.8,511.0,-4.496568,48.382435,1.456803e+09,POINT (-4.496568 48.382435),0.000000
1,12576916.0,227941000.0,7.0,0.0,0.0,285.0,8.0,-4.327213,48.100086,1.456803e+09,POINT (-4.3272133 48.100086),0.000000
2,17515086.0,227300000.0,7.0,-126.0,2.8,34.2,346.0,-4.631805,48.111330,1.456803e+09,POINT (-4.631805 48.11133),0.000000
3,12576920.0,227574020.0,15.0,-127.0,0.0,241.7,511.0,-4.496673,48.382454,1.456803e+09,POINT (-4.496673 48.382454),0.000000
4,12576919.0,227008170.0,0.0,0.0,0.0,135.0,144.0,-4.486115,48.381565,1.456803e+09,POINT (-4.486115 48.381565),0.000000
5,12576918.0,227016100.0,0.0,0.0,0.0,264.3,174.0,-4.481568,48.381393,1.456803e+09,POINT (-4.4815683 48.381393),0.000000
6,17596828.0,256494000.0,5.0,0.0,0.0,344.0,217.0,-4.451149,48.383625,1.456803e+09,POINT (-4.4511485 48.383625),0.000000
7,12576921.0,228186700.0,15.0,-127.0,102.3,360.0,511.0,-4.512498,48.370834,1.456803e+09,POINT (-4.5124984 48.370834),0.000000
8,12293637.0,227006750.0,0.0,127.0,0.0,266.1,267.0,-4.484478,48.381172,1.456803e+09,POINT (-4.4844785 48.381172),0.000000
9,12293636.0,228394000.0,7.0,-127.0,1.7,77.0,511.0,-4.654577,48.123035,1.456803e+09,POINT (-4.654577 48.123035),0.000000


In [None]:
plot_idx = 0
for mmsi in sample_trajectories.mmsi.unique():
    plt.figure(plot_idx)
    pois = gspp.get_outliers(sample_trajectories.loc[sample_trajectories.mmsi == mmsi].velocity, alpha=2)
    
    print (pois)
    sample_trajectories.loc[sample_trajectories.mmsi == mmsi].velocity.plot(figsize=(20,10), c=(0,0,0))
    for poi in pois:
        plt.axvline(x=poi, c='r')
    
    plt.show()
    plot_idx += 1

In [20]:
mmsi_resampled = []
for mmsi in tqdm_notebook(sample_trajectories.mmsi.unique()):
    tmp = gspp.resample_geospatial(sample_trajectories.loc[sample_trajectories.mmsi == mmsi], rule = '60S', method='linear', crs = {'init': 'epsg:4326'}, drop_lon_lat = True)
    tmp = tmp.drop(['id', 'status', 'turn', 'speed', 'ts'], axis=1)
    mmsi_resampled.append(tmp)
        
sample_trajectories_resampled = pd.concat(mmsi_resampled)
sample_trajectories_resampled = sample_trajectories_resampled.sort_values('datetime').reset_index(drop=True)

HBox(children=(IntProgress(value=0, max=197), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  sample_ves['datetime'] = pd.to_datetime(sample_ves['ts'], unit='s')





In [None]:
plot_idx = 0
for mmsi in sample_trajectories_resampled.mmsi.unique():
    plt.figure(plot_idx)
    sample_trajectories_resampled.loc[sample_trajectories_resampled.mmsi == mmsi].velocity.plot(figsize=(20,10), c=(0,0,0))
    plt.show()
    plot_idx += 1 

In [21]:
sample_trajectories_resampled.head(20)

Unnamed: 0,mmsi,course,heading,geom,velocity,datetime
0,227705102.0,261.8,511.0,POINT (-4.496568 48.382435),0.0,2016-03-01 03:25:00
1,227300000.0,34.2,346.0,POINT (-4.631805 48.11133),0.0,2016-03-01 03:25:00
2,227574020.0,241.7,511.0,POINT (-4.496673 48.382454),0.0,2016-03-01 03:25:00
3,227008170.0,135.0,144.0,POINT (-4.486115 48.381565),0.0,2016-03-01 03:25:00
4,227016100.0,264.3,174.0,POINT (-4.4815683 48.381393),0.0,2016-03-01 03:25:00
5,256494000.0,344.0,217.0,POINT (-4.4511485 48.383625),0.0,2016-03-01 03:25:00
6,228186700.0,360.0,511.0,POINT (-4.5124984 48.370834),0.0,2016-03-01 03:25:00
7,227006750.0,266.1,267.0,POINT (-4.4844785 48.381172),0.0,2016-03-01 03:25:00
8,228394000.0,77.0,511.0,POINT (-4.654577 48.123035),0.0,2016-03-01 03:25:00
9,227003050.0,298.8,150.0,POINT (-4.4857283 48.38113),0.0,2016-03-01 03:25:00


## Plot the Preprocessed Trajectories (just to be sure)

In [21]:
ax = sample_trajectories_resampled.to_crs(epsg=3857).plot(figsize=(10, 10))
ctx.add_basemap(ax, zoom=11)
plt.show()

<matplotlib.axes._subplots.AxesSubplot at 0x7f787f1eb9b0>

## Search for Flocks
* #### 1. The Clustering will take place in time slices 
* #### 2. Possible Features: (X_coord, Y_coord, course)

* #### Set up a Color Map (for the Plots)

In [22]:
LABEL_COLOR_MAP = {-1 : 'black', 0 : 'white', 1 : 'r', 2 : 'g',\
                   3 : 'b', 4 : 'm', 5 : 'y', 6 : 'maroon', 7 : 'pink',\
                   8 : 'sienna', 9 : 'darkslategray', 10 : 'purple', 
                   11 : 'darkgoldenrod', 12: 'chocolate'}

* #### Getting a Sample Time Frame (to test the clustering algorithms)

In [23]:
# sample_datetime = np.datetime64('2016-03-02T10:04:00.000000000') 
# sample_datetime = np.datetime64('2016-03-01T19:13:00.000000000')
# THE BEST ONE SO FAR
sample_datetime = np.datetime64('2016-03-01T16:32:00.000000000')
## 
# while True:
#     sample_datetime = np.random.choice(sample_trajectories_resampled.datetime)
#     sample_timeFrame = sample_trajectories_resampled.loc[sample_trajectories_resampled.datetime == sample_datetime].drop_duplicates(subset=['mmsi', 'datetime'])
#     sample_timeFrame = sample_timeFrame.sort_values('datetime').reset_index(drop=True)
#     if (mean_distance_to_nearest_port(sample_timeFrame, ports) > 0.11): break
sample_timeFrame = sample_trajectories_resampled.loc[sample_trajectories_resampled.datetime == sample_datetime].drop_duplicates(subset=['mmsi', 'datetime'])
sample_timeFrame;

In [116]:
ax = sample_timeFrame.to_crs(epsg=3857).plot(figsize=(10, 10))
ctx.add_basemap(ax, zoom=11)
plt.show()

<matplotlib.axes._subplots.AxesSubplot at 0x7f7968425320>

* ## K-Means Clustering 

In [44]:
# gdf_tmp = sample_trajectories_resampled.loc[sample_trajectories_resampled.datetime == sample_datetime].drop_duplicates(subset=['mmsi', 'datetime'])  
flocks = gsgp.flock_mining(sample_timeFrame, doi=None, init='k-means++', n_init=10, n_jobs=-1, precompute_distances=True, random_state=0, verbose=0) 

  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/66 [00:00<?, ?it/s][A
 12%|█▏        | 8/66 [00:00<00:00, 72.74it/s][A
 20%|█▉        | 13/66 [00:00<00:00, 58.97it/s][A
 26%|██▌       | 17/66 [00:00<00:01, 47.29it/s][A
 30%|███       | 20/66 [00:00<00:01, 37.59it/s][A
 35%|███▍      | 23/66 [00:00<00:01, 30.59it/s][A
 39%|███▉      | 26/66 [00:00<00:01, 27.03it/s][A
 44%|████▍     | 29/66 [00:00<00:01, 24.29it/s][A
 48%|████▊     | 32/66 [00:01<00:01, 21.58it/s][A
 53%|█████▎    | 35/66 [00:01<00:01, 19.39it/s][A
 56%|█████▌    | 37/66 [00:01<00:01, 17.61it/s][A
 59%|█████▉    | 39/66 [00:01<00:01, 16.37it/s][A
 62%|██████▏   | 41/66 [00:01<00:01, 15.22it/s][A
 65%|██████▌   | 43/66 [00:01<00:01, 14.08it/s][A
 68%|██████▊   | 45/66 [00:02<00:01, 13.47it/s][A
 71%|███████   | 47/66 [00:02<00:01, 12.92it/s][A
 74%|███████▍  | 49/66 [00:02<00:01, 12.36it/s][A
 77%|███████▋  | 51/66 [00:02<00:01, 11.90it/s][A
 80%|████████  | 53/66 [00:02<00:01, 11.50it/s][A
 83

In [45]:
flocks

Unnamed: 0,flocks,start_time,end_time
0,"[51882, 51898]",2016-03-01 16:32:00,
1,"[51848, 51856, 51890, 51907]",2016-03-01 16:32:00,
2,"[51847, 51850, 51852, 51853, 51854, 51855, 518...",2016-03-01 16:32:00,
3,"[51876, 51880, 51884]",2016-03-01 16:32:00,
4,[51903],2016-03-01 16:32:00,
5,"[51849, 51867, 51892]",2016-03-01 16:32:00,
6,[51895],2016-03-01 16:32:00,
7,"[51845, 51851, 51859, 51861, 51864, 51885, 519...",2016-03-01 16:32:00,
8,"[51858, 51879]",2016-03-01 16:32:00,
9,"[51886, 51888]",2016-03-01 16:32:00,


* #### Plotting the Results of Flock Mining...

In [47]:
label_color = [LABEL_COLOR_MAP[l] for l in flocks[str(sample_timeFrame.datetime.unique()[0])][2]]
ax = sample_timeFrame.to_crs(epsg=3857).plot(figsize=(10, 10), c=label_color)
ctx.add_basemap(ax, zoom=11)
plt.show()

<matplotlib.axes._subplots.AxesSubplot at 0x7f775682a710>

### __Q:__ sklearn KMeans uses Euclidean Distance; Which is not Good on GeoSpatial Data. What if we could incorporate the Haversine Formula to get a bit more Accurate Clusters?

## Search for Convoys (via DBSCAN Clustering)

* #### Defining our Custom Metric (a.k.a Haversine Distance)

* ## K-Means Clustering 

In [51]:
from haversine import haversine

def haversine_distance(x, y):
    vector_alpha = (x[0], x[1])
    vector_beta = (y[0], y[1])
    point_dist = haversine(vector_alpha, vector_beta)*0.539956803

    try:
        feature_dist = np.linalg.norm(x[2:] - y[2:])
    except IndexError:
        feature_dist = 0
    return point_dist + feature_dist

In [52]:
gdf_test = sample_timeFrame[['lon', 'lat']]
scaler   = MinMaxScaler()
X_std    = scaler.fit_transform(gdf_test.values)

In [53]:
from sklearn.cluster import DBSCAN

clustering = DBSCAN(eps=2.5, min_samples=3, metric=haversine_distance,\
                    metric_params=None, algorithm='auto',\
                    leaf_size=50, p=None, n_jobs=-1).fit(X_std)

dbscan_convoys = clustering.labels_
dbscan_convoys

array([ 0, -1,  1,  2, -1,  1,  0,  1,  1,  1,  1,  2,  1, -1,  0,  1,  0,
        1,  1, -1,  3,  1, -1,  1,  1,  1, -1, -1,  1,  3,  1, -1,  1,  1,
       -1, -1,  1, -1,  1, -1,  0, -1,  1, -1,  1,  2,  1, -1,  1,  1, -1,
        1,  1, -1,  1, -1,  1,  1, -1,  1,  3,  1, -1,  3, -1,  1])

* #### Plotting the Results of Convoy Mining (via DBSCAN Clustering)...

In [54]:
label_color = [LABEL_COLOR_MAP[l] for l in dbscan_convoys]
ax = sample_timeFrame.to_crs(epsg=3857).plot(figsize=(10, 10), c=label_color)
ctx.add_basemap(ax, zoom=11)
plt.show()

<matplotlib.axes._subplots.AxesSubplot at 0x7f77565dbda0>

# FUNCTIONS

In [1]:
def sizeof_fmt(num, suffix='B'):
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)

In [2]:
def make_lines(gdf, df_out, i, geometry = 'geometry'):
    geom0 = gdf.loc[i][geometry]
    geom1 = gdf.loc[i + 1][geometry]
    
    start, end = [(geom0.x, geom0.y), (geom1.x, geom1.y)]
    line = LineString([start, end])
    
    # Create a DataFrame to hold record
    data = {'id': i,
            'geometry': [line]}
    df_line = pd.DataFrame(data, columns = ['id', 'geometry'])
    
    # Add record DataFrame of compiled records
    df_out = pd.concat([df_out, df_line])
    return df_out

In [3]:
def mean_distance_to_nearest_port(gdf, ports):
    '''
    Calculates the minimum distance between the point and the lists of ports. Can be used to determine if the ship is sailing or not
    '''
    counter = 0
    for point in tqdm_notebook(gdf.geom):
        counter += ports.geom.distance(point).min()
    
    return counter/len(gdf)