In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from random import choice
pd.options.mode.chained_assignment = None 
import pickle

In [3]:
import os, sys
sys.path.append(os.path.join(os.path.expanduser('~'), 'Documents/Insert-Generic-Name-Here/'))
# sys.path

from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
# from lonelyboy.geospatial import group_patterns as gsgp


# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import PyQt5
import matplotlib.pyplot as plt
from matplotlib import style;  style.use('ggplot')
get_ipython().magic('matplotlib qt')
# get_ipython().magic('matplotlib inline')

In [4]:
ports = pd.read_pickle('./data/pkl/ports_raw.pkl')
traj  = pd.read_pickle('./data/pkl/subseted.pckl')

traj  = gspp.gdf_from_df(traj, crs={'init':'epsg:4326'})
traj.sort_values('ts', inplace=True)
ports = gspp.create_port_bounds(ports, epsg=2154, port_radius=2000)

In [5]:
gsplt.map_plot(traj, color=['steelblue'], title='Testing Trajectories', fontsize=10)

## Section 3: Distribution of AIS Activity

In [6]:
def segment_trajectories_v2(vessel, ports, port_radius=2000, port_epsg=2154):
    '''
    Segment trajectories based on port entrance/exit
    '''
    sindex = vessel.sindex # create the spatial index (r-tree) of the vessel's data points

    if (ports.geom.type == 'Point').all():
        ports = create_port_bounds(ports, port_radius=port_radius, epsg=port_epsg)

    # find the points that intersect with each subpolygon and add them to _points_within_geometry_ DataFrame
    points_within_geometry = pd.DataFrame()
    for poly in ports.geom:
        # find approximate matches with r-tree, then precise matches from those approximate ones
        possible_matches_index = list(sindex.intersection(poly.bounds))
        possible_matches = vessel.iloc[possible_matches_index]
        precise_matches = possible_matches[possible_matches.intersects(poly)]
        points_within_geometry = points_within_geometry.append(precise_matches)

    points_within_geometry = points_within_geometry.drop_duplicates(subset=['mmsi', 'ts'])
    points_outside_geometry = vessel[~vessel.isin(points_within_geometry)].dropna(how='all')

    vessel.loc[:,'traj_id'] = np.nan
    # When we create the _traj_id_ column, we label each record with 0, 
    # if it's outside the port's radius and -1 if it's inside the port's radius. 
    vessel.loc[vessel.index.isin(points_within_geometry.index), 'traj_id'] = -1
    vessel.loc[vessel.index.isin(points_outside_geometry.index), 'traj_id'] = 0
    
    # we drop the consecutive -1 rows, except the first and last one, and segment the trajectory by the remaining -1 points
    vessel = vessel.loc[vessel.traj_id[vessel.traj_id.replace(-1,np.nan).ffill(limit=1).bfill(limit=1).notnull()].index]
    vessel.reset_index(inplace=True, drop=True)

    dfs = np.split(vessel, vessel.loc[vessel.traj_id == -1].index)
    dfs = [df for df in dfs if len(df) > 0]    # remove the fragments that are empty
    # dfs = [df for df in dfs if len(df) >= 2]    # remove the fragments that have at most 1 point
    if (len(dfs) == 0):
#         print (vessel.columns)
#         return gpd.GeoDataFrame([], columns=['mmsi', 'speed', 'lon', 'lat', 'ts', 'geom', 'traj_id', 'traj_id_12h_gap'], geometry='geom', crs={'init':'epsg:4326'}) 
        return vessel.iloc[0:0]
    
    dfs[0].loc[:,'traj_id'] = 0    # ensure that the points in the first segments have the starting ID (0)    
    # then for each sub-trajectory, we assign an incrementing number (id) to each trajectory segment, starting from 0 
    for i in range(1,len(dfs)):
        if (len(dfs[i]) == 1):
            dfs[i].loc[:,'traj_id'] = dfs[i].traj_id.apply(lambda x: dfs[i-1].traj_id.max())
        else:
            dfs[i].loc[:,'traj_id'] = dfs[i].traj_id.apply(lambda x: x+dfs[i-1].traj_id.max()+1)				

    df_fn = pd.concat(dfs)
    df_fn.sort_values('ts', inplace=True)
    df_fn.reset_index(inplace=True, drop=True)
    return df_fn

In [7]:
def __temporal_segment(vessel, temporal_threshold=12):
    if len(vessel) == 0:
        vessel['traj_id_12h_gap'] = None
        return [vessel.iloc[0:0]]
#         return [gpd.GeoDataFrame([], columns=['mmsi', 'speed', 'lon', 'lat', 'ts', 'geom', 'traj_id', 'traj_id_12h_gap'], geometry='geom', crs={'init':'epsg:4326'})]
    
    print(f"Vessel: {vessel.mmsi.unique()[0]}")
    print(f"Segments Before: {len(vessel.traj_id.unique())}")
    vessel['traj_id_12h_gap'] = 0
    vessel.sort_values(['ts'], inplace=True)
    temporal_threshold = 12 # in hrs

    dfs_temporal = []

    for traj_id, sdf in vessel.groupby('traj_id'):
            df = sdf.reset_index()
            break_points = df.ts.diff(-1).abs().index[df.ts.diff()>60*60*temporal_threshold]

            if (len(break_points) > 0):
                    dfs = np.split(df, break_points)
            else:
                    dfs = [df]

            dfs_temporal.extend(dfs)
            #NOTE #1: Check np.split if break_points=[], returns traj

    dfs_temporal = [tmp_df for tmp_df in dfs_temporal if len(tmp_df) >= 2]
    print(f"Segments After: {len(dfs_temporal)}")

    if (len(dfs_temporal) == 0):
#         print (vessel.columns)
#         return [gpd.GeoDataFrame([], columns=['mmsi', 'speed', 'lon', 'lat', 'ts', 'geom', 'traj_id', 'traj_id_12h_gap'], geometry='geom', crs={'init':'epsg:4326'})]
        return [vessel.iloc[0:0]]
        
    dfs_temporal[0].loc[:,'traj_id_12h_gap'] = 0
    for idx in range(1, len(dfs_temporal)):
        dfs_temporal[idx].loc[:,'traj_id_12h_gap'] = dfs_temporal[idx].traj_id_12h_gap.apply(lambda x: x+dfs_temporal[idx-1].traj_id_12h_gap.max()+1)

    return dfs_temporal

In [8]:
def vessel_activity_outside_port(vessel, ports, port_epsg=2154, port_radius=2000, temporal_threshold=12):                                               
    port_bounds = gspp.create_port_bounds(ports, epsg=port_epsg, port_radius=port_radius)
    port_segmented_trajectories = segment_trajectories_v2(vessel, port_bounds)
    temporal_segmented_trajectories = __temporal_segment(port_segmented_trajectories, temporal_threshold=temporal_threshold)
    vessel_fn = pd.concat(temporal_segmented_trajectories, ignore_index=True)
    return vessel_fn

In [9]:
traj_seg = traj.groupby('mmsi', group_keys=False).apply(vessel_activity_outside_port, ports)
traj_seg.sort_values('ts', inplace=True)
traj_seg.reset_index(drop=True)

Vessel: 212228000
Segments Before: 1
Segments After: 0
Vessel: 212228000
Segments Before: 1
Segments After: 0
Vessel: 212228000
Segments Before: 1
Segments After: 0
Vessel: 212518000
Segments Before: 1
Segments After: 1
Vessel: 215477000
Segments Before: 2
Segments After: 2
Vessel: 218566000
Segments Before: 1
Segments After: 1
Vessel: 219118000
Segments Before: 1
Segments After: 1
Vessel: 220364000
Segments Before: 1
Segments After: 1
Vessel: 220417000
Segments Before: 2
Segments After: 2
Vessel: 220497000
Segments Before: 1
Segments After: 1
Vessel: 224130870
Segments Before: 1
Segments After: 2
Vessel: 226084000
Segments Before: 2
Segments After: 2
Vessel: 226177000
Segments Before: 1
Segments After: 0
Vessel: 226178000
Segments Before: 2
Segments After: 2
Vessel: 226179000
Segments Before: 2
Segments After: 2
Vessel: 226216000
Segments Before: 3
Segments After: 3
Vessel: 226263000
Segments Before: 6
Segments After: 6
Vessel: 226318000
Segments Before: 4
Segments After: 4
Vessel: 22

Vessel: 566498000
Segments Before: 1
Segments After: 0
Vessel: 636009840
Segments Before: 1
Segments After: 1
Vessel: 636015106
Segments Before: 3
Segments After: 3
Vessel: 636016457
Segments Before: 2
Segments After: 2
Vessel: 636017106
Segments Before: 1
Segments After: 0
Vessel: 636017167
Segments Before: 1
Segments After: 1
Vessel: 636092271
Segments Before: 1
Segments After: 0
Vessel: 636092323
Segments Before: 1
Segments After: 0
Vessel: 636092331
Segments Before: 2
Segments After: 2


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  result = concat(values, axis=self.axis)


Unnamed: 0,geom,index,lat,lon,mmsi,speed,traj_id,traj_id_12h_gap,ts
0,POINT (-4.631805 48.11133),0.0,48.111330,-4.631805,227300000,2.8,0.0,0,1456802713
1,POINT (-4.654577 48.12303499999999),0.0,48.123035,-4.654577,228394000,1.7,0.0,0,1456802716
2,POINT (-4.631777 48.11138),1.0,48.111380,-4.631777,227300000,2.7,0.0,0,1456802717
3,POINT (-4.631757 48.11142),2.0,48.111420,-4.631757,227300000,2.6,0.0,0,1456802720
4,POINT (-4.631747 48.111443),3.0,48.111443,-4.631747,227300000,2.5,0.0,0,1456802723
5,POINT (-4.631737999999999 48.111473),4.0,48.111473,-4.631738,227300000,2.3,0.0,0,1456802725
6,POINT (-4.6545033 48.12306),1.0,48.123060,-4.654503,228394000,1.8,0.0,0,1456802727
7,POINT (-4.6317234 48.111507),5.0,48.111507,-4.631723,227300000,2.2,0.0,0,1456802730
8,POINT (-4.63172 48.111526),6.0,48.111526,-4.631720,227300000,2.1,0.0,0,1456802732
9,POINT (-4.631708000000001 48.11157),7.0,48.111570,-4.631708,227300000,2.0,0.0,0,1456802737


In [10]:
traj_seg.head(20)

Unnamed: 0,geom,index,lat,lon,mmsi,speed,traj_id,traj_id_12h_gap,ts
0,POINT (-4.631805 48.11133),0.0,48.11133,-4.631805,227300000,2.8,0.0,0,1456802713
0,POINT (-4.654577 48.12303499999999),0.0,48.123035,-4.654577,228394000,1.7,0.0,0,1456802716
1,POINT (-4.631777 48.11138),1.0,48.11138,-4.631777,227300000,2.7,0.0,0,1456802717
2,POINT (-4.631757 48.11142),2.0,48.11142,-4.631757,227300000,2.6,0.0,0,1456802720
3,POINT (-4.631747 48.111443),3.0,48.111443,-4.631747,227300000,2.5,0.0,0,1456802723
4,POINT (-4.631737999999999 48.111473),4.0,48.111473,-4.631738,227300000,2.3,0.0,0,1456802725
1,POINT (-4.6545033 48.12306),1.0,48.12306,-4.654503,228394000,1.8,0.0,0,1456802727
5,POINT (-4.6317234 48.111507),5.0,48.111507,-4.631723,227300000,2.2,0.0,0,1456802730
6,POINT (-4.63172 48.111526),6.0,48.111526,-4.63172,227300000,2.1,0.0,0,1456802732
7,POINT (-4.631708000000001 48.11157),7.0,48.11157,-4.631708,227300000,2.0,0.0,0,1456802737


In [115]:
traj_seg.head(20)

Unnamed: 0,geom,index,lat,lon,mmsi,speed,traj_id,traj_id_12h_gap,ts
0,POINT (-4.631805 48.11133),0.0,48.11133,-4.631805,227300000,2.8,0.0,0,1456802713
0,POINT (-4.654577 48.12303499999999),0.0,48.123035,-4.654577,228394000,1.7,0.0,0,1456802716
1,POINT (-4.631777 48.11138),1.0,48.11138,-4.631777,227300000,2.7,0.0,0,1456802717
2,POINT (-4.631757 48.11142),2.0,48.11142,-4.631757,227300000,2.6,0.0,0,1456802720
3,POINT (-4.631747 48.111443),3.0,48.111443,-4.631747,227300000,2.5,0.0,0,1456802723
4,POINT (-4.631737999999999 48.111473),4.0,48.111473,-4.631738,227300000,2.3,0.0,0,1456802725
1,POINT (-4.6545033 48.12306),1.0,48.12306,-4.654503,228394000,1.8,0.0,0,1456802727
5,POINT (-4.6317234 48.111507),5.0,48.111507,-4.631723,227300000,2.2,0.0,0,1456802730
6,POINT (-4.63172 48.111526),6.0,48.111526,-4.63172,227300000,2.1,0.0,0,1456802732
7,POINT (-4.631708000000001 48.11157),7.0,48.11157,-4.631708,227300000,2.0,0.0,0,1456802737


In [116]:
traj_seg.reset_index(inplace=True, drop=True)

In [11]:
gsplt.map_plot(traj_seg, color=[None], column='traj_id', cmap='tab20',  title='Testing Trajectories Segments', fontsize=10, legend=True)

In [98]:
ais_activity_outside_port = traj_seg.groupby(['mmsi', 'traj_id_12h_gap', pd.to_datetime(traj_seg.ts, unit='s').dt.date], group_keys=False).apply(lambda df: df.ts.diff().sum()/3600).to_frame().reset_index()
ais_activity_outside_port.columns = ['mmsi', 'traj_id_temporal_gap','date', '#hrs']
ais_activity_outside_port

Unnamed: 0,mmsi,traj_id_temporal_gap,date,#hrs
0,205067000,0,2015-10-22,1.843333
1,205067000,0,2015-10-23,21.251389
2,205067000,1,2016-02-10,1.249444
3,227519920,0,2015-10-01,0.573889
4,227519920,1,2015-10-01,0.400000
5,227519920,2,2015-10-01,2.275000
6,227519920,3,2015-10-01,0.350000
7,227519920,4,2015-10-01,0.258889
8,227519920,5,2015-10-01,0.117500
9,227519920,6,2015-10-01,0.183611


In [23]:
tmp = points_within_geometry.groupby(['mmsi', pd.to_datetime(points_within_geometry.ts, unit='s').dt.date], group_keys=False).apply(lambda df: df.ts.diff().sum()/3600).to_frame().reset_index()
tmp.columns = ['mmsi', 'date', '#hrs']
tmp

Unnamed: 0,mmsi,date,#hrs
0,227519920,2015-10-01,5.449722
1,227519920,2015-10-02,0.016389
2,227519920,2015-10-03,12.791111
3,227519920,2015-10-04,11.842778
4,227519920,2015-10-06,4.775556
5,227519920,2015-10-08,1.158333
6,227519920,2015-10-09,2.733889
7,227519920,2015-10-10,2.858056
8,227519920,2015-10-11,1.857500
9,227519920,2015-10-12,0.558611


In [13]:
gsplt.map_plot(points_within_geometry, points_outside_geometry, color=['r','steelblue'], title='Points Outside/Inside Port Radius', fontsize=10)