In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from random import choice
pd.options.mode.chained_assignment = None 
import pickle

In [None]:
import os, sys
sys.path.append(os.path.join(os.path.expanduser('~'), 'Documents/Insert-Generic-Name-Here/'))
# sys.path

from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
# from lonelyboy.geospatial import group_patterns as gsgp


# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import PyQt5
import matplotlib.pyplot as plt
from matplotlib import style;  style.use('ggplot')
# Select the Qt backend so figures open in interactive windows.
# run_line_magic() is the supported replacement for the deprecated
# InteractiveShell.magic() call.
get_ipython().run_line_magic('matplotlib', 'qt')
# get_ipython().magic('matplotlib inline')

In [None]:
# Default figure size in inches; the height is the width divided by 1.618.
_FIG_ASPECT = 1.618
PLT_FIG_WIDTH, PLT_FIG_HEIGHT = 10.487, 10.487 / _FIG_ASPECT

In [None]:
# Port table -> port bounds (EPSG 2154, radius 2000 as passed below).
# NOTE(review): read_pickle executes arbitrary code on load; only use trusted files.
ports = gspp.create_port_bounds(
    pd.read_pickle('./data/pkl/ports_raw.pkl'),
    epsg=2154,
    port_radius=2000,
)

# AIS test sample -> GeoDataFrame in EPSG 4326.
# Full dataset alternative: './data/csv/nari_dynamic.csv'
traj = gspp.gdf_from_df(
    pd.read_csv('./data/csv/nari_dynamic_test.csv'),
    crs={'init':'epsg:4326'},
)

In [None]:
# Overview map of the loaded test trajectories.
gsplt.map_plot(
    traj,
    color=['steelblue'],
    title='Testing Trajectories',
    fontsize=10,
)

## Section 2: Distribution of Port Popularity

In [None]:
# Empty result table: one (port_id, count) row per port.
port_popularity = pd.DataFrame(
    data=[],
    columns=['port_id', '#arrivals_departures'],
)

In [None]:
%%time
# Count, for every port polygon, how many trajectory points intersect it.
# NOTE(review): this assumes ports.geom and traj share the same CRS -- confirm.
sindex = traj.sindex

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row is quadratic.
port_records = []
for port_id, poly in zip(ports.gid, ports.geom):
    # Cheap bounding-box lookup in the spatial index first, then the exact
    # geometric test on those candidates only.
    candidate_positions = list(sindex.intersection(poly.bounds))
    candidates = traj.iloc[candidate_positions]
    precise_matches = candidates[candidates.intersects(poly)]
    port_records.append({'port_id': port_id, '#arrivals_departures': len(precise_matches)})

port_popularity = pd.DataFrame(port_records, columns=['port_id', '#arrivals_departures'])

* ### Reading CSVs

In [None]:
# Combine the partial port-popularity reports in data_folder into one table
# holding the total '#arrivals_departures' per port_id.
data_folder = './data/csv/stats/port_popularity_report'

# Sorted so the result does not depend on the arbitrary order of os.listdir().
report_files = sorted(os.listdir(data_folder))
if not report_files:
    # Fail loudly instead of silently keeping a stale port_popularity.
    raise FileNotFoundError(f'No port popularity reports found in {data_folder}')

partial_reports = [pd.read_csv(os.path.join(data_folder, file)) for file in report_files]

# Sum per port_id rather than by row position, so reports whose rows are in a
# different order (or that miss some ports) still add up correctly.
# Only 'port_id' and '#arrivals_departures' are kept in the result.
port_popularity = (
    pd.concat(partial_reports, ignore_index=True)
    .groupby('port_id', as_index=False, sort=False)['#arrivals_departures']
    .sum()
)

In [None]:
# Write the port popularity table to a single CSV (header row, no index column).
port_popularity.to_csv(
    './data/csv/stats/port_popularity_report.csv',
    header=True,
    index=False,
)

In [None]:
# Load the port popularity table from its CSV report.
port_popularity = pd.read_csv(
    './data/csv/stats/port_popularity_report.csv'
)

In [None]:
# Attach each port's count to its row in `ports` (ports.gid == port_id);
# the inner join keeps only ports present in both tables.
port_popularity_merged = pd.merge(
    ports,
    port_popularity,
    how='inner',
    left_on='gid',
    right_on='port_id',
)
port_popularity_merged.head()

In [None]:
# Ports ordered from the highest to the lowest '#arrivals_departures'.
port_popularity_merged.sort_values(
    by=['#arrivals_departures'],
    ascending=False,
)

In [None]:
import matplotlib
from sklearn.preprocessing import MinMaxScaler

# scaler = MinMaxScaler(feature_range=(0,1))
# port_popularity_merged['cmap'] = scaler.fit_transform(port_popularity.values[:,1].reshape(-1,1))

# Colour value per port: log10 of its count, with log10(0) = -inf mapped to 0.
# NOTE(review): ports with a count of 0 and of 1 both end up at 0 on this scale.
visit_counts = port_popularity_merged['#arrivals_departures']
port_popularity_merged['cmap'] = visit_counts.apply(np.log10).replace(-np.inf, 0)

gsplt.map_plot(
    port_popularity_merged,
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    column='cmap',
    color=[None],
    cmap='viridis',
    title='Port Popularity',
    fontsize=10,
    legend=True,
)
plt.axis('off')
plt.show()
# gsplt.map_plot(port_popularity_merged, column='#arrivals_departures', color=[None], cmap='viridis', title='Port Popularity', fontsize=10, legend=True)

## Section 3: Distribution of AIS Activity
  * ### Outside Port

In [None]:
def vessel_ais_activity(vessel, ports, port_epsg=2154, port_radius=2000, temporal_threshold=12):
    """Segment one vessel's records by port bounds and by temporal gaps.

    The records are split with ``gspp.segment_trajectories_v2`` against the
    port bounds, split again with ``gspp.__temporal_segment``, and the
    resulting pieces are concatenated into a single frame ordered by ``ts``.

    Parameters
    ----------
    vessel : records of a single vessel, handed to ``gspp.segment_trajectories_v2``.
    ports : port table handed to ``gspp.create_port_bounds``.
    port_epsg, port_radius : forwarded to ``gspp.create_port_bounds``.
    temporal_threshold : forwarded to ``gspp.__temporal_segment``
        (presumably hours -- TODO confirm).

    Returns
    -------
    The concatenated segments sorted by ``ts``, without the ``index`` column.
    """
    # NOTE(review): the bounds are rebuilt on every call; if `ports` has already
    # been passed through create_port_bounds this may apply it twice -- confirm.
    bounds = gspp.create_port_bounds(ports, epsg=port_epsg, port_radius=port_radius)
    by_port = gspp.segment_trajectories_v2(vessel, bounds)
    by_gap = gspp.__temporal_segment(by_port, temporal_threshold=temporal_threshold)
    return (
        pd.concat(by_gap, ignore_index=True)
        .sort_values('ts')
        .drop(['index'], axis=1)
    )

In [None]:
# Segment every vessel (one group per mmsi) and order the result by timestamp.
traj_seg = (
    traj
    .groupby('mmsi', group_keys=False)
    .apply(vessel_ais_activity, ports)
    .sort_values('ts')
)

In [None]:
# Map of the segmented trajectories, coloured by traj_id.
gsplt.map_plot(
    traj_seg,
    color=[None],
    column='traj_id',
    cmap='tab20',
    title='Testing Trajectories Segments',
    fontsize=10,
    legend=True,
)

In [None]:
# Calendar date of every record (ts is interpreted as seconds).
ts_date = pd.to_datetime(traj_seg.ts, unit='s').dt.date

# Step 1: hours covered by each (vessel, temporal segment, date) group, taken
# as the sum of consecutive timestamp differences inside the group.
hours_per_segment = (
    traj_seg
    .groupby(['mmsi', 'traj_id_12h_gap', ts_date], group_keys=False)
    .apply(lambda segment: segment.ts.diff().sum()/3600)
    .to_frame()
    .reset_index()
)
hours_per_segment.columns = ['mmsi', 'traj_id_temporal_gap','date', '#hrs']

# Step 2: add the segments up per vessel and date.
ais_activity_outside_port = (
    hours_per_segment
    .groupby(['mmsi', 'date'], group_keys=False)
    .apply(lambda day: day['#hrs'].sum())
    .to_frame()
    .reset_index()
)
ais_activity_outside_port.columns = ['mmsi', 'date', '#hrs']
ais_activity_outside_port

  * ### Within Port

In [None]:
# Hours per vessel and date elapsed across hand-overs from one trajectory to
# the next, i.e. between a row whose traj_id is exactly 1 higher than the
# previous row's and that previous row.
# Rows are collected in a list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
within_port_records = []

for (mmsi, date), mmsi_traj in traj_seg.groupby(['mmsi', pd.to_datetime(traj_seg.ts, unit='s').dt.date]):
    # Mark rows that start the next trajectory. Using diff() in row order
    # avoids the label slice loc[bp-1:bp], which only works when the index
    # labels are consecutive integers.
    starts_next_traj = mmsi_traj.traj_id.diff() == 1
    gap_seconds = mmsi_traj.ts.diff()[starts_next_traj].sum()
    # A group without any hand-over yields 0 hours.
    within_port_records.append({'mmsi': mmsi, 'date': date, '#hrs': np.around(gap_seconds / 3600, 3)})

ais_activity_within_port = pd.DataFrame(within_port_records, columns=['mmsi', 'date', '#hrs'])

In [None]:
# Display the within-port activity table (columns: mmsi, date, #hrs).
ais_activity_within_port

# To Use on the Script

In [None]:
# Per-vessel variant of the outside-port computation: segment one vessel at a
# time and print the head of its hours per (mmsi, temporal segment, date).
for mmsi, ves_traj in traj.groupby('mmsi', group_keys=False):
    # vessel_ais_activity is the segmentation helper defined in this notebook;
    # the name used here before, vessel_activity_outside_port, is not defined
    # and raised a NameError on a fresh kernel.
    ves_traj = vessel_ais_activity(ves_traj, ports)
    ts_date = pd.to_datetime(ves_traj.ts, unit='s').dt.date
    ais_activity_outside_port = (
        ves_traj
        .groupby(['mmsi', 'traj_id_12h_gap', ts_date], group_keys=False)
        .apply(lambda df: df.ts.diff().sum()/3600)
        .to_frame()
        .reset_index()
    )
    ais_activity_outside_port.columns = ['mmsi', 'traj_id_temporal_gap','date', '#hrs']
    print(ais_activity_outside_port.head())

In [None]:
# tmp = points_within_geometry.groupby(['mmsi', pd.to_datetime(points_within_geometry.ts, unit='s').dt.date], group_keys=False).apply(lambda df: df.ts.diff().sum()/3600).to_frame().reset_index()
# tmp.columns = ['mmsi', 'date', '#hrs']
# tmp

# gsplt.map_plot(points_within_geometry, points_outside_geometry, color=['r','steelblue'], title='Points Outside/Inside Port Radius', fontsize=10)

## Read CSVs And Make Plots

In [None]:
data_folder = './data/csv/stats/nari_dynamic_ais_activity_report/'

# Partial reports, collected under the keyword found in their file name.
reports_by_kind = {'outside': [], 'within': []}

for file in os.listdir(data_folder):
    report = pd.read_csv(os.path.join(data_folder, file))
    # 'outside' is tested first, so a name containing both keywords is filed
    # under 'outside'; names containing neither keyword are read but not kept.
    for kind, collected in reports_by_kind.items():
        if kind in file:
            print (f"'{kind}' in: {file}")
            collected.append(report)
            break

# One frame per kind, ordered by date with a fresh 0..n-1 index.
nari_dynamic_ais_activity_report_outside_port = (
    pd.concat(reports_by_kind['outside'], ignore_index=True)
    .sort_values(['date'])
    .reset_index(drop=True)
)
nari_dynamic_ais_activity_report_within_port = (
    pd.concat(reports_by_kind['within'], ignore_index=True)
    .sort_values(['date'])
    .reset_index(drop=True)
)

In [None]:
# Write both combined reports to disk (header row, no index column).
for report, csv_path in (
    (nari_dynamic_ais_activity_report_outside_port, './data/csv/stats/nari_dynamic_ais_activity_report_outside_port.csv'),
    (nari_dynamic_ais_activity_report_within_port, './data/csv/stats/nari_dynamic_ais_activity_report_within_port.csv'),
):
    report.to_csv(csv_path, index=False, header=True)

In [None]:
# Outer join on (mmsi, date): a vessel/day present in only one report is kept,
# and the '#hrs' columns are told apart by their suffix.
tmp = pd.merge(
    nari_dynamic_ais_activity_report_outside_port,
    nari_dynamic_ais_activity_report_within_port,
    how='outer',
    left_on=['mmsi','date'],
    right_on=['mmsi','date'],
    suffixes=('_outside_port', '_within_port'),
)
tmp.to_csv(
    './data/csv/stats/nari_dynamic_ais_activity_report.csv',
    index=False,
    header=True,
)

In [None]:
# Load the merged outside/within-port activity report from its CSV.
tmp = pd.read_csv(
    './data/csv/stats/nari_dynamic_ais_activity_report.csv'
)

In [None]:
# Preview the first rows of the merged activity report.
tmp.head()

In [None]:
# Total hours outside / within port over all vessels, per calendar day.
report_day = pd.to_datetime(tmp.date).dt.to_period('D')


def _total_hours(day_rows):
    """Return a one-row frame: [sum of outside-port hours, sum of within-port hours]."""
    return pd.DataFrame([[day_rows['#hrs_outside_port'].sum(), day_rows['#hrs_within_port'].sum()]])


ais_activity_by_date = (
    tmp
    .groupby([report_day])
    .apply(_total_hours)
    .reset_index()
    .drop(['level_1'], axis=1)  # drop the inner index level added by apply
)
ais_activity_by_date.columns = ['date', '#Hours Outside Port', '#Hours Within Port']
ais_activity_by_date.head()

In [None]:
# Grouped bars per date: hours outside vs. within port.
ais_activity_by_date.plot.bar(
    x='date',
    cmap='tab20',
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    width=0.95,
    rot=0,
)
plt.show()

In [None]:
# Mean hours outside / within port per value of the date column.
def _mean_hours(date_rows):
    """Return a one-row frame: [mean outside-port hours, mean within-port hours]."""
    return pd.DataFrame([[date_rows['#hrs_outside_port'].mean(), date_rows['#hrs_within_port'].mean()]])


avg_ais_activity_by_date = (
    tmp
    .groupby([tmp.date])
    .apply(_mean_hours)
    .reset_index()
    .drop(['level_1'], axis=1)  # drop the inner index level added by apply
)
avg_ais_activity_by_date.columns = ['date', 'Avg. #Hours Outside Port', 'Avg. #Hours Within Port']
avg_ais_activity_by_date.head()

In [None]:
# Per-vessel totals of hours outside port, within port, and with the AIS
# device off, summed over the days on which the vessel appears in the report.
report_day = pd.to_datetime(tmp.date).dt.to_period('D')

# Hours outside / within port per (vessel, day).
daily_hours = tmp.groupby(['mmsi', report_day])[['#hrs_outside_port', '#hrs_within_port']].sum()

# Hours of the day not accounted for by either state. The parentheses matter:
# the previous expression `24 - outside + within` added the within-port hours
# instead of subtracting them.
daily_hours['#hrs_ais_off'] = 24 - (daily_hours['#hrs_outside_port'] + daily_hours['#hrs_within_port'])

ais_activity_by_mmsi = daily_hours.groupby(level='mmsi').sum().reset_index()
ais_activity_by_mmsi.columns = ['mmsi', '#Hours Outside Port', '#Hours Within Port', '#Hours AIS Device OFF']
ais_activity_by_mmsi

In [None]:
# Pie chart of the total hours per state, summed over all vessels.
state_labels = ['Outside Port', 'Within Port', 'AIS Device OFF']
total_hours = ais_activity_by_mmsi.iloc[:, 1:].sum().values  # skip the mmsi column
df = pd.DataFrame({'#Hours': total_hours}, index=state_labels)

ax = df.plot.pie(
    y='#Hours',
    figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT),
    explode=(0.1, 0, 0),  # offset the first slice
    startangle=90,
    autopct='%1.1f%%',
    pctdistance=0.84,
    cmap='tab20c',
    legend=False,
)
ax.axis('equal')

# Enlarge every text; texts containing a space are additionally greyed out.
for label in ax.texts:
    label.set_fontsize(15)
    if ' ' in label.get_text():
        label.set_color('grey')

plt.ylabel('')

In [None]:
# Mean hours outside / within port per vessel.
avg_ais_activity_by_vessel = (
    tmp
    .groupby('mmsi')[['#hrs_outside_port', '#hrs_within_port']]
    .mean()
    .reset_index()
)
avg_ais_activity_by_vessel.columns = ['mmsi', '#Hours Outside Port', '#Hours Within Port']

# x='mmsi' labels each bar group with its vessel; without it the numeric mmsi
# column was itself drawn as a bar series next to the hour values.
avg_ais_activity_by_vessel.plot.bar(x='mmsi', cmap='tab20', figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), width=0.95, rot=0)

In [None]:
# Total hours outside / within port per week.
report_week = pd.to_datetime(tmp.date).dt.to_period('W')

cumulative_ais_activity_by_week = (
    tmp
    .groupby(report_week)[['#hrs_outside_port', '#hrs_within_port']]
    .sum()
    .reset_index()
)
cumulative_ais_activity_by_week.columns = ['week', '#Hours Outside Port', '#Hours Within Port']

# x='week' labels each bar group with its week instead of the row number.
cumulative_ais_activity_by_week.plot.bar(x='week', cmap='tab20', figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), width=0.95, rot=0)

In [None]:
# Mean hours outside / within port per week.
report_week = pd.to_datetime(tmp.date).dt.to_period('W')

avg_ais_activity_by_week = (
    tmp
    .groupby(report_week)[['#hrs_outside_port', '#hrs_within_port']]
    .mean()
    .reset_index()
)
# Name the columns so the legend shows labels rather than 0 / 1.
avg_ais_activity_by_week.columns = ['week', 'Avg. #Hours Outside Port', 'Avg. #Hours Within Port']

# x='week' labels each bar group with its week instead of the row number.
avg_ais_activity_by_week.plot.bar(x='week', cmap='tab20', figsize=(PLT_FIG_WIDTH, PLT_FIG_HEIGHT), width=0.95, rot=0)