# Pipeline B

In [None]:
##> Automated module reload
%load_ext autoreload
%autoreload 2 #

In [None]:
import os
import sys
sys.path.append("../")
# sys.path.append("../../")

import datetime
from datetime import datetime, timedelta, date

import numpy as np
import pandas as pd
from pandas import read_csv, to_datetime

from geopandas import GeoDataFrame, read_file
import movingpandas as mpd
from shapely.geometry import Point


from dtaidistance import dtw_ndim
from dtaidistance import dtw

from sklearn.cluster import HDBSCAN


import matplotlib.pyplot as plt

import hvplot
import hvplot.pandas
import holoviews as hv
from holoviews import opts, dim;
import matplotlib.pyplot as plt
from holoviews.element.tiles import EsriImagery
# Default options applied to every holoviews Overlay in this notebook.
opts.defaults(
    opts.Overlay(
        active_tools=['wheel_zoom'],
        frame_width=500,
        frame_height=400,
    )
)
# Shared keyword arguments intended for hvplot calls.
hvplot_defaults = dict(tiles='ESRI', cmap='Viridis', colorbar=True)

In [None]:
from src.macros.macros1 import COLUMNS_DTYPES
from src.macros.macros1 import COASTLINE
from src.preprocess.load import MMSIperDayFeatures, TYPES_TO_INT, get_data_for_time_frame, query_by_type

+ Import the coastline map and load it as a GeoDataFrame

In [None]:
# Read the Kiel Fjord map and keep only the rows named "kiel fjord coastline".
file = '../assets/maps/kiel_fjord_epsg4326.geojson'
map_gdf = read_file(file)
is_coastline = map_gdf["name"] == "kiel fjord coastline"
map_coastline = map_gdf.loc[is_coastline]

In [None]:
# Coastline outline on a transparent background; reused as the base layer of the overlays.
# Optional extra opts: xaxis=None, yaxis=None, toolbar=None, show_frame=False
coastline_plt = (
    map_coastline
    .hvplot(geo=True, line_width=2, line_color='black', alpha=1)
    .opts(bgcolor='rgba(0, 0, 0, 0)')
)

## 1. LOAD the data

In [None]:
# Time frame to load; both bounds are the same calendar day (2022-07-05).
START_DATE = date(2022, 7, 5)
END_DATE = date(2022, 7, 5)

In [None]:
# Relative path of the directory holding the CSV input files.
src = r"../assets/csv/"
# Load the data for the START_DATE..END_DATE time frame from that directory.
data = get_data_for_time_frame(src, START_DATE, END_DATE)

data: list
+ mmsi: int
+ day: datetime.date
+ static: DataFrame
+ own: DataFrame
+ s2s: DataFrame

## 2. select by TYPE

In [None]:
# Restrict the loaded data to a single ship type and report how many entries matched.
selected_type = "SAILING_FAMILY"
data_selected = query_by_type(data, selected_type)
n_selected = len(data_selected)
print(f"Number of {selected_type} ships found: {n_selected} ships")

## 3. prepare DATA

- Update the trajectory id
- feature set (subset of own/s2s)
- prepare the data

In [10]:
##> Update the traj_id of each of the data.own in the structure of f"{mmsi}.{day}.{traj_id}" <##

#> Loop over the data objects list
# NOTE: this cell is not idempotent - running it again prefixes the already
# prefixed ids a second time. Reload the data before re-running it.
for ft in data_selected:
    own = ft.own
    # Include the day so that ids of the same vessel on different days stay
    # distinct, as the documented "{mmsi}.{day}.{traj_id}" format requires.
    own["traj_id"] = (
        own["mmsi"].astype(str)
        + "." + str(ft.day)
        + "." + own["traj_id"].astype(str)
    )

In [11]:
# One `own` DataFrame per selected ship entry (the type-filtered subset, not all loaded data).
own_feature_list = [ft.own for ft in data_selected]  # list of df

+ Store in TrajectoryCollection

In [12]:
# Build one movingpandas Trajectory per distinct `traj_id` of every selected ship.
# - Iterates `data_selected` so that only the chosen ship type is aggregated.
# - Passes the actual id values: `traj_id` / `obj_id` of mpd.Trajectory are the
#   identifiers themselves, not column names.
# NOTE(review): assumes each distinct `traj_id` value in `own` is a separate trip - confirm.
traj_list = []
for ft in data_selected:
    df = ft.own
    geometry = [Point(xy) for xy in zip(df['inter_lon'], df['inter_lat'])] # ! lon->lat and not lat->lon
    gdf = GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)
    for traj_id, traj_gdf in gdf.groupby('traj_id', sort=False):
        # A trajectory needs at least two points; skip degenerate groups.
        if len(traj_gdf) < 2:
            continue
        traj_list.append(mpd.Trajectory(traj_gdf, traj_id=traj_id, obj_id=ft.mmsi, t='t'))

In [13]:
# Bundle the individual trajectories into a single collection.
tc = mpd.TrajectoryCollection(
    traj_list,
    traj_id_col='traj_id',
    t='t',
    crs='epsg:4326',
)

In [14]:
##> Check trajectories validity <##

# One flag per trajectory, kept as a list so individual failures can be inspected.
valid_list = [traj.is_valid() for traj in tc.trajectories]
all(valid_list)

True

## Generalize the Trajectories

Generalizing the trip trajectories speeds up the following aggregation step

In [15]:
# Simplify every trajectory of the collection with a minimum-distance generalizer.
generalizer = mpd.MinDistanceGeneralizer(tc)
tc_generalized = generalizer.generalize(tolerance=100)

## Flow and Hotspot Clusters

+ Aggregate the trajectories

In [16]:
# Aggregate the *generalized* collection: the generalization step exists to
# speed up this aggregation, so the simplified trajectories are the intended input.
aggregator = mpd.TrajectoryCollectionAggregator(
    tc_generalized,
    max_distance=1000,
    min_distance=100,
    min_stop_duration=timedelta(minutes=5),
)

+ Create the hotspots and cluster them

In [17]:
# Significant points reported by the aggregator.
pts = aggregator.get_significant_points_gdf()

# Clusters reported by the aggregator.
clusters = aggregator.get_clusters_gdf()

+ Plot

In [19]:
esri_tiles = EsriImagery()  # not used in this overlay

# Significant points drawn as cyan crosses on a transparent background.
aggreg_points = pts.hvplot(
    geo=True, marker='x', color='cyan', alpha=0.7
).opts(bgcolor='rgba(0, 0, 0, 0)')
# Clusters drawn as red markers whose size scales with the 'n' column.
aggreg_clusters = clusters.hvplot(
    geo=True, color='red', size=dim('n')*0.8, alpha=0.4
).opts(bgcolor='rgba(0, 0, 0, 0)')

# Stack coastline, points and clusters, then export the result as standalone HTML.
aggreg_overlay = (coastline_plt * aggreg_points * aggreg_clusters).opts(title="Trajectory Aggregation")
hvplot.save(aggreg_overlay, 'Sailing_aggreg_overlay.html', fmt='html')



In [20]:
# Render the aggregation overlay inline as this cell's output.
aggreg_overlay

+ Cluster the flows

In [18]:
# Retrieve the aggregated flows; the flow plot reads their 'weight' column.
flows = aggregator.get_flows_gdf()

+ Plot

In [21]:
esri_tiles = EsriImagery()

# Flow lines: width follows the 'weight' column, which is also shown on hover.
flow_plt = flows.hvplot(
    geo=True, hover_cols=['weight'], line_width=dim('weight')*1, color='#1f77b3', alpha=0.6
).opts(bgcolor='rgba(0, 0, 0, 0)')
# Cluster markers: size scales with the 'n' column.
flow_clusters = clusters.hvplot(
    geo=True, color='red', size=dim('n')*0.8, alpha=0.4
).opts(bgcolor='rgba(0, 0, 0, 0)')

##> TODO: Overlay with tiles is not working: Plot the shoreline instead
# flow_overlay = (esri_tiles * flow_plt * flow_clusters).opts(title="Trajectory Flow")
flow_overlay = (coastline_plt * flow_plt * flow_clusters).opts(title="Trajectory Flow")
# Export the overlay as standalone HTML.
hvplot.save(flow_overlay, 'sailing_flow_overlay.html', fmt='html')
  



In [22]:
# Render the flow overlay inline as this cell's output.
flow_overlay