### Import packages and set paths

In [None]:
import os

import dask.dataframe as dd
import dask_gateway
import dask.distributed

import dotenv
import warnings
warnings.filterwarnings('ignore')

In [None]:
import matplotlib.pyplot as plt
import geopandas
from shapely.geometry import Polygon, LineString, Point, MultiPolygon
from shapely.ops import transform, cascaded_union
import numpy as np
import movingpandas as mpd
import datetime
import pandas as pd
import geopandas as gpd
import os
import pyproj
import scipy
import pyarrow as pa
import pickle

In [None]:
# Location of the pre-processed AIS data: <container>/parquet/<folder_name>
folder_name = '2022_PoR'
path_name = f'abfs://ais/parquet/{folder_name}'

### Load the access token (the data is protected with a SAS token)

In [None]:
# Secrets are passed in through environment variables (this needs python-dotenv).
# Copy the .env.example file and rename the copy to .env (one directory up from the notebooks),
# then fill in the values.
%load_ext dotenv
# Load the environment variables from the .env file one directory up (-v presumably turns on verbose output; confirm against python-dotenv)
%dotenv -v

In [None]:
# Read the SAS token from the values parsed out of the .env file.
# The entry AZURE_BLOB_SAS_TOKEN has to be present, otherwise this lookup fails.
sas_token = dotenv.dotenv_values()['AZURE_BLOB_SAS_TOKEN']

In [None]:
# Connect to the dask-gateway server; no address is passed, so the connection
# settings presumably come from the dask-gateway configuration (confirm for your deployment)
gateway = dask_gateway.Gateway()
# Options object for a new cluster, used here without modification
cluster_options = gateway.cluster_options()
# Start a new cluster with those options
cluster = gateway.new_cluster(cluster_options)
# Adaptive scaling: between 1 and 100 workers
cluster.adapt(minimum=1, maximum=100)
# Last expression of the cell, so the notebook displays the cluster
cluster

In [None]:
# Create a client for the gateway cluster created in the previous cell
client = dask.distributed.Client(cluster)
# Last expression of the cell, so the notebook displays the client
client

In [None]:
def worker_setup(dask_worker: dask.distributed.Worker):
    """
    Function that installs the extra Python packages on a dask worker

    Each package is installed with a separate pip call, so a failure of one
    package does not prevent the others from being installed. A failed
    install is logged as a warning and does not raise.

    Parameters
    ----------
    dask_worker: the worker on which the callback is executed (not used in the body)

    :returns: None
    """
    # Imported inside the function so that it is self-contained when it is
    # shipped to and executed on a worker
    import logging
    import subprocess
    import sys

    for package in ("movingpandas", "more-itertools", "dask"):
        # "python -m pip" installs into the interpreter that runs this code;
        # a bare "pip" found on PATH is not guaranteed to belong to it
        completed = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q", package]
        )
        if completed.returncode != 0:
            # Best effort: report the failure instead of silently ignoring it
            logging.getLogger("distributed.worker").warning(
                "pip install %s failed with exit code %s",
                package,
                completed.returncode,
            )

# Register the setup function as a worker callback through the client
client.register_worker_callbacks(worker_setup)

### Create a dataframe with all the vessels of interest and their horizontal dimensions

In [None]:
#Functions
def create_ship_dataframe(df,static_columns):
    """ 
    Function that selects the static column information of vessels

    For every vessel name the most frequent value (mode) of each static column
    is taken. When several values are equally frequent the first one returned
    by pandas' mode() is used; when a vessel has no valid value at all for a
    column, 0 is used instead.

    Parameters
    ----------
    df: pandas dataframe with AIS data, containing a 'name' column
    static_columns: list of the column names of the dataframe that contain static information

    :returns: pandas dataframe indexed by vessel name (order of first appearance)
              with one column per entry of static_columns; columns that are
              not present in df are filled with NaN
    """

    # Only the requested columns that exist in df can be summarised
    present_columns = [column for column in dict.fromkeys(static_columns)
                       if column in df.columns]

    names = []
    values = {column: [] for column in present_columns}
    # A single pass over the data instead of one full scan per vessel:
    # sort=False keeps the order of first appearance and groupby leaves out
    # rows without a name, which the per-name selection never matched either
    for name, df_ship in df.groupby('name', sort=False, observed=True):
        names.append(name)
        for column in present_columns:
            modes = df_ship[column].mode()
            values[column].append(modes.iloc[0] if len(modes) else 0)

    if not names:
        return pd.DataFrame(columns=static_columns)

    # Build the result once; repeated pd.concat calls copy all previously
    # collected rows on every iteration
    return pd.DataFrame(values, index=names, columns=static_columns)

def create_unique_shipdataframe(df):
    """ 
    Function that creates a dataframe of unique vessels and their horizontal properties

    Parameters
    ----------
    df: pandas dataframe whose index identifies the vessel; the same index
        value may occur more than once

    :returns: pandas dataframe with the first row of every distinct index
              value, in order of first appearance; rows with a missing index
              value are left out and the index of the result has no name
    """

    # Vectorised selection of the first occurrence per index value; this keeps
    # the column dtypes and avoids a full scan plus a pd.concat per vessel
    first_occurrence = ~df.index.duplicated(keep='first')
    has_label = df.index.notna()
    unique_ship_df = df.loc[first_occurrence & has_label].copy()
    # The previous row-by-row construction always produced an unnamed index
    unique_ship_df.index.name = None
    return unique_ship_df

In [None]:
# Static vessel properties that are summarised per ship
static_columns = ['vesseltype','hazardouscargo','length','width']

# Arrow types handed to the parquet writer; '__null_dask_index__' is
# presumably the column under which dask stores the unnamed index (confirm)
scheme_information = {
    'vesseltype': pa.int64(),
    'hazardouscargo': pa.int64(),
    'length': pa.float64(),
    'width': pa.float64(),
    '__null_dask_index__': pa.string(),
}

# Read the selected vessels from blob storage
ddf = dd.read_parquet(
    f'{path_name}/selected_vessels_for_further_analysis',
    storage_options={"account_name": "rwsais", "sas_token": sas_token},
)

# NOTE(review): only the first two partitions are processed here - confirm
# that this is intended and not a leftover from testing.
# Steps: summarise the static columns per ship within each partition, bring
# everything together in one partition, then keep one row per ship.
ddf_i = (
    ddf.partitions[:2]
    .map_partitions(
        create_ship_dataframe,
        static_columns=static_columns,
        meta=pd.DataFrame(columns=static_columns),
    )
    .repartition(npartitions=1)
    .map_partitions(create_unique_shipdataframe)
)

# Write the ship dataframe back to blob storage with the explicit schema
ddf_i.to_parquet(
    f'{path_name}/ship_dataframe',
    storage_options={"account_name": "rwsais", "sas_token": sas_token},
    schema=scheme_information,
    engine='pyarrow',
)