In [1]:
import numpy as np
import pandas as pd

import sys, os
import datetime

import importlib
from tqdm import tqdm

In [4]:
import st_toolkit as geohl
importlib.reload(geohl)

import cri_calc as cri
importlib.reload(cri)

import cri_helper as helper
importlib.reload(helper)

import nn_evaluation as nne
importlib.reload(nne);

## Load Trajectories

In [6]:
df = pd.read_csv('./data/unipi_ais_dynamic_jul2018_1w_algn_linear_v2_w_lens.csv', parse_dates=['datetime'])
gdf = geohl.getGeoDataFrame_v2(df, crs='epsg:4326')

gdf2 = gdf.loc[gdf.datetime.dt.date.between(datetime.date(2018, 7, 3), datetime.date(2018, 7, 3), inclusive='both')].copy()
gdf_sub_moving = gdf2.loc[gdf2.speed.between(1, 50, inclusive='neither')].copy()

  arr = construct_1d_object_array_from_listlike(values)


## Loading VCRA Dataset 

In [7]:
gdf_vcra = pd.read_pickle('./data/unipi_ais_dynamic_jul2018_1w_vcra_dataset_v3.pickle')

In [8]:
tqdm.pandas(desc='Adding Vessels\' Length...')

# gdf_vcra.loc[:, 'own_length'] = gdf_vcra.own_Index.apply(lambda l: gdf_sub_moving[l].length)
mlp_input = gdf_vcra.loc[gdf_vcra.own_Index.isin(gdf_sub_moving.index.values)].copy()
mlp_input.loc[:, 'own_length'] = mlp_input.own_Index.progress_apply(lambda l: gdf_sub_moving.loc[l].length)
mlp_input.loc[:, 'target_length'] = mlp_input.target_Index.progress_apply(lambda l: gdf_sub_moving.loc[l].length)

Adding Vessels' Length...: 100%|██████████| 960268/960268 [01:46<00:00, 8986.68it/s]
Adding Vessels' Length...: 100%|██████████| 960268/960268 [01:45<00:00, 9107.25it/s]


In [9]:
timeslice_lens = gdf_sub_moving.groupby('datetime').apply(len)
min_slice, median_slice, max_slice = timeslice_lens.loc[timeslice_lens == timeslice_lens.min()].index[0],\
                                     timeslice_lens.loc[timeslice_lens == timeslice_lens.median()].index[0],\
                                     timeslice_lens.loc[timeslice_lens == timeslice_lens.max()].index[0]

In [10]:
gdf_min_slice    = gdf_sub_moving.loc[gdf_sub_moving.datetime == min_slice].copy()
gdf_median_slice = gdf_sub_moving.loc[gdf_sub_moving.datetime == median_slice].copy()
gdf_max_slice    = gdf_sub_moving.loc[gdf_sub_moving.datetime == max_slice].copy()

## Training an MLP (via sklearn) -- Prototype (with own length; as included in the Paper)

In [11]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [12]:
X = mlp_input[['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course', 'own_length']].values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)

In [13]:
regr = MLPRegressor(random_state=10, max_iter=30, hidden_layer_sizes=(256, 32), 
                    verbose=True, early_stopping=True, n_iter_no_change=10).fit(X_train_norm, y_train)

regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00385015
Validation score: 0.644050
Iteration 2, loss = 0.00259088
Validation score: 0.725855
Iteration 3, loss = 0.00230187
Validation score: 0.744237
Iteration 4, loss = 0.00216573
Validation score: 0.778982
Iteration 5, loss = 0.00204967
Validation score: 0.780121
Iteration 6, loss = 0.00198630
Validation score: 0.758965
Iteration 7, loss = 0.00190444
Validation score: 0.784247
Iteration 8, loss = 0.00183911
Validation score: 0.790634
Iteration 9, loss = 0.00179812
Validation score: 0.808164
Iteration 10, loss = 0.00176251
Validation score: 0.805912
Iteration 11, loss = 0.00172066
Validation score: 0.831105
Iteration 12, loss = 0.00168713
Validation score: 0.824960
Iteration 13, loss = 0.00164822
Validation score: 0.827729
Iteration 14, loss = 0.00162294
Validation score: 0.831060
Iteration 15, loss = 0.00158726
Validation score: 0.835460
Iteration 16, loss = 0.00156929
Validation score: 0.838889
Iteration 17, loss = 0.00154823
Validation score: 0.834683
Iterat



0.868270934890964

In [14]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

cri_pred = pd.Series(regr.predict(scaler.transform(X_test))).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred, squared=False)}')

MAE: 0.01787829212857179
RMSE: 0.04847896187973707


In [15]:
from joblib import dump, load
dump(regr, './data/vcra-1w-mlp-hidden_256_32-with_own_length.joblib') 
dump(scaler, './data/vcra-1w-mlp-hidden_256_32-with_own_length.scaler.joblib') 

['./data/vcra-1w-mlp-hidden_256_32-parkj-with_own_length.scaler.joblib']

In [49]:
from joblib import dump, load
regr = load('./data/vcra-1w-mlp-hidden_256_32-with_own_length.joblib') 
scaler = load('./data/vcra-1w-mlp-hidden_256_32-with_own_length.scaler.joblib')

In [16]:
%%timeit
nne.calc_cri_timeslice(gdf_min_slice, model=regr, model_fun=nne.calc_cri_ours_with_own_length, model_norm=scaler)

196 ms ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%%timeit
nne.calc_cri_timeslice(gdf_median_slice, model=regr, model_fun=nne.calc_cri_ours_with_own_length, model_norm=scaler)

354 ms ± 6.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%%timeit
nne.calc_cri_timeslice(gdf_max_slice, model=regr, model_fun=nne.calc_cri_ours_with_own_length, model_norm=scaler)

680 ms ± 20.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Training an MLP (via sklearn) -- Prototype (with target length)

In [19]:
X = mlp_input[['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course', 'target_length']].values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)

In [20]:
regr = MLPRegressor(random_state=10, max_iter=30, hidden_layer_sizes=(256, 32), 
                    verbose=True, early_stopping=True, n_iter_no_change=10).fit(X_train_norm, y_train)

regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00391583
Validation score: 0.664977
Iteration 2, loss = 0.00257262
Validation score: 0.727071
Iteration 3, loss = 0.00228228
Validation score: 0.746228
Iteration 4, loss = 0.00213174
Validation score: 0.785120
Iteration 5, loss = 0.00200412
Validation score: 0.781859
Iteration 6, loss = 0.00192716
Validation score: 0.782292
Iteration 7, loss = 0.00185371
Validation score: 0.801844
Iteration 8, loss = 0.00179975
Validation score: 0.794698
Iteration 9, loss = 0.00175934
Validation score: 0.815371
Iteration 10, loss = 0.00172932
Validation score: 0.816071
Iteration 11, loss = 0.00167105
Validation score: 0.824834
Iteration 12, loss = 0.00165240
Validation score: 0.826684
Iteration 13, loss = 0.00161315
Validation score: 0.841290
Iteration 14, loss = 0.00159400
Validation score: 0.841351
Iteration 15, loss = 0.00155482
Validation score: 0.825070
Iteration 16, loss = 0.00154003
Validation score: 0.847538
Iteration 17, loss = 0.00152088
Validation score: 0.839932
Iterat



0.8713368534093084

In [21]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

cri_pred = pd.Series(regr.predict(scaler.transform(X_test))).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred, squared=False)}')

MAE: 0.01669295304663667
RMSE: 0.047994497012382016


In [22]:
from joblib import dump, load
# dump(regr, './data/vcra-1w-mlp-hidden_128_32_8-parkj.joblib') 
dump(regr, './data/vcra-1w-mlp-hidden_256_32-with_target_length.joblib') 
dump(scaler, './data/vcra-1w-mlp-hidden_256_32-with_target_length.scaler.joblib') 

['./data/vcra-1w-mlp-hidden_256_32-parkj-with_target_length.scaler.joblib']

In [53]:
from joblib import dump, load
regr = load('./data/vcra-1w-mlp-hidden_256_32-with_target_length.joblib') 
scaler = load('./data/vcra-1w-mlp-hidden_256_32-with_target_length.scaler.joblib')

In [23]:
%%timeit
nne.calc_cri_timeslice(gdf_min_slice, model=regr, model_fun=nne.calc_cri_ours_with_target_length, model_norm=scaler)

201 ms ± 19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [24]:
%%timeit
nne.calc_cri_timeslice(gdf_median_slice, model=regr, model_fun=nne.calc_cri_ours_with_target_length, model_norm=scaler)

360 ms ± 12.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [25]:
%%timeit
nne.calc_cri_timeslice(gdf_max_slice, model=regr, model_fun=nne.calc_cri_ours_with_target_length, model_norm=scaler)

684 ms ± 15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Training an MLP (via sklearn) -- Prototype (with both lengths)

In [26]:
X = mlp_input[['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course', 'own_length', 'target_length']].values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)

In [27]:
regr = MLPRegressor(random_state=10, max_iter=30, hidden_layer_sizes=(256, 32), 
                    verbose=True, early_stopping=True, n_iter_no_change=10).fit(X_train_norm, y_train)

regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00316194
Validation score: 0.722403
Iteration 2, loss = 0.00225024
Validation score: 0.770253
Iteration 3, loss = 0.00203678
Validation score: 0.775885
Iteration 4, loss = 0.00190592
Validation score: 0.803995
Iteration 5, loss = 0.00182232
Validation score: 0.795145
Iteration 6, loss = 0.00173025
Validation score: 0.815003
Iteration 7, loss = 0.00167437
Validation score: 0.823580
Iteration 8, loss = 0.00161848
Validation score: 0.833437
Iteration 9, loss = 0.00157605
Validation score: 0.847137
Iteration 10, loss = 0.00153821
Validation score: 0.841063
Iteration 11, loss = 0.00151086
Validation score: 0.838207
Iteration 12, loss = 0.00148582
Validation score: 0.849488
Iteration 13, loss = 0.00145137
Validation score: 0.850682
Iteration 14, loss = 0.00142890
Validation score: 0.857245
Iteration 15, loss = 0.00141052
Validation score: 0.849049
Iteration 16, loss = 0.00137840
Validation score: 0.850833
Iteration 17, loss = 0.00136663
Validation score: 0.851874
Iterat



0.8751391294555212

In [28]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

cri_pred = pd.Series(regr.predict(scaler.transform(X_test))).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred, squared=False)}')

MAE: 0.016516879851758048
RMSE: 0.047241267822664565


In [29]:
from joblib import dump, load
dump(regr, './data/vcra-1w-mlp-hidden_256_32-with_both_lengths.joblib') 
dump(scaler, './data/vcra-1w-mlp-hidden_256_32-with_both_lengths.scaler.joblib') 

['./data/vcra-1w-mlp-hidden_256_32-parkj-with_both_lengths.scaler.joblib']

In [57]:
from joblib import dump, load
regr = load('./data/vcra-1w-mlp-hidden_256_32-with_both_lengths.joblib') 
scaler = load('./data/vcra-1w-mlp-hidden_256_32-with_both_lengths.scaler.joblib')

In [30]:
%%timeit
nne.calc_cri_timeslice(gdf_min_slice, model=regr, model_fun=nne.calc_cri_ours_with_both_length, model_norm=scaler)

192 ms ± 17.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
%%timeit
nne.calc_cri_timeslice(gdf_median_slice, model=regr, model_fun=nne.calc_cri_ours_with_both_length, model_norm=scaler)

332 ms ± 11.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
%%timeit
nne.calc_cri_timeslice(gdf_max_slice, model=regr, model_fun=nne.calc_cri_ours_with_both_length, model_norm=scaler)

638 ms ± 25.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Training an MLP (via sklearn) -- Prototype (with no lengths)

In [33]:
X = mlp_input[['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course']].values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)

In [34]:
regr = MLPRegressor(random_state=10, max_iter=30, hidden_layer_sizes=(256, 32), 
                    verbose=True, early_stopping=True, n_iter_no_change=10).fit(X_train_norm, y_train)

regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00345780
Validation score: 0.714228
Iteration 2, loss = 0.00242119
Validation score: 0.757267
Iteration 3, loss = 0.00215228
Validation score: 0.798292
Iteration 4, loss = 0.00200535
Validation score: 0.799119
Iteration 5, loss = 0.00192372
Validation score: 0.797878
Iteration 6, loss = 0.00184796
Validation score: 0.811603
Iteration 7, loss = 0.00178147
Validation score: 0.805503
Iteration 8, loss = 0.00173805
Validation score: 0.815237
Iteration 9, loss = 0.00169157
Validation score: 0.825584
Iteration 10, loss = 0.00164824
Validation score: 0.839588
Iteration 11, loss = 0.00161789
Validation score: 0.846263
Iteration 12, loss = 0.00158159
Validation score: 0.828276
Iteration 13, loss = 0.00155130
Validation score: 0.837340
Iteration 14, loss = 0.00153501
Validation score: 0.848942
Iteration 15, loss = 0.00150642
Validation score: 0.858031
Iteration 16, loss = 0.00148796
Validation score: 0.837824
Iteration 17, loss = 0.00146462
Validation score: 0.852195
Iterat



0.8720737809964392

In [35]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

cri_pred = pd.Series(regr.predict(scaler.transform(X_test))).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred, squared=False)}')

MAE: 0.018860911530933527
RMSE: 0.04783611511719697


In [36]:
from joblib import dump, load
dump(regr, './data/vcra-1w-mlp-hidden_256_32-with_no_lengths.joblib') 
dump(scaler, './data/vcra-1w-mlp-hidden_256_32-with_no_lengths.scaler.joblib') 

['./data/vcra-1w-mlp-hidden_256_32-parkj-with_no_lengths.scaler.joblib']

In [61]:
from joblib import dump, load
regr = load('./data/vcra-1w-mlp-hidden_256_32-with_no_lengths.joblib') 
scaler = load('./data/vcra-1w-mlp-hidden_256_32-with_no_lengths.scaler.joblib')

In [37]:
%%timeit
nne.calc_cri_timeslice(gdf_min_slice, model=regr, model_fun=nne.calc_cri_ours_with_no_length, model_norm=scaler)

197 ms ± 22.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [38]:
%%timeit
nne.calc_cri_timeslice(gdf_median_slice, model=regr, model_fun=nne.calc_cri_ours_with_no_length, model_norm=scaler)

369 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
%%timeit
nne.calc_cri_timeslice(gdf_max_slice, model=regr, model_fun=nne.calc_cri_ours_with_no_length, model_norm=scaler)

695 ms ± 22.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
