In [1]:
import numpy as np
import pandas as pd

import sys, os
import datetime

import importlib
from tqdm import tqdm

In [2]:
# Project-local helper modules. Each one is reloaded right after being imported
# so that edits made on disk are picked up without restarting the kernel.
import st_toolkit as geohl
geohl = importlib.reload(geohl)

import cri_calc as cri
cri = importlib.reload(cri)

import cri_helper as helper
helper = importlib.reload(helper)

import nn_evaluation as nne
nne = importlib.reload(nne)

## Load Trajectories

In [3]:
# Read the AIS records (parsing the `datetime` column) and wrap them in a
# GeoDataFrame via the project helper, using EPSG:4326.
df = pd.read_csv('./data/unipi_ais_dynamic_jul2018_1w_algn_linear_v2_w_lens.csv', parse_dates=['datetime'])
gdf = geohl.getGeoDataFrame_v2(df, crs='epsg:4326')

# Restrict to a single calendar day (both bounds are the same date, inclusive).
target_day = datetime.date(2018, 7, 3)
on_target_day = gdf.datetime.dt.date.between(target_day, target_day, inclusive='both')
gdf2 = gdf.loc[on_target_day].copy()

# Keep records whose speed lies strictly between 1 and 50 (units as stored in the dataset).
is_moving = gdf2.speed.between(1, 50, inclusive='neither')
gdf_sub_moving = gdf2.loc[is_moving].copy()

  arr = construct_1d_object_array_from_listlike(values)


## Loading VCRA Dataset 

In [4]:
# Load the pre-computed VCRA dataset from a local pickle.
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
gdf_vcra = pd.read_pickle('./data/unipi_ais_dynamic_jul2018_1w_vcra_dataset_v3.pickle')

In [5]:
# Registers `.progress_apply` on pandas objects. The lookups below no longer need it,
# but the registration is retained so any other cell relying on it keeps working.
tqdm.pandas(desc='Adding Vessels\' Length...')

# Vessel length per trajectory record, keyed by the record index.
# Bracket access is deliberate: on a GeoDataFrame the attribute `.length` is the
# geometric length of the geometry column, not the 'length' data column.
# `Series.map` below requires this index to be unique (it raises otherwise).
vessel_length = gdf_sub_moving['length']

# Keep only encounters whose own AND target records are both present in the
# moving subset. Both indices are looked up below, so checking only `own_Index`
# would let a missing target slip through.
has_both_records = (
    gdf_vcra['own_Index'].isin(vessel_length.index)
    & gdf_vcra['target_Index'].isin(vessel_length.index)
)
mlp_input = gdf_vcra.loc[has_both_records].copy()

# Vectorised index -> length lookup (replaces two row-by-row `.loc` passes).
mlp_input['own_length'] = mlp_input['own_Index'].map(vessel_length)
mlp_input['target_length'] = mlp_input['target_Index'].map(vessel_length)

# Bin `cri` into left-closed intervals; `cri_bin` is used later to stratify the train/test split.
# NOTE(review): with right=False the last bin excludes its upper edge, so a `cri`
# exactly equal to the top edge would get a NaN bin -- confirm no such rows exist.
mlp_input['cri_bin'] = pd.cut(mlp_input['cri'], bins=np.arange(0, 1.1, .1), right=False)

Adding Vessels' Length...: 100%|██████████| 960268/960268 [00:36<00:00, 26315.52it/s]
Adding Vessels' Length...: 100%|██████████| 960268/960268 [00:36<00:00, 26454.23it/s]


In [6]:
# Number of records per timestamp (index: datetime, value: row count).
timeslice_lens = gdf_sub_moving.groupby('datetime').size()

# Timestamps with the fewest / most records; idxmin/idxmax return the first
# label attaining the extreme, matching the previous `.index[0]` selection.
min_slice = timeslice_lens.idxmin()
max_slice = timeslice_lens.idxmax()

# The median of the counts is not necessarily an observed count (e.g. with an
# even number of timestamps it can fall between two values), so an equality
# test may match nothing. Pick the timestamp whose count is closest to the
# median instead; when an exact match exists this is the first exact match.
median_slice = (timeslice_lens - timeslice_lens.median()).abs().idxmin()

In [7]:
# Independent copies of the records belonging to each of the three reference
# timestamps (fewest, median and most records).
gdf_min_slice, gdf_median_slice, gdf_max_slice = (
    gdf_sub_moving.loc[gdf_sub_moving['datetime'] == slice_ts].copy()
    for slice_ts in (min_slice, median_slice, max_slice)
)

## Training an MLP (via sklearn) -- with own length (as included in the paper)

In [8]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [9]:
# Inputs: the five encounter columns plus the own vessel's length; target: `cri`.
feature_cols = ['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course', 'own_length']
X = mlp_input[feature_cols].to_numpy()
y = mlp_input['cri'].to_numpy()

# 80/20 split, stratified on the CRI bin so both parts share the same CRI distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=10,
    stratify=mlp_input['cri_bin'],
)

# Standardisation statistics are estimated on the training split only.
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)

In [10]:
# MLP with two hidden layers (256, 32); early stopping on an internal validation
# split, at most 30 epochs, giving up after 10 epochs without improvement.
mlp_params = dict(
    hidden_layer_sizes=(256, 32),
    max_iter=30,
    early_stopping=True,
    n_iter_no_change=10,
    random_state=10,
    verbose=True,
)
regr = MLPRegressor(**mlp_params)
regr.fit(X_train_norm, y_train)

# Score on the held-out split (shown as the cell output).
regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00379227
Validation score: 0.697275
Iteration 2, loss = 0.00252789
Validation score: 0.735621
Iteration 3, loss = 0.00225819
Validation score: 0.749329
Iteration 4, loss = 0.00208126
Validation score: 0.777629
Iteration 5, loss = 0.00198242
Validation score: 0.783244
Iteration 6, loss = 0.00189374
Validation score: 0.813177
Iteration 7, loss = 0.00184354
Validation score: 0.802359
Iteration 8, loss = 0.00177591
Validation score: 0.817835
Iteration 9, loss = 0.00173640
Validation score: 0.816365
Iteration 10, loss = 0.00169408
Validation score: 0.826940
Iteration 11, loss = 0.00165977
Validation score: 0.838854
Iteration 12, loss = 0.00163004
Validation score: 0.837390
Iteration 13, loss = 0.00161024
Validation score: 0.830170
Iteration 14, loss = 0.00157001
Validation score: 0.821505
Iteration 15, loss = 0.00154639
Validation score: 0.844907
Iteration 16, loss = 0.00152386
Validation score: 0.831207
Iteration 17, loss = 0.00150701
Validation score: 0.850490
Iterat



0.858409721161965

In [11]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predictions on the held-out split, clipped to [0, 1] before scoring.
cri_pred = np.clip(regr.predict(scaler.transform(X_test)), 0, 1)

print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
# RMSE is computed as sqrt(MSE): the `squared=False` flag of mean_squared_error
# was deprecated in scikit-learn 1.4 and removed in 1.6.
print(f'RMSE: {np.sqrt(mean_squared_error(y_test, cri_pred))}')

MAE: 0.01958298955941779
RMSE: 0.05023021059397274


In [12]:
from joblib import dump, load

# Persist the fitted regressor together with the scaler it was trained with.
model_path = './data/vcra-1w-mlp-hidden_256_32-with_own_length.model.joblib'
scaler_path = './data/vcra-1w-mlp-hidden_256_32-with_own_length.scaler.joblib'

dump(regr, model_path)
dump(scaler, scaler_path)

['./data/vcra-1w-mlp-hidden_256_32-with_own_length.scaler.joblib']

In [13]:
from joblib import dump, load

# Reload the persisted own-length regressor/scaler pair; `regr` and `scaler`
# are rebound to the loaded objects.
model_path = './data/vcra-1w-mlp-hidden_256_32-with_own_length.model.joblib'
scaler_path = './data/vcra-1w-mlp-hidden_256_32-with_own_length.scaler.joblib'

regr = load(model_path)
scaler = load(scaler_path)

In [14]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_min_slice (the timestamp
# with the fewest records) with the own-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_min_slice, model=regr, model_fun=nne.calc_cri_ours_with_own_length, model_norm=scaler)

133 ms ± 86.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [15]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_median_slice (the timestamp
# with the median record count) with the own-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_median_slice, model=regr, model_fun=nne.calc_cri_ours_with_own_length, model_norm=scaler)

246 ms ± 701 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_max_slice (the timestamp
# with the most records) with the own-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_max_slice, model=regr, model_fun=nne.calc_cri_ours_with_own_length, model_norm=scaler)

487 ms ± 1.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Training an MLP (via sklearn) -- with target length

In [17]:
# Inputs: the five encounter columns plus the target vessel's length; target: `cri`.
feature_cols = ['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course', 'target_length']
X = mlp_input[feature_cols].to_numpy()
y = mlp_input['cri'].to_numpy()

# 80/20 split, stratified on the CRI bin so both parts share the same CRI distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=10,
    stratify=mlp_input['cri_bin'],
)

# Standardisation statistics are estimated on the training split only.
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)

In [18]:
# MLP with two hidden layers (256, 32); early stopping on an internal validation
# split, at most 30 epochs, giving up after 10 epochs without improvement.
mlp_params = dict(
    hidden_layer_sizes=(256, 32),
    max_iter=30,
    early_stopping=True,
    n_iter_no_change=10,
    random_state=10,
    verbose=True,
)
regr = MLPRegressor(**mlp_params)
regr.fit(X_train_norm, y_train)

# Score on the held-out split (shown as the cell output).
regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00391390
Validation score: 0.694992
Iteration 2, loss = 0.00253660
Validation score: 0.740284
Iteration 3, loss = 0.00222480
Validation score: 0.783014
Iteration 4, loss = 0.00205302
Validation score: 0.785186
Iteration 5, loss = 0.00194500
Validation score: 0.801734
Iteration 6, loss = 0.00187443
Validation score: 0.817123
Iteration 7, loss = 0.00182629
Validation score: 0.813273
Iteration 8, loss = 0.00176015
Validation score: 0.822293
Iteration 9, loss = 0.00170627
Validation score: 0.811524
Iteration 10, loss = 0.00167172
Validation score: 0.829243
Iteration 11, loss = 0.00162975
Validation score: 0.842387
Iteration 12, loss = 0.00159192
Validation score: 0.839982
Iteration 13, loss = 0.00157018
Validation score: 0.841612
Iteration 14, loss = 0.00153588
Validation score: 0.843328
Iteration 15, loss = 0.00150957
Validation score: 0.845626
Iteration 16, loss = 0.00148871
Validation score: 0.834957
Iteration 17, loss = 0.00146553
Validation score: 0.854826
Iterat



0.8653919039631917

In [19]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predictions on the held-out split, clipped to [0, 1] before scoring.
cri_pred = np.clip(regr.predict(scaler.transform(X_test)), 0, 1)

print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
# RMSE is computed as sqrt(MSE): the `squared=False` flag of mean_squared_error
# was deprecated in scikit-learn 1.4 and removed in 1.6.
print(f'RMSE: {np.sqrt(mean_squared_error(y_test, cri_pred))}')

MAE: 0.016264498565031114
RMSE: 0.04863778719180248


In [20]:
from joblib import dump, load

# Persist the fitted regressor together with the scaler it was trained with.
model_path = './data/vcra-1w-mlp-hidden_256_32-with_target_length.model.joblib'
scaler_path = './data/vcra-1w-mlp-hidden_256_32-with_target_length.scaler.joblib'

dump(regr, model_path)
dump(scaler, scaler_path)

['./data/vcra-1w-mlp-hidden_256_32-with_target_length.scaler.joblib']

In [21]:
from joblib import dump, load

# Reload the persisted target-length regressor/scaler pair; `regr` and `scaler`
# are rebound to the loaded objects.
model_path = './data/vcra-1w-mlp-hidden_256_32-with_target_length.model.joblib'
scaler_path = './data/vcra-1w-mlp-hidden_256_32-with_target_length.scaler.joblib'

regr = load(model_path)
scaler = load(scaler_path)

In [22]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_min_slice (the timestamp
# with the fewest records) with the target-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_min_slice, model=regr, model_fun=nne.calc_cri_ours_with_target_length, model_norm=scaler)

134 ms ± 403 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [23]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_median_slice (the timestamp
# with the median record count) with the target-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_median_slice, model=regr, model_fun=nne.calc_cri_ours_with_target_length, model_norm=scaler)

248 ms ± 1.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [24]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_max_slice (the timestamp
# with the most records) with the target-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_max_slice, model=regr, model_fun=nne.calc_cri_ours_with_target_length, model_norm=scaler)

490 ms ± 2.43 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Training an MLP (via sklearn) -- with both lengths

In [25]:
# Inputs: the five encounter columns plus both vessels' lengths; target: `cri`.
feature_cols = ['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course', 'own_length', 'target_length']
X = mlp_input[feature_cols].to_numpy()
y = mlp_input['cri'].to_numpy()

# 80/20 split, stratified on the CRI bin so both parts share the same CRI distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=10,
    stratify=mlp_input['cri_bin'],
)

# Standardisation statistics are estimated on the training split only.
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)

In [26]:
# MLP with two hidden layers (256, 32); early stopping on an internal validation
# split, at most 30 epochs, giving up after 10 epochs without improvement.
mlp_params = dict(
    hidden_layer_sizes=(256, 32),
    max_iter=30,
    early_stopping=True,
    n_iter_no_change=10,
    random_state=10,
    verbose=True,
)
regr = MLPRegressor(**mlp_params)
regr.fit(X_train_norm, y_train)

# Score on the held-out split (shown as the cell output).
regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00309454
Validation score: 0.716809
Iteration 2, loss = 0.00219490
Validation score: 0.756268
Iteration 3, loss = 0.00198807
Validation score: 0.791904
Iteration 4, loss = 0.00187678
Validation score: 0.800557
Iteration 5, loss = 0.00178293
Validation score: 0.818743
Iteration 6, loss = 0.00171560
Validation score: 0.826659
Iteration 7, loss = 0.00165767
Validation score: 0.818349
Iteration 8, loss = 0.00159609
Validation score: 0.835646
Iteration 9, loss = 0.00155806
Validation score: 0.844040
Iteration 10, loss = 0.00151529
Validation score: 0.844515
Iteration 11, loss = 0.00148648
Validation score: 0.840580
Iteration 12, loss = 0.00146029
Validation score: 0.839596
Iteration 13, loss = 0.00142915
Validation score: 0.850809
Iteration 14, loss = 0.00140848
Validation score: 0.848565
Iteration 15, loss = 0.00138630
Validation score: 0.862471
Iteration 16, loss = 0.00136579
Validation score: 0.853059
Iteration 17, loss = 0.00135376
Validation score: 0.865703
Iterat



0.8833811537331612

In [27]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predictions on the held-out split, clipped to [0, 1] before scoring.
cri_pred = np.clip(regr.predict(scaler.transform(X_test)), 0, 1)

print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
# RMSE is computed as sqrt(MSE): the `squared=False` flag of mean_squared_error
# was deprecated in scikit-learn 1.4 and removed in 1.6.
print(f'RMSE: {np.sqrt(mean_squared_error(y_test, cri_pred))}')

MAE: 0.0156228272716904
RMSE: 0.04543036182395727


In [28]:
from joblib import dump, load

# Persist the fitted regressor together with the scaler it was trained with.
model_path = './data/vcra-1w-mlp-hidden_256_32-with_both_lengths.model.joblib'
scaler_path = './data/vcra-1w-mlp-hidden_256_32-with_both_lengths.scaler.joblib'

dump(regr, model_path)
dump(scaler, scaler_path)

['./data/vcra-1w-mlp-hidden_256_32-with_both_lengths.scaler.joblib']

In [29]:
from joblib import dump, load

# Reload the persisted both-lengths regressor/scaler pair; `regr` and `scaler`
# are rebound to the loaded objects.
model_path = './data/vcra-1w-mlp-hidden_256_32-with_both_lengths.model.joblib'
scaler_path = './data/vcra-1w-mlp-hidden_256_32-with_both_lengths.scaler.joblib'

regr = load(model_path)
scaler = load(scaler_path)

In [30]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_min_slice (the timestamp
# with the fewest records) with the both-lengths model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_min_slice, model=regr, model_fun=nne.calc_cri_ours_with_both_length, model_norm=scaler)

135 ms ± 623 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [31]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_median_slice (the timestamp
# with the median record count) with the both-lengths model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_median_slice, model=regr, model_fun=nne.calc_cri_ours_with_both_length, model_norm=scaler)

248 ms ± 854 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_max_slice (the timestamp
# with the most records) with the both-lengths model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_max_slice, model=regr, model_fun=nne.calc_cri_ours_with_both_length, model_norm=scaler)

496 ms ± 2.52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Training an MLP (via sklearn) -- with no lengths

In [33]:
# Inputs: the five encounter columns only (no vessel lengths); target: `cri`.
feature_cols = ['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course']
X = mlp_input[feature_cols].to_numpy()
y = mlp_input['cri'].to_numpy()

# 80/20 split, stratified on the CRI bin so both parts share the same CRI distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=10,
    stratify=mlp_input['cri_bin'],
)

# Standardisation statistics are estimated on the training split only.
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)

In [34]:
# MLP with two hidden layers (256, 32); early stopping on an internal validation
# split, at most 30 epochs, giving up after 10 epochs without improvement.
mlp_params = dict(
    hidden_layer_sizes=(256, 32),
    max_iter=30,
    early_stopping=True,
    n_iter_no_change=10,
    random_state=10,
    verbose=True,
)
regr = MLPRegressor(**mlp_params)
regr.fit(X_train_norm, y_train)

# Score on the held-out split (shown as the cell output).
regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00349063
Validation score: 0.708111
Iteration 2, loss = 0.00251164
Validation score: 0.731363
Iteration 3, loss = 0.00219936
Validation score: 0.782856
Iteration 4, loss = 0.00203311
Validation score: 0.787728
Iteration 5, loss = 0.00194867
Validation score: 0.785633
Iteration 6, loss = 0.00186464
Validation score: 0.796392
Iteration 7, loss = 0.00181158
Validation score: 0.799938
Iteration 8, loss = 0.00175430
Validation score: 0.807375
Iteration 9, loss = 0.00169564
Validation score: 0.827490
Iteration 10, loss = 0.00165498
Validation score: 0.828539
Iteration 11, loss = 0.00163154
Validation score: 0.834109
Iteration 12, loss = 0.00160294
Validation score: 0.825828
Iteration 13, loss = 0.00156151
Validation score: 0.846022
Iteration 14, loss = 0.00153543
Validation score: 0.840757
Iteration 15, loss = 0.00150576
Validation score: 0.845013
Iteration 16, loss = 0.00146814
Validation score: 0.825579
Iteration 17, loss = 0.00145852
Validation score: 0.841483
Iterat



0.8680810777097406

In [35]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predictions on the held-out split, clipped to [0, 1] before scoring.
cri_pred = np.clip(regr.predict(scaler.transform(X_test)), 0, 1)

print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
# RMSE is computed as sqrt(MSE): the `squared=False` flag of mean_squared_error
# was deprecated in scikit-learn 1.4 and removed in 1.6.
print(f'RMSE: {np.sqrt(mean_squared_error(y_test, cri_pred))}')

MAE: 0.01917913256010124
RMSE: 0.04842021536015828


In [36]:
from joblib import dump, load

# Persist the fitted regressor together with the scaler it was trained with.
model_path = './data/vcra-1w-mlp-hidden_256_32-with_no_lengths.model.joblib'
scaler_path = './data/vcra-1w-mlp-hidden_256_32-with_no_lengths.scaler.joblib'

dump(regr, model_path)
dump(scaler, scaler_path)

['./data/vcra-1w-mlp-hidden_256_32-with_no_lengths.scaler.joblib']

In [37]:
from joblib import dump, load

# Reload the persisted no-lengths regressor/scaler pair; `regr` and `scaler`
# are rebound to the loaded objects.
model_path = './data/vcra-1w-mlp-hidden_256_32-with_no_lengths.model.joblib'
scaler_path = './data/vcra-1w-mlp-hidden_256_32-with_no_lengths.scaler.joblib'

regr = load(model_path)
scaler = load(scaler_path)

In [38]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_min_slice (the timestamp
# with the fewest records) with the no-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_min_slice, model=regr, model_fun=nne.calc_cri_ours_with_no_length, model_norm=scaler)

135 ms ± 536 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [39]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_median_slice (the timestamp
# with the median record count) with the no-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_median_slice, model=regr, model_fun=nne.calc_cri_ours_with_no_length, model_norm=scaler)

253 ms ± 1.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [40]:
%%timeit
# Body timed by %%timeit: run nne.calc_cri_timeslice over gdf_max_slice (the timestamp
# with the most records) with the no-length model function and the regr/scaler pair in scope.
nne.calc_cri_timeslice(gdf_max_slice, model=regr, model_fun=nne.calc_cri_ours_with_no_length, model_norm=scaler)

494 ms ± 2.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
