In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

import sys, os
import sklearn
import datetime

import importlib
from tqdm import tqdm

In [42]:
import st_toolkit as geohl
importlib.reload(geohl);

import cri_calc as cri
importlib.reload(cri)

import cri_helper as helper
importlib.reload(helper)

import nn_evaluation as nne
importlib.reload(nne)

<module 'nn_evaluation' from '/Users/andrewt/Documents/Univ. Piraeus Research Center/VCRA/nn_evaluation.py'>

In [3]:
def calculate_cri_timeslice(df):
    timeslice_result = []
    
    for row_i in df.itertuples():
        for row_j in df.itertuples():
            if row_i.Index == row_j.Index:
                continue
                
            timeslice_result.append([row_i.Index, row_i.mmsi, row_i.geom, row_i.speed, row_i.course, 
                                     row_j.Index, row_j.mmsi, row_j.geom, row_j.speed, row_j.course, *nne.calculate_cri(row_i, row_j)])
            
#     return pd.DataFrame(timeslice_result, columns=['own', 'target', 'dcpa', 'tcpa', 'hr', 'rel_movement_angle', 'dist_euclid', 'speed_r', 'cri'])
    return pd.DataFrame(timeslice_result, columns=['own_Index', 'own_mmsi', 'own_geom', 'own_speed', 'own_course',
                                                   'target_Index', 'target_mmsi', 'target_geom', 'target_speed', 'target_course', 
                                                   'dcpa', 'tcpa', 'hr', 'rel_movement_angle', 'dist_euclid', 'speed_r', 'cri'])

# Loading and Preparing CRI Dataset

In [6]:
df = pd.read_csv('./data/unipi_ais_dynamic_jul2018_1w_algn_linear_v2_w_lens.csv', parse_dates=['datetime'])
gdf = geohl.getGeoDataFrame_v2(df, crs='epsg:4326')

gdf2 = gdf.loc[gdf.datetime.dt.date.between(datetime.date(2018, 7, 3), datetime.date(2018, 7, 3), inclusive='both')].copy()

  arr = construct_1d_object_array_from_listlike(values)


In [7]:
gdf_sub_moving = gdf2.loc[gdf2.speed.between(1, 50, inclusive='neither')].copy()
gdf_vcra = pd.read_pickle('./data/unipi_ais_dynamic_jul2018_1w_vcra_dataset_v3.pickle')

In [8]:
tqdm.pandas(desc='Adding Vessels\' Length...')

# gdf_vcra.loc[:, 'own_length'] = gdf_vcra.own_Index.apply(lambda l: gdf_sub_moving[l].length)
mlp_input = gdf_vcra.loc[gdf_vcra.own_Index.isin(gdf_sub_moving.index.values)].copy()
mlp_input.loc[:, 'own_length'] = mlp_input.own_Index.progress_apply(lambda l: gdf_sub_moving.loc[l].length)
mlp_input.loc[:, 'target_length'] = mlp_input.target_Index.progress_apply(lambda l: gdf_sub_moving.loc[l].length)
mlp_input.loc[:, 'cri_bin'] = pd.cut(mlp_input.cri, bins=np.arange(0, 1.1, .1), right=False,)

Adding Vessels' Length...: 100%|█████| 960268/960268 [00:36<00:00, 26657.63it/s]
Adding Vessels' Length...: 100%|█████| 960268/960268 [00:35<00:00, 26846.05it/s]


# Get a Reference Timeslice (for latency test)

In [9]:
grouped = gdf_sub_moving.groupby(['datetime'])
l = grouped.get_group((list(grouped.groups)[0]))

# Evaluating EQ model timeliness

In [21]:
%%timeit 
nne.calc_cri_timeslice(l.copy(), model=None, model_fun=nne.calculate_cri, model_norm=None);

162 ms ± 2.45 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Evaluating MLP-VCRA model (ours)

## Test using SVM/RVM dataset

In [1]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.neural_network import MLPRegressor

In [48]:
X = mlp_input[['dist_euclid', 'own_speed', 'target_speed', 'own_course', 'target_course', 'own_length']].values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.017, random_state=10, stratify=mlp_input['cri_bin'])

scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)

regr = MLPRegressor(random_state=10, max_iter=30, hidden_layer_sizes=(256, 32), 
                    verbose=True, early_stopping=True, n_iter_no_change=10).fit(X_train_norm, y_train)

regr.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.01571137
Validation score: 0.191967
Iteration 2, loss = 0.00621171
Validation score: 0.298432
Iteration 3, loss = 0.00552288
Validation score: 0.336398
Iteration 4, loss = 0.00520527
Validation score: 0.386984
Iteration 5, loss = 0.00480917
Validation score: 0.398021
Iteration 6, loss = 0.00468020
Validation score: 0.444602
Iteration 7, loss = 0.00443829
Validation score: 0.443115
Iteration 8, loss = 0.00428625
Validation score: 0.430787
Iteration 9, loss = 0.00414758
Validation score: 0.505157
Iteration 10, loss = 0.00399364
Validation score: 0.498719
Iteration 11, loss = 0.00384677
Validation score: 0.508539
Iteration 12, loss = 0.00375856
Validation score: 0.545464
Iteration 13, loss = 0.00361395
Validation score: 0.538888
Iteration 14, loss = 0.00361620
Validation score: 0.530537
Iteration 15, loss = 0.00355355
Validation score: 0.569620
Iteration 16, loss = 0.00337693
Validation score: 0.568613
Iteration 17, loss = 0.00339709
Validation score: 0.569194
Iterat



0.6629508950420933

In [49]:
cri_pred = pd.Series(regr.predict(scaler.transform(X_test))).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred, squared=False)}')

MAE: 0.03186096400767304
RMSE: 0.07716292280800398


In [50]:
from joblib import dump, load
dump(regr, './data/vcra-1w-mlp-hidden_256_32-ours-2pct.model.joblib') 
dump(scaler, './data/vcra-1w-mlp-hidden_256_32-ours-2pct.scaler.joblib') 

['./data/vcra-1w-mlp-hidden_256_32-ours-2pct.scaler.joblib']

# Compare with Park et al.

In [22]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from skrvm import RVR

In [23]:
X = mlp_input[['dist_euclid', 'hr', 'own_speed', 'target_speed', 'own_course', 'target_course', 'own_length', 'target_length']].copy()
X = X.values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.017, random_state=10, stratify=mlp_input['cri_bin'])

In [24]:
len(X_train)

16324

In [25]:
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)

clf = RVR(kernel='rbf', verbose=False, n_iter=100)
clf.fit(X_train_norm.astype(float), y_train.astype(float))
clf.score(scaler.transform(X_test), y_test)

0.635123842733207

In [26]:
cri_pred_rvm = pd.Series(clf.predict(scaler.transform(X_test))).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred_rvm)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred_rvm, squared=False)}')

MAE: 0.03514679944133218
RMSE: 0.07917119646326023


In [27]:
from joblib import dump, load
dump(clf, './data/park-et-al-rvm-vcra-v2.model.joblib') 
dump(scaler, './data/park-et-al-rvm-vcra-v2.scaler.joblib') 

['./data/park-et-al-rvm-vcra-v2.scaler.joblib']

## Park et al. Timeliness

In [29]:
%%timeit 
nne.calc_cri_timeslice(l.copy(), model=clf, model_fun=nne.calc_cri_park_etal, model_norm=scaler);

169 ms ± 3.04 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Compare with Li et al.

In [30]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler

In [31]:
X = mlp_input[['speed_r', 'hr', 'rel_movement_angle', 'dist_euclid']].values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10, stratify=mlp_input['cri_bin'])

scaler = MinMaxScaler()
X_train_norm = scaler.fit_transform(X_train)

regr_li_et_al = MLPRegressor(random_state=10, max_iter=100, hidden_layer_sizes=(54,), 
                    verbose=True, early_stopping=True, n_iter_no_change=10).fit(X_train_norm, y_train)

regr_li_et_al.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.00791338
Validation score: 0.289756
Iteration 2, loss = 0.00616347
Validation score: 0.329453
Iteration 3, loss = 0.00584422
Validation score: 0.369999
Iteration 4, loss = 0.00550792
Validation score: 0.397080
Iteration 5, loss = 0.00532400
Validation score: 0.411601
Iteration 6, loss = 0.00522921
Validation score: 0.418708
Iteration 7, loss = 0.00515596
Validation score: 0.416603
Iteration 8, loss = 0.00510707
Validation score: 0.431201
Iteration 9, loss = 0.00507506
Validation score: 0.438309
Iteration 10, loss = 0.00504696
Validation score: 0.438351
Iteration 11, loss = 0.00501688
Validation score: 0.441701
Iteration 12, loss = 0.00494798
Validation score: 0.448401
Iteration 13, loss = 0.00491284
Validation score: 0.452802
Iteration 14, loss = 0.00488934
Validation score: 0.459039
Iteration 15, loss = 0.00487009
Validation score: 0.461232
Iteration 16, loss = 0.00485283
Validation score: 0.455573
Iteration 17, loss = 0.00483746
Validation score: 0.460634
Iterat

0.5171947533150325

In [32]:
cri_pred_mlp_li = pd.Series(regr_li_et_al.predict(scaler.transform(X_test))).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred_mlp_li)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred_mlp_li, squared=False)}')

MAE: 0.04783689860808581
RMSE: 0.09278284485653365


In [33]:
from joblib import dump, load
# dump(regr, './data/vcra-1w-mlp-hidden_128_32_8-parkj.joblib') 
dump(regr_li_et_al, './data/li-et-al-mlp-vcra-v2.model.joblib') 
dump(scaler, './data/li-et-al-mlp-vcra-v2.scaler.joblib') 

['./data/li-et-al-mlp-vcra-v2.scaler.joblib']

## Test using SVM/RVM dataset

In [44]:
X = mlp_input[['speed_r', 'hr', 'rel_movement_angle', 'dist_euclid']].values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.017, random_state=10, stratify=mlp_input['cri_bin'])

scaler = MinMaxScaler()
X_train_norm = scaler.fit_transform(X_train)

regr_li_et_al = MLPRegressor(random_state=10, max_iter=100, hidden_layer_sizes=(54,), 
                    verbose=True, early_stopping=True, n_iter_no_change=10).fit(X_train_norm, y_train)

regr_li_et_al.score(scaler.transform(X_test), y_test)

Iteration 1, loss = 0.03670743
Validation score: -0.198026
Iteration 2, loss = 0.00991136
Validation score: -0.034789
Iteration 3, loss = 0.00903537
Validation score: 0.021141
Iteration 4, loss = 0.00864098
Validation score: 0.053188
Iteration 5, loss = 0.00842942
Validation score: 0.070614
Iteration 6, loss = 0.00829841
Validation score: 0.083399
Iteration 7, loss = 0.00819203
Validation score: 0.094093
Iteration 8, loss = 0.00810336
Validation score: 0.101888
Iteration 9, loss = 0.00800304
Validation score: 0.113588
Iteration 10, loss = 0.00792374
Validation score: 0.121288
Iteration 11, loss = 0.00785772
Validation score: 0.131031
Iteration 12, loss = 0.00778681
Validation score: 0.137307
Iteration 13, loss = 0.00773310
Validation score: 0.144982
Iteration 14, loss = 0.00766418
Validation score: 0.153377
Iteration 15, loss = 0.00758715
Validation score: 0.162641
Iteration 16, loss = 0.00750363
Validation score: 0.173731
Iteration 17, loss = 0.00743754
Validation score: 0.181046
Iter



0.37438923300147486

In [46]:
cri_pred_mlp_li = pd.Series(regr_li_et_al.predict(scaler.transform(X_test))).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred_mlp_li)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred_mlp_li, squared=False)}')

MAE: 0.057214222051195696
RMSE: 0.10584798562451508


In [47]:
from joblib import dump, load
# dump(regr, './data/vcra-1w-mlp-hidden_128_32_8-parkj.joblib') 
dump(regr_li_et_al, './data/li-et-al-mlp-vcra-v2-2pct.model.joblib') 
dump(scaler, './data/li-et-al-mlp-vcra-v2-20pct.scaler.joblib') 

['./data/li-et-al-mlp-vcra-v2-20pct.scaler.joblib']

## Li et al. Timeliness

In [35]:
%%timeit 
nne.calc_cri_timeslice(l.copy(), model=regr_li_et_al, model_fun=nne.calc_cri_li_etal, model_norm=scaler);

167 ms ± 3.11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Compare with Gang et al.

In [36]:
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [37]:
X = mlp_input[['own_course', 'target_course', 'own_speed', 'target_speed', 'hr', 'dist_euclid']].copy()
X = X.values
y = mlp_input[['cri']].values.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.017, random_state=10, stratify=mlp_input['cri_bin'])

In [38]:
len(X_train)

16324

In [39]:
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)

regr_gang_et_al = SVR(verbose=True).fit(X_train_norm, y_train)
regr_gang_et_al.score(X_test_norm, y_test)

[LibSVM]....
*....
*.
*
optimization finished, #iter = 8625
obj = -238.470603, rho = -0.163853
nSV = 1738, nBSV = 1517


0.3959955678603929

In [40]:
cri_pred_svm_gang = pd.Series(regr_gang_et_al.predict(X_test_norm)).clip(0,1).values
print(f'MAE: {mean_absolute_error(y_test, cri_pred_svm_gang)}')
print(f'RMSE: {mean_squared_error(y_test, cri_pred_svm_gang, squared=False)}')

MAE: 0.06328110434071546
RMSE: 0.10319976901667206


In [41]:
from joblib import dump, load
# dump(regr, './data/vcra-1w-mlp-hidden_128_32_8-parkj.joblib') 
dump(regr_gang_et_al, './data/gang-et-al-svm-vcra-v2.model.joblib') 
dump(scaler, './data/gang-et-al-svm-vcra-v2.scaler.joblib') 

['./data/gang-et-al-svm-vcra-v2.scaler.joblib']

## Gang et al. Timeliness

In [43]:
%%timeit 
nne.calc_cri_timeslice(l.copy(), model=regr_gang_et_al, model_fun=nne.calc_cri_gang_etal, model_norm=scaler);

179 ms ± 5.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
