In [22]:
import pandas as pd
import numpy as np

In [2]:
import os, sys
# Append a directory under the current user's home to the module search path
# before importing `lonelyboy` (presumably the package is checked out there —
# the location is machine-specific, so verify it on your setup).
sys.path.append(os.path.join(os.path.expanduser('~'), 'Documents/Insert-Generic-Name-Here/'))
# sys.path

# Project-local modules, aliased for brevity in the cells below.
from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp

In [3]:
# Read the fisheries point records from CSV into a pandas DataFrame.
fisheries = pd.read_csv(filepath_or_buffer='data/csv/fisheries_dynamic_real_points_v3.csv')

In [4]:
%%time
# Pass the loaded frame through the project's `gdf_from_df` helper.
# NOTE(review): the `geom` POINT column in the output below suggests this
# builds a GeoDataFrame from lon/lat — confirm against the helper.
# The name `fisheries` is rebound here, so re-running this cell on its own
# feeds the already-converted frame back into the helper.
fisheries = gspp.gdf_from_df(fisheries)

CPU times: user 1min 8s, sys: 400 ms, total: 1min 8s
Wall time: 1min 7s


In [5]:
# Put all records in ascending order of their `ts` timestamp (mutates the frame).
fisheries.sort_values(by='ts', inplace=True)

In [6]:
# Swap the existing index labels for a fresh 0..n-1 range and discard the old ones.
fisheries.reset_index(inplace=True, drop=True)

In [7]:
# Preview only the first rows; this keeps the rendered output small and
# independent of pandas' display settings.
fisheries.head(10)

Unnamed: 0,mmsi,course,lon,lat,ts,velocity,trip_id,datetime,geom
0,228131600,263.7,-4.644325,48.092247,1443650404,11.590369,0,2015-09-30 22:00:04,POINT (-4.644325 48.092247)
1,228037600,90.2,-4.448573,48.157547,1443650409,12.111672,0,2015-09-30 22:00:09,POINT (-4.448573000000001 48.157547)
2,227443000,171.5,-4.782632,48.005634,1443650411,12.628942,0,2015-09-30 22:00:11,POINT (-4.782632 48.005634)
3,228131600,265.2,-4.644965,48.092180,1443650416,12.593468,0,2015-09-30 22:00:16,POINT (-4.644965 48.09218)
4,228037600,87.6,-4.448013,48.157574,1443650419,13.535968,0,2015-09-30 22:00:19,POINT (-4.4480133 48.157574)
5,227443000,172.6,-4.782532,48.005116,1443650420,10.235945,0,2015-09-30 22:00:20,POINT (-4.7825317 48.005116)
6,228131600,260.9,-4.645487,48.092130,1443650425,14.013235,0,2015-09-30 22:00:25,POINT (-4.645487 48.09213)
7,228037600,87.2,-4.447327,48.157630,1443650430,15.065887,0,2015-09-30 22:00:30,POINT (-4.4473267 48.15763)
8,227443000,173.7,-4.782448,48.004600,1443650431,12.154994,0,2015-09-30 22:00:31,POINT (-4.7824483 48.0046)
9,228131600,269.4,-4.646070,48.092106,1443650434,12.523681,0,2015-09-30 22:00:34,POINT (-4.64607 48.092106)


## Calculating the Basic Features of the ML Dataset

In [8]:
# One row per (mmsi, trip_id) trajectory. After reset_index the columns are
# `mmsi`, `trip_id` and `0`, where column 0 holds that trajectory's
# [ts, lon, lat, velocity] rows as a 2-D NumPy array.
fisheries_ml_env = (
    fisheries
    .groupby(['mmsi', 'trip_id'], group_keys=False)
    .apply(lambda trip: trip[['ts', 'lon', 'lat', 'velocity']].values)
    .reset_index()
)

In [36]:
# Spot-check: raw records of one vessel's trip 0, to compare with the grouped array below.
fisheries.loc[(fisheries['mmsi'] == 219017843) & (fisheries['trip_id'] == 0)]

Unnamed: 0,mmsi,course,lon,lat,ts,velocity,trip_id,datetime,geom
602983,219017843,96.0,-4.375318,48.128647,1448336331,5.473559,0,2015-11-24 03:38:51,POINT (-4.3753185 48.128647)
602984,219017843,97.1,-4.375168,48.128624,1448336337,0.094769,0,2015-11-24 03:38:57,POINT (-4.3751683 48.128624)


In [37]:
# Print a small sample of trajectories in fixed-point notation.
# np.printoptions scopes the formatting to this block, so later cells keep
# NumPy's default print settings; head(2) avoids dumping every trajectory.
with np.printoptions(suppress=True, formatter={'float_kind': '{:f}'.format}):
    print(fisheries_ml_env.head(2).values)

[[219017843 0
  array([[1448336331.000000, -4.375318, 48.128647, 5.473559],
       [1448336337.000000, -4.375168, 48.128624, 0.094769]])]
 [219017843 1
  array([[1457928735.000000, -4.833392, 48.205166, 17.184613],
       [1457928815.000000, -4.827582, 48.202568, 16.307824],
       [1457928856.000000, -4.824772, 48.201270, 17.425203],
       [1457929115.000000, -4.805262, 48.193806, 17.612581],
       [1457929175.000000, -4.800658, 48.192154, 15.831242],
       [1457929186.000000, -4.799900, 48.191880, 19.093977],
       [1457929195.000000, -4.799163, 48.191580, 16.723447],
       [1457929217.000000, -4.797567, 48.190987, 17.748043],
       [1457929357.000000, -4.786535, 48.187744, 19.301054],
       [1457929385.000000, -4.784133, 48.187046, 17.765690],
       [1457929446.000000, -4.779326, 48.185616, 19.500884],
       [1457929456.000000, -4.778463, 48.185352, 15.767594],
       [1457929466.000000, -4.777767, 48.185135, 20.457170],
       [1457929476.000000, -4.776855, 48.184880, 15.9

In [39]:
# Persist the per-trajectory arrays (column 0 of fisheries_ml_env).
# allow_pickle=True is np.save's default; it is spelled out here to make
# visible that pickling is permitted when writing these values.
np.save(
    'data/npy/fisheries_dynamic_V3_ml_extra_features_V2.npy',
    fisheries_ml_env[0].values,
    allow_pickle=True,
)

#### Save Vessel Trip Metadata

In [72]:
# Metadata table for the saved trajectories: the (mmsi, trip_id) key of each
# row plus `id`, the row's position in fisheries_ml_env (and therefore in the
# saved .npy array).
# Take an explicit copy: adding a column to a bare slice of fisheries_ml_env
# triggers pandas' SettingWithCopyWarning.
fisheries_ml_env_meta = fisheries_ml_env[['mmsi', 'trip_id']].copy()
fisheries_ml_env_meta['id'] = fisheries_ml_env_meta.index
fisheries_ml_env_meta.to_csv('data/npy/fisheries_dynamic_V3_ml_metadata.csv', header=True, index=False)

In [73]:
# Show the first five metadata rows.
fisheries_ml_env_meta.head(n=5)

Unnamed: 0,mmsi,trip_id,id
0,219017843,0,0
1,219017843,1,1
2,219017843,2,2
3,224038000,0,3
4,224038000,1,4


#### Verifying the Correctness of the Result
* #### Comparing the Trajectory Length from the Original Data with the ML Data using their Respective MetaData

In [41]:
%%time
test = np.load('data/npy/fisheries_dynamic_V3_ml_extra_features_V2.npy')
test_meta = pd.read_csv('data/npy/fisheries_dynamic_V3_ml_metadata.csv')

for traj in test_meta.itertuples():
    traj_tmp = len(fisheries.loc[(fisheries.mmsi == traj.mmsi) & (fisheries.trip_id == traj.trip_id)])
    traj_tmp_ml = test[traj.Index].shape[0]
    
    if not traj_tmp == traj_tmp_ml:
        print ('SHIT')

CPU times: user 2min 48s, sys: 15.8 ms, total: 2min 48s
Wall time: 42.5 s


In [20]:
# Look at the first two reloaded trajectories only; evaluating `test` itself
# renders every trajectory in the file.
test[:2]

array([array([[-4.3753185 , 48.128647  ,  5.47355923],
       [-4.3751683 , 48.128624  ,  0.0947689 ]]),
       array([[-4.8333917 , 48.205166  , 17.18461288],
       [-4.827582  , 48.202568  , 16.30782413],
       [-4.824772  , 48.20127   , 17.42520349],
       [-4.8052616 , 48.193806  , 17.6125806 ],
       [-4.800658  , 48.192154  , 15.83124172],
       [-4.7999    , 48.19188   , 19.09397672],
       [-4.7991633 , 48.19158   , 16.72344703],
       [-4.797567  , 48.190987  , 17.74804262],
       [-4.786535  , 48.187744  , 19.30105444],
       [-4.7841334 , 48.187046  , 17.76569047],
       [-4.7793264 , 48.185616  , 19.50088379],
       [-4.7784634 , 48.185352  , 15.76759353],
       [-4.7777667 , 48.185135  , 20.4571702 ],
       [-4.776855  , 48.18488   , 15.96958048],
       [-4.7761536 , 48.184647  , 18.72712986],
       [-4.77532   , 48.18441   , 17.46984855],
       [-4.774628  , 48.184185  , 18.36049854],
       [-4.7737317 , 48.18392   , 20.09422079],
       [-4.7729216 , 48.

## Calculating the Extra Features of the ML Dataset
* ## Acceleration and Bearing

In [None]:
%%time
fisheries_extra_features_acceleration = fisheries.groupby(['mmsi', 'trip_id'], group_keys=False).apply(lambda x: gspp.calculate_acceleration(x))

In [None]:
# Preview the first rows instead of rendering the whole result.
fisheries_extra_features_acceleration.head()

In [None]:
%%time
fisheries_extra_features_bearing = fisheries_extra_features_acceleration.groupby(['mmsi', 'trip_id'], group_keys=False).apply(lambda x: gspp.calculate_bearing(x))

In [None]:
# Preview the first rows instead of rendering the whole result.
fisheries_extra_features_bearing.head()

In [None]:
# One row per (mmsi, trip_id) trajectory. After reset_index, column 0 holds
# that trajectory's [lon, lat, velocity, acceleration, bearing] rows as a
# 2-D NumPy array.
fisheries_ml_env_v2_extra_features = (
    fisheries_extra_features_bearing
    .groupby(['mmsi', 'trip_id'], group_keys=False)
    .apply(lambda trip: trip[['lon', 'lat', 'velocity', 'acceleration', 'bearing']].values)
    .reset_index()
)

In [None]:
# Persist the per-trajectory arrays (column 0) that include acceleration and bearing.
# allow_pickle=True is np.save's default; it is spelled out here to make
# visible that pickling is permitted when writing these values.
np.save(
    'data/npy/fisheries_dynamic_V3_ml_extra_features_acceleration_bearing.npy',
    fisheries_ml_env_v2_extra_features[0].values,
    allow_pickle=True,
)

## Organizing .npy Files with their Metadata

In [None]:
# `fisheries_extra_features_bearing` is grouped by mmsi/trip_id and sliced by
# column name elsewhere in this notebook, so it appears to be a flat
# per-point frame with no column `0` (indexing it with [0] would raise
# KeyError). Count the points of each trajectory directly; dropping the group
# keys yields a 0..n-1 index, one entry per trajectory in sorted key order.
fisheries_extra_features_bearing.groupby(['mmsi', 'trip_id']).size().reset_index(drop=True)

In [None]:
# Number of rows in each trajectory array of the extra-features dataset.
fisheries_ml_env_v2_extra_features[0].map(len)

In [None]:
# Number of rows in each trajectory array of the basic-features dataset.
fisheries_ml_env[0].map(len)