In [4]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from random import choice
pd.options.mode.chained_assignment = None 
import configparser
import pickle
import psycopg2
import psycopg2.extras
import contextily as ctx
import multiprocessing as mp

In [5]:
import os, sys

# Make the locally checked-out `lonelyboy` package importable.
# The guard keeps sys.path free of duplicate entries when this cell is re-run.
_LOCAL_PACKAGE_DIR = os.path.join(os.path.expanduser('~'), 'Documents/Coding/Python')
if _LOCAL_PACKAGE_DIR not in sys.path:
    sys.path.append(_LOCAL_PACKAGE_DIR)
# sys.path

from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
# from lonelyboy.geospatial import group_patterns as gsgp


# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import PyQt5
import matplotlib.pyplot as plt
from matplotlib import style

# Apply the plotting theme once (the original applied it twice).
style.use('ggplot')

# Open figures in a separate Qt window. `run_line_magic` is the supported
# replacement for the deprecated `InteractiveShell.magic`.
get_ipython().run_line_magic('matplotlib', 'qt')
# get_ipython().run_line_magic('matplotlib', 'inline')

In [6]:
# Default figure size: the height is derived from the width by dividing by 1.618.
PLT_IMAGE_WIDTH = 3.748
PLT_IMAGE_HEIGHT = PLT_IMAGE_WIDTH/1.618

# Global matplotlib defaults used by every figure in this notebook:
# LaTeX text rendering, 8pt sans-serif font, and the figure size defined above.
plt.rcParams.update({
    'text.usetex': True,
    'font.family': 'sans-serif',
    'font.size': 8,
    'figure.figsize': (PLT_IMAGE_WIDTH, PLT_IMAGE_HEIGHT),
})

# Reading the Vessels' MMSIs

In [7]:
# Index of the entry to take from the pickled collection of MMSI lists.
CLUSTER_ID = 0

# NOTE: unpickling can execute arbitrary code; only load this file if it comes
# from a trusted source.
mmsi_list = pd.read_pickle(os.path.join('.', 'data/pkl/mmsi_list.pckl'))[CLUSTER_ID]

# Number of vessels fetched and resampled per batch: one per CPU core, minus one
# core kept free. Clamped to at least 1 so that the batch loop's range() step
# is never 0 on a single-core machine.
mmsi_list_window_size = max(1, mp.cpu_count()-1)

In [8]:
%%time
# Read the database connection settings from the local ini file.
config = configparser.ConfigParser()
ini_path = os.path.join('.','sql_server.ini')
# ConfigParser.read() silently skips missing files and returns the list of files
# it actually parsed; fail here with a clear message instead of a KeyError below.
if not config.read(ini_path):
    raise FileNotFoundError(f'Database settings file not found or unreadable: {ini_path}')
properties = config['SERVER']

host    = properties['host']
db_name = properties['db_name']
uname   = properties['uname']
pw      = properties['pw']
port    = properties['port']

# The %s placeholder receives the tuple of MMSIs for the current batch.
query = "select * from ais_data.dynamic_ships_min_trip_card_2_segmented_12h where mmsi in %s;"
con = psycopg2.connect(database=db_name, user=uname, password=pw, host=host, port=port)

Wall time: 141 ms


In [17]:
# NOTE(review): this cell carries execution count 17 but is positioned above the
# fetch loop that still reads through `con`. Under Restart & Run All it would close
# the connection before the data is fetched; run it last (or move it to the end).
con.close() # close the database connection opened by psycopg2.connect above

In [9]:
# Colour palette for the map plots further down; a single colour is picked with cmap(i).
cmap = plt.cm.tab20

In [10]:
from scipy.interpolate import interp1d

def resample_geospatial(df, features=['lat', 'lon'], rule='60S', method='linear', crs={'init': 'epsg:4326'}, drop_lon_lat=False):
    """Resample one trajectory onto a regular time grid by interpolation.

    Parameters
    ----------
    df : pandas.DataFrame
        Records of a single trajectory. Must contain a ``ts`` column holding
        epoch timestamps in seconds and every column named in ``features``.
        The frame is not modified.
    features : list-like of str
        Columns to interpolate. Must include ``'lat'`` and ``'lon'``, which are
        used to build the point geometry.
    rule : str
        Pandas frequency string for the spacing of the output grid.
    method : str
        Interpolation kind passed to ``scipy.interpolate.interp1d``.
    crs : dict or str
        Coordinate reference system handed to the resulting GeoDataFrame.
    drop_lon_lat : bool
        If True, the ``lat``/``lon`` columns are dropped from the result and
        only the geometry is kept.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per grid timestamp with the interpolated ``features``, a
        ``datetime`` column and a ``geom`` point geometry. If the input has
        fewer than two records, or no grid timestamp falls inside the observed
        time span, an empty slice of the input (plus a ``datetime`` column) is
        returned instead.
    """
    # Derive the timestamp column on a new frame so the caller's data is left untouched.
    frame = df.assign(datetime=pd.to_datetime(df['ts'], unit='s'))

    # scipy interpolate needs at least 2 records
    if len(frame) <= 1:
        return frame.iloc[0:0]

    # Interpolate over integer nanoseconds; normalising to datetime64[ns] first
    # guarantees that the known and the new timestamps use the same unit.
    t_known = frame['datetime'].values.astype('datetime64[ns]').astype(np.int64)
    interpolate = interp1d(t_known, frame[features].values, kind=method, axis=0)

    # Grid from the first to the last observed minute. The start itself is
    # excluded ([1:]) because it may lie before the first observation; this
    # equals the former date_range(closed='right') without relying on a keyword
    # that differs between pandas versions.
    grid_start = frame['datetime'].min().replace(second=0)
    grid_end = frame['datetime'].max().replace(second=0)
    grid = pd.date_range(start=grid_start, end=grid_end, freq=rule)[1:]
    if len(grid) == 0:
        # The trajectory is too short to contain a single grid timestamp.
        return frame.iloc[0:0]

    t_new = grid.values.astype('datetime64[ns]').astype(np.int64)
    resampled = pd.DataFrame(interpolate(t_new), columns=features)
    resampled['datetime'] = grid
    resampled['geom'] = [Point(lon, lat) for lon, lat in zip(resampled['lon'], resampled['lat'])]

    # Optionally keep only the geometry and drop the raw coordinates.
    if drop_lon_lat:
        resampled = resampled.drop(['lat', 'lon'], axis=1)
    return gpd.GeoDataFrame(resampled, crs=crs, geometry='geom')

In [12]:
from tqdm import tqdm_notebook
from functools import partial

def __parallelize_resampling(x, features=['lat', 'lon'], rule='60S', method='linear', crs={'init': 'epsg:4326'}, drop_lon_lat=False):
    """Worker: resample every trip of one vessel.

    ``x`` holds the records of a single vessel (the first value of its ``mmsi``
    column is used in the progress messages). The records are grouped by
    ``trip_id`` and each group is passed to ``resample_geospatial`` with the
    given options; the combined result of the group-wise apply is returned.
    """
    vessel = x.mmsi.unique()[0]
    resample_options = dict(features=features, rule=rule, method=method, crs=crs, drop_lon_lat=drop_lon_lat)

    print (f'Resampling for mmsi:{vessel}')
    trips = x.groupby(['trip_id'], group_keys=False)
    resampled = trips.apply(resample_geospatial, **resample_options)
    print (f'Resampling for mmsi:{vessel} Complete')
    return resampled
        
def parallelize_resampling(df, features=['lat', 'lon'], rule='60S', method='linear', crs={'init': 'epsg:4326'}, drop_lon_lat=False):
    """Resample all vessels in ``df`` in parallel, one task per vessel.

    Parameters
    ----------
    df : pandas.DataFrame
        Records of one or more vessels; must contain ``mmsi`` and ``trip_id``
        columns plus whatever ``resample_geospatial`` requires.
    features, rule, method, crs, drop_lon_lat
        Forwarded unchanged to the per-vessel worker.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-vessel results. If ``df`` contains no vessel
        (empty input), an empty slice of ``df`` is returned.

    NOTE(review): the worker is defined in the notebook itself; on platforms that
    start worker processes with "spawn" the pool may be unable to import it --
    confirm on the target OS or move the worker into an importable module.
    """
    # One chunk per vessel, in order of first appearance (rows without an mmsi are skipped).
    df_split = [vessel_rows for _, vessel_rows in df.groupby('mmsi', sort=False)]
    if not df_split:
        # Nothing to resample; pd.concat would raise on an empty list.
        return df.iloc[0:0]

    # Leave one core free to not freeze the machine, never start more workers
    # than there are tasks, and always start at least one.
    num_cores = max(1, min(mp.cpu_count()-1, len(df_split)))

    func = partial(__parallelize_resampling, features=features, rule=rule, method=method, crs=crs, drop_lon_lat=drop_lon_lat)

    # The context manager shuts the pool down even if a worker raises.
    with mp.Pool(num_cores) as pool:
        res = pd.concat(pool.map(func, df_split))
    print (f'Resampling Complete!')
    return res

In [None]:
# Fetch and resample the vessels batch by batch, keeping every batch's result.
# (Previously df2 was rebound on each iteration, so only the last batch survived.)
resampled_batches = []
batch_size = max(1, mmsi_list_window_size)  # a step of 0 would make range() raise
for i in range(0, len(mmsi_list), batch_size):
    print ('Fetching Data....')
    # Plain Python ints so the driver can adapt them -- assumes MMSIs are integers.
    mmsis = tuple(int(mmsi) for mmsi in mmsi_list[i:i+batch_size])
    # Let the database driver bind the tuple to the %s placeholder instead of
    # formatting it into the SQL text; string formatting produced invalid SQL
    # for a single-element batch ("in (123,)").
    dfTmp = pd.read_sql_query(query, con=con, params=(mmsis,))
    if dfTmp.empty:
        # No records for this batch; nothing to resample.
        continue
    resampled_batches.append(parallelize_resampling(dfTmp, features=dfTmp.columns, rule=f'{1*60}S'))

# NOTE: all batches are held in memory at once.
df2 = pd.concat(resampled_batches) if resampled_batches else pd.DataFrame()

Fetching Data....


In [15]:
# Plot the resampled records of one hand-picked trip (mmsi 228037600, trip_id 7)
# in the first palette colour. df2 must contain this vessel for anything to be drawn.
gsplt.map_plot(df2.loc[(df2.mmsi==228037600) & (df2.trip_id == 7)], color=[cmap(0)])

In [16]:
# Plot the same trip (mmsi 228037600, trip_id 7) taken from `df`, converted with
# gspp.gdf_from_df before plotting.
# NOTE(review): `df` is not assigned in any of the cells visible here -- presumably
# the raw, un-resampled query result; define it explicitly, otherwise this cell
# fails on a fresh kernel.
tmp = df.loc[(df.mmsi==228037600) & (df.trip_id == 7)]
gsplt.map_plot(gspp.gdf_from_df(tmp), color=[cmap(0)])