In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from random import choice
pd.options.mode.chained_assignment = None 
import configparser
import pickle
import psycopg2
import psycopg2.extras
import contextily as ctx

In [2]:
import os, sys
sys.path.append(os.path.join(os.path.expanduser('~'), 'Documents/Insert-Generic-Name-Here'))
# sys.path

from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
# from lonelyboy.geospatial import group_patterns as gsgp


# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import PyQt5
import matplotlib.pyplot as plt
from matplotlib import style;  style.use('ggplot')

style.use('ggplot')

get_ipython().magic('matplotlib qt')
# get_ipython().magic('matplotlib inline')

In [3]:
PLT_IMAGE_WIDTH = 3.748
PLT_IMAGE_HEIGHT = PLT_IMAGE_WIDTH/1.618

plt.rc('text', usetex=True)
plt.rc('font', family='sans-serif', size=8)
plt.rcParams['figure.figsize'] = (PLT_IMAGE_WIDTH, PLT_IMAGE_HEIGHT)

In [4]:
def tuple_str_to_tuple(_str_):
    return tuple(map(int, _str_[1:-1].split(',')))

def tuple_str_to_list(_str_):
    return list(map(int, _str_[1:-1].split(',')))

In [5]:
def filter_patterns_on_vesseltype(df, min_cardinality, vesseltype_mmsis):
    filtered_patterns = df.copy()
    filtered_patterns.loc[:,'clusters'] = filtered_patterns.apply(lambda x: np.array(tuple_str_to_list(x.clusters))[np.isin(tuple_str_to_list(x.clusters), vesseltype_mmsis)], axis=1)
    filtered_patterns = filtered_patterns.loc[filtered_patterns.clusters.apply(len) > min_cardinality]
    
    return filtered_patterns

# Reading Data 
* ### mode: {Convoys, Flocks}
* ### card:5
* ### dt:15
* ### dist:1852

In [6]:
mmsi_fisheries = np.load('data/pkl/fisheries_mmsis.npy').flatten()
mmsi_cargos = np.load('data/pkl/cargos_mmsis.npy').flatten()
mmsi_tankers = np.load('data/pkl/tankers_mmsis.npy').flatten()


df_convoys = pd.read_csv('./data/csv/GROUP_PATTERNS/convoys_card_5_dt_15_dist_1852.csv')
df_flocks = pd.read_csv('./data/csv/GROUP_PATTERNS/flocks_card_5_dt_15_dist_1852.csv')

# **#4** Trip Contribution per Group Pattern (Convoys/Flocks)

In [7]:
%%time
host    = 'snf-863583.vm.okeanos.grnet.gr'
db_name = 'zenodo'
uname   = 'students'
pw      = 'infol@bdbl@bs2017'
port    = '46132'

con = psycopg2.connect(database=db_name, user=uname, password=pw, host=host, port=port)
query = 'SELECT * FROM ais_data.dynamic_ships_min_trip_card_3_segmented_12h_resampled_1min_v2 WHERE mmsi IN %s AND datetime BETWEEN \'%s\' AND \'%s\';'

CPU times: user 0 ns, sys: 5.24 ms, total: 5.24 ms
Wall time: 161 ms


# **#4.1** Trip Contribution per CONVOYS/FLOCKS of FISHERIES

In [9]:
df_convoys_of_fisheries = filter_patterns_on_vesseltype(df_convoys, 5, mmsi_fisheries)
df_flocks_of_fisheries = filter_patterns_on_vesseltype(df_flocks, 5, mmsi_fisheries)

In [22]:
import multiprocessing
from tqdm import tqdm_notebook

def classify_trips_v2(df):        
    if df['port_label'].iloc[0] == -1 and df['port_label'].iloc[-1] == -1:
        return np.array([1])
    elif df['port_label'].iloc[0] == -1 and df['port_label'].iloc[-1] == 0:
        return np.array([2])
    elif df['port_label'].iloc[0] == 0 and df['port_label'].iloc[-1] == -1:
        return np.array([3])
    elif df['port_label'].iloc[0] == 0 and df['port_label'].iloc[-1] == 0:
        return np.array([4])
    else:
        print ("$H1T!!!")


def get_gp_trip_contributions_parallel(y):
    print ('Connecting to Database...')
    con = psycopg2.connect(database=db_name, user=uname, password=pw, host=host, port=port)
    print ('Connected to Database!')
    
    df_stat4 = []
    for row in tqdm_notebook(y.itertuples(), total=len(y)):
        df_stat4_row = pd.DataFrame([{'GP':row.clusters, 'C1':0, 'C2':0, 'C3':0, 'C4':0}], columns=['GP', 'C1', 'C2', 'C3', 'C4'])

        row_dynamic = pd.read_sql_query(query%(tuple(row.clusters), row.st, row.et), con=con)
#         row_dynamic = gspp.gdf_from_df(row_dynamic)
#         row_dynamic = points_within_geometry(row_dynamic, ports)
        
        row_dynamic_trips = row_dynamic.groupby(['mmsi', 'trip_id']).apply(classify_trips_v2).to_frame()
        trip_contr = row_dynamic_trips[0].value_counts().dropna().index.values
        df_stat4_row.iloc[:, trip_contr[0][0]] = 1

        df_stat4.append(df_stat4_row)
    con.close()
    return pd.concat(df_stat4)


def parallelize_dataframe(df_par, func, n_cores=7):
#     num_cores = multiprocessing.cpu_count()-1  #leave one free to not freeze machine
    num_cores = n_cores #leave one free to not freeze machine
    num_partitions = num_cores #number of partitions to split dataframe
    
    df_split = np.array_split(df_par, num_partitions)
    pool = multiprocessing.Pool(num_cores)
    
    df_res = pd.concat(pool.map(func, df_split))
    pool.close()
    pool.join()
    return df_res

In [11]:
df_convoys_of_fisheries_trip_contributions = parallelize_dataframe(df_convoys_of_fisheries, get_gp_trip_contributions_parallel)

Connecting to Database...
Connecting to Database...
Connecting to Database...
Connecting to Database...
Connecting to Database...
Connecting to Database...
Connecting to Database...
Connected to Database!
Connected to Database!
Connected to Database!
Connected to Database!


HBox(children=(IntProgress(value=0, max=415), HTML(value='')))

HBox(children=(IntProgress(value=0, max=416), HTML(value='')))

HBox(children=(IntProgress(value=0, max=416), HTML(value='')))

Connected to Database!
Connected to Database!


HBox(children=(IntProgress(value=0, max=416), HTML(value='')))

Connected to Database!


HBox(children=(IntProgress(value=0, max=416), HTML(value='')))

HBox(children=(IntProgress(value=0, max=416), HTML(value='')))

HBox(children=(IntProgress(value=0, max=416), HTML(value='')))










In [12]:
df_flocks_of_fisheries_trip_contributions = parallelize_dataframe(df_flocks_of_fisheries, get_gp_trip_contributions_parallel)

Connecting to Database...
Connecting to Database...
Connecting to Database...
Connecting to Database...
Connecting to Database...
Connecting to Database...
Connecting to Database...
Connected to Database!
Connected to Database!


HBox(children=(IntProgress(value=0, max=2985), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2985), HTML(value='')))

Connected to Database!
Connected to Database!
Connected to Database!
Connected to Database!


HBox(children=(IntProgress(value=0, max=2985), HTML(value='')))

Connected to Database!


HBox(children=(IntProgress(value=0, max=2985), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2984), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2984), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2984), HTML(value='')))










In [13]:
df_convoys_of_fisheries_trip_contributions.to_csv('./data/csv/stats/GP_STATS/convoys_of_fisheries_trip_contributions_V2.csv', index=False, header=True)
df_flocks_of_fisheries_trip_contributions.to_csv('./data/csv/stats/GP_STATS/flocks_of_fisheries_trip_contributions_V2.csv', index=False, header=True)

In [38]:
df_flocks_of_fisheries_trip_contributions.iloc[:,[1,2,3,4]].sum()

C1        0
C2        6
C3        2
C4    20884
dtype: int64

# **#4.2** Trip Contribution per CONVOYS/FLOCKS of CARGOS

In [20]:
df_convoys_of_cargos = filter_patterns_on_vesseltype(df_convoys, 5, mmsi_cargos)
df_flocks_of_cargos = filter_patterns_on_vesseltype(df_flocks, 5, mmsi_cargos)

In [23]:
df_convoys_of_cargos_trip_contributions = parallelize_dataframe(df_convoys_of_cargos, get_gp_trip_contributions_parallel, n_cores=1)

Connecting to Database...
Connected to Database!


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




In [26]:
try: 
    df_flocks_of_cargos_trip_contributions = parallelize_dataframe(df_flocks_of_cargos, get_gp_trip_contributions_parallel, n_cores=1)
except ValueError:
    df_flocks_of_cargos_trip_contributions = pd.DataFrame()

Connecting to Database...
Connected to Database!


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [27]:
df_convoys_of_cargos_trip_contributions.to_csv('./data/csv/stats/GP_STATS/convoys_of_cargos_trip_contributions_V2.csv', index=False, header=True)
df_flocks_of_cargos_trip_contributions.to_csv('./data/csv/stats/GP_STATS/flocks_of_cargos_trip_contributions_V2.csv', index=False, header=True)

In [39]:
df_convoys_of_cargos_trip_contributions.iloc[:,[1,2,3,4]].sum()

C1    0
C2    0
C3    0
C4    3
dtype: int64

# **#4.3** Trip Contribution per CONVOYS/FLOCKS of TANKERS

In [28]:
df_convoys_of_tankers = filter_patterns_on_vesseltype(df_convoys, 5, mmsi_tankers)
df_flocks_of_tankers = filter_patterns_on_vesseltype(df_flocks, 5, mmsi_tankers)

In [29]:
try:
    df_convoys_of_tankers_trip_contributions = parallelize_dataframe(df_convoys_of_tankers, get_gp_trip_contributions_parallel, n_cores=1)
except ValueError:
    df_convoys_of_tankers_trip_contributions = pd.DataFrame()

Connecting to Database...
Connected to Database!


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [31]:
try:
    df_flocks_of_tankers_trip_contributions = parallelize_dataframe(df_flocks_of_tankers, get_gp_trip_contributions_parallel, n_cores=1)
except ValueError:
    df_flocks_of_tankers_trip_contributions = pd.DataFrame()

Connecting to Database...
Connected to Database!


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [32]:
df_convoys_of_tankers_trip_contributions.to_csv('./data/csv/stats/GP_STATS/convoys_of_tankers_trip_contributions_V2.csv', index=False, header=True)
df_convoys_of_tankers_trip_contributions.to_csv('./data/csv/stats/GP_STATS/flocks_of_tankers_trip_contributions_V2.csv', index=False, header=True)

# **#4.3** Pie Charts

In [54]:
tmp = df_convoys_of_fisheries_trip_contributions[[f'C{i}' for i in range(1,5)]].sum(axis=0).sum()

df_convoys_of_fisheries_trip_contributions[[f'C{i}' for i in range(1,5)]].sum(axis=0)

C1       3
C2      32
C3      11
C4    2865
dtype: int64

In [83]:
explode = (0.04, 0, 0, 0)  
cmap=plt.cm.tab20c
colors = [cmap(i) for i in range(4)]

for ves_type, df_F in zip(['fisheries', 'cargos', 'tankers'], [df_flocks_of_fisheries_trip_contributions, df_flocks_of_cargos_trip_contributions, df_flocks_of_tankers_trip_contributions]):
    if (len(df_F) == 0):
        print (f'{ves_type} have 0 patterns')
        continue
    
    plt.figure()
    ax = df_F[[f'C{i}' for i in range(1,5)]].sum(axis=0).plot.pie(y='', 
                       figsize=(PLT_IMAGE_WIDTH, PLT_IMAGE_HEIGHT), 
                       explode=explode,
                       startangle=0,
#                        autopct='%1.2f\%%', 
                       autopct='',
                       pctdistance=0.67,
                       colors=colors,
                       legend=False)
    ax.axis('equal')

    for text in ax.texts:
        text.set_fontsize(8)
        if ' ' in text.get_text():
            text.set_color('grey')
#         if '0.0\%' == text.get_text():
        text.set_text('')
            
    tmp = df_F[[f'C{i}' for i in range(1,5)]].sum(axis=0)
            
    plt.ylabel('')
    plt.legend([f'C1 - {np.around(tmp[0]/tmp.sum()*100, 2)}\%', f'C2 - {np.around(tmp[1]/tmp.sum()*100, 2)}\%', f'C3 - {np.around(tmp[2]/tmp.sum()*100, 2)}\%', f'C4 - {np.around(tmp[3]/tmp.sum()*100, 2)}\%'],
              frameon=False, loc='upper center', bbox_to_anchor=(0.5, 0), fancybox=False, ncol=2, borderaxespad=0.7)
    plt.title(f'Cliques of {ves_type} trip contribution')
    plt.savefig(f'./gp_plots_V5/Flocks_of_{ves_type}_Trip_Contribution_V2.pdf', dpi=350, bbox_inches='tight')

cargos have 0 patterns
tankers have 0 patterns


In [82]:
explode = (0.04, 0, 0, 0)  

for ves_type, df_C in zip(['fisheries', 'cargos', 'tankers'], [df_convoys_of_fisheries_trip_contributions, df_convoys_of_cargos_trip_contributions, df_convoys_of_tankers_trip_contributions]):
    if (len(df_C) == 0):
        print (f'{ves_type} have 0 patterns')
        continue
        
    plt.figure()
    ax = df_C[[f'C{i}' for i in range(1,5)]].sum(axis=0).plot.pie(y='', 
                       figsize=(PLT_IMAGE_WIDTH, PLT_IMAGE_HEIGHT), 
                       explode=explode,
                       startangle=0,
#                        autopct='%1.2f\%%', 
                       autopct='', 
                       pctdistance=0.67,
                       colors=colors,
                       legend=False)
    ax.axis('equal')

    for text in ax.texts:
        text.set_fontsize(8)
        if ' ' in text.get_text():
            text.set_color('grey')
#         if '0.0\%' == text.get_text():
        text.set_text('')
            
    tmp = df_C[[f'C{i}' for i in range(1,5)]].sum(axis=0)
    
    plt.ylabel('')
    plt.legend([f'C1 - {np.around(tmp[0]/tmp.sum()*100, 2)}\%', f'C2 - {np.around(tmp[1]/tmp.sum()*100, 2)}\%', f'C3 - {np.around(tmp[2]/tmp.sum()*100, 2)}\%', f'C4 - {np.around(tmp[3]/tmp.sum()*100, 2)}\%'],
               frameon=False, loc='upper center', bbox_to_anchor=(0.5, 0), fancybox=False, ncol=2, borderaxespad=0.7)
    
    plt.title(f'MCS of {ves_type} trip contribution')
    plt.savefig(f'./gp_plots_V5/Convoys_of_{ves_type}_Trip_Contribution_V2.pdf', dpi=350, bbox_inches='tight')

tankers have 0 patterns


In [32]:
df_convoys_of_fisheries_trip_contributions = pd.read_csv('./data/csv/stats/GP_STATS/convoys_of_fisheries_trip_contributions_V2.csv')
df_flocks_of_fisheries_trip_contributions = pd.read_csv('./data/csv/stats/GP_STATS/flocks_of_fisheries_trip_contributions_V2.csv')

df_convoys_of_cargos_trip_contributions = pd.read_csv('./data/csv/stats/GP_STATS/convoys_of_cargos_trip_contributions_V2.csv')
try:
    df_flocks_of_cargos_trip_contributions = pd.read_csv('./data/csv/stats/GP_STATS/flocks_of_cargos_trip_contributions_V2.csv')
except:
    df_flocks_of_cargos_trip_contributions = pd.DataFrame()

try:
    df_convoys_of_tankers_trip_contributions = pd.read_csv('./data/csv/stats/GP_STATS/convoys_of_tankers_trip_contributions_V2.csv')
except:
    df_convoys_of_tankers_trip_contributions = pd.DataFrame()

try:
    df_flocks_of_tankers_trip_contributions = pd.read_csv('./data/csv/stats/GP_STATS/flocks_of_tankers_trip_contributions_V2.csv')
except:
    df_flocks_of_tankers_trip_contributions = pd.DataFrame()

In [44]:
# df_convoys_of_fisheries_trip_contributions.sum(axis=0)
# len(df_convoys_of_fisheries_trip_contributions)

# df_flocks_of_fisheries_trip_contributions.sum(axis=0)
# len(df_flocks_of_fisheries_trip_contributions)


# df_convoys_of_cargos_trip_contributions.sum(axis=0)
# len(df_convoys_of_cargos_trip_contributions)

# df_flocks_of_cargos_trip_contributions.sum(axis=0)
# len(df_flocks_of_cargos_trip_contributions)


# df_convoys_of_tankers_trip_contributions.sum(axis=0)
# len(df_convoys_of_tankers_trip_contributions)

# df_flocks_of_tankers_trip_contributions.sum(axis=0)
# len(df_flocks_of_tankers_trip_contributions)

0    0
dtype: int64