In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from random import choice
pd.options.mode.chained_assignment = None 
import configparser
import pickle
import psycopg2
import psycopg2.extras
import contextily as ctx

In [2]:
import os, sys
# Make packages located under ~/Documents/Coding/Python/ importable from this
# notebook (presumably where the local `lonelyboy` package is checked out —
# TODO confirm). NOTE(review): this ties the notebook to one machine's
# directory layout.
sys.path.append(os.path.join(os.path.expanduser('~'), 'Documents/Coding/Python/'))
# sys.path

# Project-local helper modules, imported under short aliases.
from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
# from lonelyboy.geospatial import group_patterns as gsgp


# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import PyQt5
import matplotlib.pyplot as plt
from matplotlib import style;  style.use('ggplot')
from matplotlib.ticker import PercentFormatter

# Use the ggplot look for every figure produced in this notebook.
style.use('ggplot')

# Open figures in separate Qt windows. `run_line_magic` is the supported
# replacement for the deprecated `InteractiveShell.magic()` call.
get_ipython().run_line_magic('matplotlib', 'qt')
# get_ipython().run_line_magic('matplotlib', 'inline')

In [3]:
# Figure geometry: a fixed width with the height derived from a 1.618
# width-to-height ratio. Both values are passed to matplotlib as `figsize`.
PLT_IMAGE_WIDTH = 3.748
PLT_IMAGE_HEIGHT = PLT_IMAGE_WIDTH / 1.618

# Global matplotlib defaults: LaTeX text rendering, an 8pt sans-serif font
# and the default figure size defined above.
plt.rcParams.update({
    'text.usetex': True,
    'font.family': 'sans-serif',
    'font.size': 8,
    'figure.figsize': (PLT_IMAGE_WIDTH, PLT_IMAGE_HEIGHT),
})

In [4]:
def tuple_str_to_tuple(_str_):
    """Parse the textual form of an integer tuple into a real tuple.

    The first and last characters of the (whitespace-stripped) string are
    treated as the enclosing brackets and discarded; the remainder is split
    on commas and each non-empty piece is converted with ``int``.

    Args:
        _str_: Text such as ``"(1, 2, 3)"``, ``"(5,)"`` or ``"()"``.

    Returns:
        A tuple of ints; empty when there is nothing between the brackets.

    Raises:
        ValueError: If a non-empty piece is not a valid integer literal.
    """
    inner = _str_.strip()[1:-1]
    # Skip empty pieces so "()" and a trailing comma ("(5,)") do not reach
    # int('') and raise.
    return tuple(int(piece) for piece in inner.split(',') if piece.strip())

def tuple_str_to_list(_str_):
    """Parse the textual form of an integer tuple/list into a list of ints.

    The first and last characters of the (whitespace-stripped) string are
    treated as the enclosing brackets and discarded; the remainder is split
    on commas and each non-empty piece is converted with ``int``.

    Args:
        _str_: Text such as ``"(1, 2, 3)"``, ``"[5,]"`` or ``"()"``.

    Returns:
        A list of ints; empty when there is nothing between the brackets.

    Raises:
        ValueError: If a non-empty piece is not a valid integer literal.
    """
    inner = _str_.strip()[1:-1]
    # Skip empty pieces so "()" and a trailing comma do not reach int('')
    # and raise.
    return [int(piece) for piece in inner.split(',') if piece.strip()]

# Reading Data

In [5]:
# Read the eight statistics files GP_STATS0.csv ... GP_STATS7.csv and stack
# them into one frame with a fresh 0..n-1 index.
stats_paths = [f'./data/csv/stats/GP_STATS/GP_STATS{i}.csv' for i in range(8)]
df = pd.concat(list(map(pd.read_csv, stats_paths)), ignore_index=True)

# Row count first, then a preview of the first rows as the cell output.
print(len(df))
df.head()

72


Unnamed: 0,FILENAME,GP.TYPE,#GPs,AVG.DURATION,AVG.SIZE,AVG.VELOCITY,AVG.DISTANCE
0,flocks_card_5_dt_15_dist_2315,flocks,55667,50.580488,6.68299,4.220999,2.765673
1,flocks_card_5_dt_15_dist_463,flocks,29,35.793103,5.103448,1.965971,0.810899
2,convoys_card_5_dt_30_dist_926,convoys,1188,86.685185,7.084175,3.33919,2.858
3,convoys_card_5_dt_60_dist_1852,convoys,1430,132.352448,7.169231,3.459315,5.865499
4,convoys_card_5_dt_15_dist_1389,convoys,3606,60.942318,7.549362,3.936768,3.076762


In [6]:
# Rename the statistic columns: dots become underscores and '#' gets a
# leading backslash (the notebook renders text through LaTeX, where a bare
# '#' is special). The raw string yields the same backslash-hash label
# without relying on an invalid escape sequence.
df = df.rename(columns={
    '#GPs': r'\#GPs',
    'AVG.DURATION': 'AVG_DURATION',
    'AVG.SIZE': 'AVG_SIZE',
    'AVG.VELOCITY': 'AVG_VELOCITY',
    'AVG.DISTANCE': 'AVG_DISTANCE',
})

# FILENAME looks like '<type>_card_<c>_dt_<t>_dist_<d>': after splitting on
# '_' the three parameter values sit at positions 2, 4 and 6.
name_parts = df['FILENAME'].str.split('_')
df['card'] = name_parts.str[2].astype('int64')
df['dt'] = name_parts.str[4].astype('int64')
df['dist'] = name_parts.str[6].astype('int64')
df.head()

Unnamed: 0,FILENAME,GP.TYPE,\#GPs,AVG_DURATION,AVG_SIZE,AVG_VELOCITY,AVG_DISTANCE,card,dt,dist
0,flocks_card_5_dt_15_dist_2315,flocks,55667,50.580488,6.68299,4.220999,2.765673,5,15,2315
1,flocks_card_5_dt_15_dist_463,flocks,29,35.793103,5.103448,1.965971,0.810899,5,15,463
2,convoys_card_5_dt_30_dist_926,convoys,1188,86.685185,7.084175,3.33919,2.858,5,30,926
3,convoys_card_5_dt_60_dist_1852,convoys,1430,132.352448,7.169231,3.459315,5.865499,5,60,1852
4,convoys_card_5_dt_15_dist_1389,convoys,3606,60.942318,7.549362,3.936768,3.076762,5,15,1389


In [7]:
# 0.539956803 == 1 / 1.852, the kilometre -> nautical-mile factor
# (presumably AVG_DISTANCE is stored in km and 'dist' in metres — confirm
# against the producer of the CSVs).
# NOTE: both columns are overwritten, so running this cell twice converts twice.
KM_TO_NM = 0.539956803
df['AVG_DISTANCE'] = df['AVG_DISTANCE'] * KM_TO_NM
df['dist'] = (df['dist'] / 1000 * KM_TO_NM).round(2)

In [8]:
# Distinct values of each threshold column, in the order card, dt, dist.
tuple(df[column].unique() for column in ('card', 'dt', 'dist'))

(array([ 5,  3,  8, 12], dtype=int64),
 array([15, 30, 60, 45, 10], dtype=int64),
 array([1.25, 0.25, 0.5 , 1.  , 0.75]))

In [9]:
# Display the current column labels of the statistics frame.
df.columns

Index(['FILENAME', 'GP.TYPE', '\#GPs', 'AVG_DURATION', 'AVG_SIZE',
       'AVG_VELOCITY', 'AVG_DISTANCE', 'card', 'dt', 'dist'],
      dtype='object')

In [10]:
# Relabel the pattern types used in plot legends: flocks -> Cliques,
# convoys -> MCS.
df['GP.TYPE'] = df['GP.TYPE'].replace({'flocks': 'Cliques', 'convoys': 'MCS'})

In [11]:
# Keep every run that uses at least one default threshold (dist, card or dt),
# and drop the dt == 10 runs.
# 'dist' already holds nautical miles at this point (1852 -> 1.0), so the
# default distance must be compared as 1.0; comparing against 1852 never matches.
is_default_dist = df['dist'] == 1.0
is_default_card = df['card'] == 5
is_default_dt = df['dt'] == 15

tmp = df.loc[
    (is_default_dist | is_default_card | is_default_dt) & (df['dt'] != 10)
].reset_index(drop=True)
tmp

Unnamed: 0,FILENAME,GP.TYPE,\#GPs,AVG_DURATION,AVG_SIZE,AVG_VELOCITY,AVG_DISTANCE,card,dt,dist
0,flocks_card_5_dt_15_dist_2315,Cliques,55667,50.580488,6.68299,4.220999,1.493344,5,15,1.25
1,flocks_card_5_dt_15_dist_463,Cliques,29,35.793103,5.103448,1.965971,0.43785,5,15,0.25
2,convoys_card_5_dt_30_dist_926,MCS,1188,86.685185,7.084175,3.33919,1.543196,5,30,0.5
3,convoys_card_5_dt_60_dist_1852,MCS,1430,132.352448,7.169231,3.459315,3.167116,5,60,1.0
4,convoys_card_5_dt_15_dist_1389,MCS,3606,60.942318,7.549362,3.936768,1.661319,5,15,0.75
5,convoys_card_5_dt_60_dist_926,MCS,494,150.84413,6.908907,3.003172,2.100594,5,60,0.5
6,convoys_card_5_dt_60_dist_1389,MCS,1052,135.227186,7.04943,3.245116,2.625212,5,60,0.75
7,convoys_card_5_dt_30_dist_1852,MCS,2954,85.583954,7.46784,3.883968,2.465341,5,30,1.0
8,convoys_card_5_dt_45_dist_1852,MCS,1972,110.143002,7.279412,3.618718,2.827189,5,45,1.0
9,convoys_card_5_dt_15_dist_926,MCS,2246,55.782725,7.258682,3.74876,1.231729,5,15,0.5


In [18]:
plt.close("all")

# `plt.get_cmap` is available on every matplotlib release, whereas
# `matplotlib.cm.get_cmap` was deprecated and later removed.
cmap = plt.get_cmap('tab10')

# savefig does not create missing directories.
os.makedirs('gp_plots_V6', exist_ok=True)

# Average distance travelled as a function of the distance threshold, for
# dt == 15 and card == 5, with one line per group-pattern type.
for sdf1_name, sdf1 in tmp.loc[(tmp.dt==15) & (tmp.card==5)].groupby(['dt', 'card']):
    # Rows per pattern type: require exactly two types with >= 2 rows each.
    gp_type = sdf1['GP.TYPE'].value_counts().values
    if not ((len(gp_type) == 2) and np.all(gp_type >= 2)):
        continue

    fig, ax1 = plt.subplots()
    labels = []

    # Group by a scalar key (not a one-element list) so `sdf_name` is a plain
    # string on every pandas version and can be used directly as legend text.
    for idx, (sdf_name, sdf) in enumerate(sdf1.groupby('GP.TYPE')):
        # Types with two or fewer rows are not drawn.
        if len(sdf) <= 2:
            continue

        # Sort a copy instead of mutating the groupby slice in place.
        sdf = sdf.sort_values(by='dist')
        sdf.plot(x='dist', y='AVG_DISTANCE', legend=False, c=cmap(idx), marker='o', ms=5, ax=ax1, fontsize=10)
        ax1.axis('tight')
        labels.append(sdf_name)

    fig.suptitle(f'Temporal Threshold: {sdf1_name[0]} min., Cardinality Threshold: {sdf1_name[1]}', fontsize=10, y=1.045)
    ax1.set_ylabel('Avg. Distance Travelled (n.m.)', fontsize=10, color='black')
    ax1.set_xlabel('Distance Threshold (n.m.)', fontsize=10)
    ax1.tick_params(axis='y', labelcolor='black')
    ax1.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    ax1.minorticks_off()

    # Legend below the axes, labelled with the pattern types actually drawn.
    h1, _ = ax1.get_legend_handles_labels()
    ax1.legend(h1, labels, loc='lower center', ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.45), fontsize=10)

    # One tick per distance threshold present in the data.
    ax1.set_xticks(sdf1['dist'].unique())

    # Save before showing: on backends where show() releases the figure,
    # a later savefig would write an empty canvas.
    fig.savefig(f'gp_plots_V6/dt_{int(sdf1_name[0])}_card_{sdf1_name[1]}_AVG_DISTANCE.pdf', dpi=350, bbox_inches='tight')
    plt.show()

In [17]:
plt.close("all")

# `plt.get_cmap` is available on every matplotlib release, whereas
# `matplotlib.cm.get_cmap` was deprecated and later removed.
cmap = plt.get_cmap('tab10')

# savefig does not create missing directories.
os.makedirs('gp_plots_V6', exist_ok=True)

# Average distance travelled as a function of the temporal threshold, for
# dist == 1.0 and card == 5, with one line per group-pattern type.
for sdf1_name, sdf1 in tmp.loc[(tmp.dist==1.0) & (tmp.card==5)].groupby(['dist', 'card']):
    # Rows per pattern type: require exactly two types with >= 2 rows each.
    gp_type = sdf1['GP.TYPE'].value_counts().values
    if not ((len(gp_type) == 2) and np.all(gp_type >= 2)):
        continue

    fig, ax1 = plt.subplots()
    labels = []

    # Group by a scalar key (not a one-element list) so `sdf_name` is a plain
    # string on every pandas version and can be used directly as legend text.
    for idx, (sdf_name, sdf) in enumerate(sdf1.groupby('GP.TYPE')):
        # Types with two or fewer rows are not drawn.
        if len(sdf) <= 2:
            continue

        # Sort a copy instead of mutating the groupby slice in place.
        sdf = sdf.sort_values(by='dt')
        sdf.plot(x='dt', y='AVG_DISTANCE', legend=False, c=cmap(idx), marker='o', ms=5, ax=ax1, fontsize=10)
        ax1.axis('tight')
        labels.append(sdf_name)

    # The distance threshold is expressed in nautical miles (the axis labels
    # use n.m. as well); "knots" is a unit of speed.
    fig.suptitle(f'Distance Threshold: {sdf1_name[0]} n.m., Cardinality Threshold: {sdf1_name[1]}', fontsize=10, y=1)
    ax1.set_ylabel('Avg. Distance Travelled (n.m.)', fontsize=10, color='black')
    ax1.set_xlabel('Temporal Threshold (minutes)', fontsize=10)
    ax1.tick_params(axis='y', labelcolor='black')
    ax1.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    ax1.minorticks_off()

    # Legend below the axes, labelled with the pattern types actually drawn.
    h1, _ = ax1.get_legend_handles_labels()
    ax1.legend(h1, labels, loc='lower center', ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.45), fontsize=10)

    # One tick per temporal threshold present in the data.
    ax1.set_xticks(sdf1['dt'].unique())

    # Save before showing: on backends where show() releases the figure,
    # a later savefig would write an empty canvas.
    fig.savefig(f'gp_plots_V6/dist_{int(sdf1_name[0])}_card_{sdf1_name[1]}_AVG_DISTANCE.pdf', dpi=350, bbox_inches='tight')
    plt.show()

In [15]:
plt.close("all")

# `plt.get_cmap` is available on every matplotlib release, whereas
# `matplotlib.cm.get_cmap` was deprecated and later removed.
cmap = plt.get_cmap('tab10')

# savefig does not create missing directories.
os.makedirs('gp_plots_V6', exist_ok=True)

# Number of discovered group patterns as a function of the cardinality
# threshold, for dist == 1.0 and dt == 15, with one line per pattern type.
for sdf1_name, sdf1 in tmp.loc[(tmp.dist==1.0) & (tmp.dt==15)].groupby(['dist', 'dt']):
    # Rows per pattern type: require exactly two types with >= 2 rows each.
    gp_type = sdf1['GP.TYPE'].value_counts().values
    if not ((len(gp_type) == 2) and np.all(gp_type >= 2)):
        continue

    fig, ax1 = plt.subplots()
    labels = []

    # Group by a scalar key (not a one-element list) so `sdf_name` is a plain
    # string on every pandas version and can be used directly as legend text.
    for idx, (sdf_name, sdf) in enumerate(sdf1.groupby('GP.TYPE')):
        # Types with two or fewer rows are not drawn.
        if len(sdf) <= 2:
            continue

        # Sort a copy instead of mutating the groupby slice in place.
        sdf = sdf.sort_values(by='card')
        # Raw string: the column label is a literal backslash followed by
        # '#GPs'; writing it without r'' relies on an invalid escape sequence.
        sdf.plot(x='card', y=r'\#GPs', legend=False, c=cmap(idx), marker='o', ms=5, ax=ax1, fontsize=10)
        ax1.axis('tight')
        labels.append(sdf_name)

    # The distance threshold is expressed in nautical miles; "knots" is a
    # unit of speed.
    fig.suptitle(f'Distance Threshold: {sdf1_name[0]} n.m., Temporal Threshold: {sdf1_name[1]} min.', fontsize=10, y=1.045)
    ax1.set_ylabel(r'\#Group Patterns', fontsize=10, color='black')
    ax1.set_xlabel('Cardinality', fontsize=10)
    ax1.tick_params(axis='y', labelcolor='black')
    ax1.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    ax1.minorticks_off()

    # Legend below the axes, labelled with the pattern types actually drawn.
    h1, _ = ax1.get_legend_handles_labels()
    ax1.legend(h1, labels, loc='lower center', ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.45), fontsize=10)

    # One tick per cardinality threshold present in the data.
    ax1.set_xticks(sdf1['card'].unique())

    # Save before showing: on backends where show() releases the figure,
    # a later savefig would write an empty canvas.
    fig.savefig(f'gp_plots_V6/dist_{sdf1_name[0]}_dt_{sdf1_name[1]}_no_of_GPs.pdf', dpi=350, bbox_inches='tight')
    plt.show()

# Default Params: card=5, dt=15 min, dist=1852 m (= 1.0 n.m.)

In [20]:
# Trip-contribution tables for the default parameter set, one per pattern
# type: df_F is read from the 'flocks' file, df_C from the 'convoys' file.
df_F, df_C = (
    pd.read_csv(f'./data/csv/stats/GP_STATS/{gp_kind}_card_5_dt_15_dist_1852_trip_contributions.csv')
    for gp_kind in ('flocks', 'convoys')
)

In [21]:
# The first four entries of the 'tab20c' colormap, one per pie wedge.
cmap = plt.cm.tab20c
colors = list(map(cmap, range(4)))

In [24]:
# Pull the first wedge slightly out of the pie.
explode = (0.04, 0, 0, 0)

# Column totals of C1..C4 from the table loaded from the 'flocks' file.
contribution_totals = df_F[[f'C{i}' for i in range(1, 5)]].sum(axis=0)

# Raw string for autopct: the format is '%1.1f' followed by a literal
# backslash and percent sign; without r'' the '\%' is an invalid escape.
ax = contribution_totals.plot.pie(y='',
                   figsize=(PLT_IMAGE_WIDTH, PLT_IMAGE_HEIGHT),
                   explode=explode,
                   startangle=0,
                   autopct=r'%1.1f\%%',
                   pctdistance=0.67,
                   colors=colors,
                   legend=False)
ax.axis('equal')

# Uniform 8pt text on the pie; any text containing a space is greyed out.
for text in ax.texts:
    text.set_fontsize(8)
    if ' ' in text.get_text():
        text.set_color('grey')

ax.set_ylabel('')
ax.set_title('Cliques Trip Contribution')

# savefig does not create missing directories; save through the figure that
# owns this axes rather than whichever figure is "current".
os.makedirs('./gp_plots_V6', exist_ok=True)
ax.get_figure().savefig('./gp_plots_V6/Flocks_Trip_Contribution.pdf', dpi=350, bbox_inches='tight')

In [26]:
# Pull the first wedge slightly out of the pie.
explode = (0.04, 0, 0, 0)

# Column totals of C1..C4 from the table loaded from the 'convoys' file.
contribution_totals = df_C[[f'C{i}' for i in range(1, 5)]].sum(axis=0)

# Raw string for autopct: the format is '%1.1f' followed by a literal
# backslash and percent sign; without r'' the '\%' is an invalid escape.
ax = contribution_totals.plot.pie(y='',
                   figsize=(PLT_IMAGE_WIDTH, PLT_IMAGE_HEIGHT),
                   explode=explode,
                   startangle=0,
                   autopct=r'%1.1f\%%',
                   pctdistance=0.67,
                   colors=colors,
                   legend=False)
ax.axis('equal')

# Uniform 8pt text on the pie; any text containing a space is greyed out.
for text in ax.texts:
    text.set_fontsize(8)
    if ' ' in text.get_text():
        text.set_color('grey')

ax.set_ylabel('')
ax.set_title('MCS Trip Contribution')

# savefig does not create missing directories; save through the figure that
# owns this axes rather than whichever figure is "current".
os.makedirs('./gp_plots_V6', exist_ok=True)
ax.get_figure().savefig('./gp_plots_V6/Convoys_Trip_Contribution.pdf', dpi=350, bbox_inches='tight')

In [147]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

fname = 'TEST_IMAGE.png'

# Open the file in a context manager so its handle is released as soon as
# the greyscale ("L" mode) copy has been made; `image` is that copy.
with Image.open(fname) as source_image:
    image = source_image.convert("L")
arr = np.asarray(image)

# Draw on a fresh figure: with a windowed backend the bare pyplot call would
# paint onto whatever axes is current (e.g. the previously drawn chart).
fig, ax = plt.subplots()
ax.imshow(arr, cmap='gray', vmin=0, vmax=255)
ax.grid(False)
plt.show()