In [5]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from random import choice
pd.options.mode.chained_assignment = None 
import configparser
import pickle
import psycopg2
import psycopg2.extras
import contextily as ctx

In [6]:
import os, sys

# Directory added to the import search path; presumably where the local
# `lonelyboy` package imported below is checked out (verify on your machine).
_project_root = os.path.join(os.path.expanduser('~'), 'Documents/Insert-Generic-Name-Here')

# Guard the append so that re-running this cell does not keep growing sys.path
# with duplicate entries.
if _project_root not in sys.path:
    sys.path.append(_project_root)

from lonelyboy.geospatial import plots as gsplt
from lonelyboy.geospatial import preprocessing as gspp
from lonelyboy.timeseries import lbtimeseries as tspp
# from lonelyboy.geospatial import group_patterns as gsgp


# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import PyQt5
import matplotlib.pyplot as plt
from matplotlib import style

# Apply the ggplot style sheet once (it was previously applied twice in a row).
style.use('ggplot')

# Select the Qt backend so figures open in separate windows.
# `InteractiveShell.magic()` is deprecated; `run_line_magic(name, line)` is the
# supported way to invoke a line magic programmatically.
get_ipython().run_line_magic('matplotlib', 'qt')
# get_ipython().run_line_magic('matplotlib', 'inline')

In [7]:
# Default figure geometry: the height is the width divided by 1.618
# (the golden ratio). Matplotlib interprets figsize values as inches.
PLT_IMAGE_WIDTH = 3.748
PLT_IMAGE_HEIGHT = PLT_IMAGE_WIDTH/1.618

# Global matplotlib defaults: LaTeX text rendering, 8pt sans-serif font,
# and the figure size defined above.
plt.rcParams.update({
    'text.usetex': True,
    'font.family': 'sans-serif',
    'font.size': 8,
    'figure.figsize': (PLT_IMAGE_WIDTH, PLT_IMAGE_HEIGHT),
})

In [8]:
def tuple_str_to_tuple(_str_):
    """Parse the textual form of an integer tuple, e.g. ``'(1, 2, 3)'``.

    The first and last characters (the brackets) are discarded and the
    remainder is split on commas. Unlike a plain ``split``-and-``int``
    approach, this also accepts the two forms that ``str()`` produces for
    short tuples: ``'(1,)'`` (trailing comma) and ``'()'`` (empty).

    Parameters
    ----------
    _str_ : str
        Bracketed, comma-separated integers. Surrounding whitespace is ignored.

    Returns
    -------
    tuple of int

    Raises
    ------
    ValueError
        If any element is not a valid integer literal.
    """
    body = _str_.strip()[1:-1].strip()
    # str((1,)) == '(1,)': drop the single trailing comma of a 1-tuple.
    if body.endswith(','):
        body = body[:-1]
    # str(()) == '()': nothing left to parse.
    if not body.strip():
        return ()
    return tuple(map(int, body.split(',')))

def tuple_str_to_list(_str_):
    """Parse a bracketed, comma-separated string of integers into a list.

    The first and last characters (the brackets) are discarded and the
    remainder is split on commas, so both ``'(1, 2)'`` and ``'[1, 2]'`` work.
    Empty containers (``'()'``, ``'[]'``) and a single trailing comma
    (``'(1,)'``) are accepted instead of raising.

    Parameters
    ----------
    _str_ : str
        Bracketed, comma-separated integers. Surrounding whitespace is ignored.

    Returns
    -------
    list of int

    Raises
    ------
    ValueError
        If any element is not a valid integer literal.
    """
    body = _str_.strip()[1:-1].strip()
    # Tolerate the trailing comma that str() emits for one-element tuples.
    if body.endswith(','):
        body = body[:-1]
    # Empty container: int('') would raise, so return early.
    if not body.strip():
        return []
    return list(map(int, body.split(',')))

# Reading Data

In [13]:
# Read the five numbered GP_STATS CSV parts (1..5) and stack them into one
# frame with a fresh 0..n-1 index.
csv_parts = []
for part_no in range(1, 6):
    csv_parts.append(pd.read_csv(f'./data/csv/stats/GP_STATS/GP_STATS{part_no}.csv'))

df = pd.concat(csv_parts, ignore_index=True)
print(len(df))
df.head()

65


Unnamed: 0,FILENAME,GP.TYPE,#GPs,AVG.DURATION,AVG.SIZE,AVG.VELOCITY,AVG.DISTANCE
0,convoys_card_5_dt_30_dist_926,convoys,1188,86.685185,7.084175,3.33919,2.858
1,convoys_card_5_dt_60_dist_1852,convoys,1430,132.352448,7.169231,3.459315,5.865499
2,convoys_card_5_dt_15_dist_1389,convoys,3606,60.942318,7.549362,3.936768,3.076762
3,convoys_card_5_dt_60_dist_926,convoys,494,150.84413,6.908907,3.003172,3.890299
4,convoys_card_5_dt_60_dist_1389,convoys,1052,135.227186,7.04943,3.245116,4.861893


In [14]:
# Escape '#' in the column name (presumably so it survives LaTeX rendering,
# which is enabled via usetex). A raw string is used because "\#" in a normal
# string literal is an invalid escape sequence and triggers a warning.
# Renaming into a new frame (no inplace) keeps this cell safe to re-run.
df = df.rename(columns={"#GPs": r"\#GPs"})

# FILENAME has the form '<type>_card_<c>_dt_<t>_dist_<d>'; after splitting on
# '_' the three integer parameters sit at positions 2, 4 and 6.
for column, position in (('card', 2), ('dt', 4), ('dist', 6)):
    # Bind `position` as a default argument so each lambda keeps its own index.
    df[column] = df['FILENAME'].apply(
        lambda name, position=position: int(name.split('_')[position])
    )

df.head()

Unnamed: 0,FILENAME,GP.TYPE,\#GPs,AVG.DURATION,AVG.SIZE,AVG.VELOCITY,AVG.DISTANCE,card,dt,dist
0,convoys_card_5_dt_30_dist_926,convoys,1188,86.685185,7.084175,3.33919,2.858,5,30,926
1,convoys_card_5_dt_60_dist_1852,convoys,1430,132.352448,7.169231,3.459315,5.865499,5,60,1852
2,convoys_card_5_dt_15_dist_1389,convoys,3606,60.942318,7.549362,3.936768,3.076762,5,15,1389
3,convoys_card_5_dt_60_dist_926,convoys,494,150.84413,6.908907,3.003172,3.890299,5,60,926
4,convoys_card_5_dt_60_dist_1389,convoys,1052,135.227186,7.04943,3.245116,4.861893,5,60,1389


In [15]:
# Distinct values observed for each of the three parameters (card, dt, dist).
tuple(df[param].unique() for param in ('card', 'dt', 'dist'))

(array([ 5,  3,  8, 12]),
 array([30, 60, 15, 45, 10]),
 array([ 926, 1852, 1389]))

In [16]:
# Rows for the 'flocks' pattern type at dt=30 and dist=926, built from three
# separate boolean masks for readability.
is_flocks = df['GP.TYPE'] == 'flocks'
is_dt_30 = df['dt'] == 30
is_dist_926 = df['dist'] == 926

tmp = df.loc[is_flocks & is_dt_30 & is_dist_926]
tmp

Unnamed: 0,FILENAME,GP.TYPE,\#GPs,AVG.DURATION,AVG.SIZE,AVG.VELOCITY,AVG.DISTANCE,card,dt,dist
58,flocks_card_12_dt_30_dist_926,flocks,0,0.0,0.0,0.0,0.0,12,30,926


In [50]:
# Show the column labels of the filtered frame, to pick which ones to plot.
tmp.columns

Index(['FILENAME', 'GP.TYPE', '\#GPs', 'AVG.DURATION', 'AVG.SIZE',
       'AVG.VELOCITY', 'AVG.DISTANCE', 'card', 'dt', 'dist'],
      dtype='object')

In [55]:
# Quick look: all four averaged metrics against the cardinality value.
avg_metric_columns = ['AVG.DURATION', 'AVG.SIZE', 'AVG.VELOCITY', 'AVG.DISTANCE']
tmp[['card'] + avg_metric_columns].plot(x='card', legend=True)

<matplotlib.axes._subplots.AxesSubplot at 0x7f7e84d94b70>

In [92]:
# Twin-axis figure for the `tmp` slice: size/velocity/distance averages on the
# left axis, average duration on the right axis, one combined legend below.
fig, ax1 = plt.subplots()

color = 'black'
tmp[['card', 'AVG.SIZE', 'AVG.VELOCITY', 'AVG.DISTANCE']].plot(x='card', legend=False, cmap='tab10', ax=ax1)
# pandas labels the x-axis with the x column name ('card') while plotting, so
# the descriptive label has to be set AFTER the plot call to take effect.
ax1.set_xlabel('cardinality threshold')
ax1.set_ylabel('', color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # second y-axis sharing the same x-axis
tmp[['card', 'AVG.DURATION']].plot(x='card', legend=False, ax=ax2, cmap='Pastel1')
ax2.set_ylabel('Avg. Duration (minutes)', color=color)
ax2.tick_params(axis='y', labelcolor=color)
ax2.grid(False)  # avoid a second, misaligned grid on top of ax1's

# Merge the legend entries of both axes into one legend under the plot.
h1, l1 = ax1.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
ax1.legend(h1+h2, l1+l2, loc='lower center', ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.45))

ax1.set_title('flocks, dt=30, dist=926')

# Save through the figure object and only then show it: on non-interactive or
# inline backends plt.show() finalises the figure, so styling or saving after
# it would act on a stale or empty figure.
fig.savefig('TEST_PLOT.pdf', dpi=350, bbox_inches='tight')
plt.show()

# Let's Get DANGEROUS

In [89]:
from itertools import combinations

# Every unordered pair that can be formed from the three parameter columns.
tmp3000 = list(combinations(['card', 'dt', 'dist'], 2))
tmp3000

[('card', 'dt'), ('card', 'dist'), ('dt', 'dist')]

In [90]:
# For each parameter pair, the number of distinct value combinations in df.
[len(df.groupby(list(param_pair)).groups) for param_pair in tmp3000]

[11, 10, 11]

In [115]:
# One PDF per (GP.TYPE, dt, dist) combination, with `card` on the x-axis.
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('gp_plots', exist_ok=True)

for sdf_name, sdf in df.groupby(['GP.TYPE', 'dt', 'dist']):
    # Skip combinations with at most two rows.
    if len(sdf) <= 2:
        continue

    gp_type, dt_value, dist_value = sdf_name
    # Sort into a new frame instead of mutating the groupby slice in place.
    sdf = sdf.sort_values(by=['card'])

    fig, ax1 = plt.subplots()
    color = 'black'

    # Left axis: size / velocity / distance averages.
    sdf[['card', 'AVG.SIZE', 'AVG.VELOCITY', 'AVG.DISTANCE']].plot(x='card', legend=False, cmap='tab10', ax=ax1)
    # pandas labels the x-axis with the x column name while plotting, so the
    # descriptive label must be applied after the plot call.
    ax1.set_xlabel('cardinality threshold')
    ax1.set_ylabel('', color=color)
    ax1.tick_params(axis='y', labelcolor=color)

    # Right axis: average duration, sharing the same x-axis.
    ax2 = ax1.twinx()
    sdf[['card', 'AVG.DURATION']].plot(x='card', legend=False, ax=ax2, cmap='Pastel1')
    ax2.set_ylabel('Avg. Duration (minutes)', color=color)
    ax2.tick_params(axis='y', labelcolor=color)
    ax2.grid(False)

    # Single combined legend for both axes, placed under the plot.
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1+h2, l1+l2, loc='lower center', ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.45))

    ax1.set_title(f'{gp_type}, dt:{dt_value}, dist:{dist_value}')

    # Save before showing: on non-interactive or inline backends plt.show()
    # finalises the figure, so saving afterwards can produce an empty file.
    fig.savefig(f'gp_plots/{gp_type}_dt_{dt_value}_dist_{dist_value}.pdf', dpi=350, bbox_inches='tight')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

In [116]:
# One PDF per (GP.TYPE, card, dist) combination, with `dt` on the x-axis.
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('gp_plots', exist_ok=True)

for sdf_name, sdf in df.groupby(['GP.TYPE', 'card', 'dist']):
    # Skip combinations with at most two rows.
    if len(sdf) <= 2:
        continue

    gp_type, card_value, dist_value = sdf_name
    # Sort into a new frame instead of mutating the groupby slice in place.
    sdf = sdf.sort_values(by=['dt'])

    fig, ax1 = plt.subplots()
    color = 'black'

    # Left axis: size / velocity / distance averages.
    sdf[['dt', 'AVG.SIZE', 'AVG.VELOCITY', 'AVG.DISTANCE']].plot(x='dt', legend=False, cmap='tab10', ax=ax1)
    # The x-axis here is `dt`, not the cardinality (the old label was a
    # copy-paste leftover). Set it after plotting, because pandas labels the
    # axis with the x column name during the plot call.
    # NOTE(review): wording assumes `dt` is the temporal threshold - confirm.
    ax1.set_xlabel('temporal threshold (dt)')
    ax1.set_ylabel('', color=color)
    ax1.tick_params(axis='y', labelcolor=color)

    # Right axis: average duration, sharing the same x-axis.
    ax2 = ax1.twinx()
    sdf[['dt', 'AVG.DURATION']].plot(x='dt', legend=False, ax=ax2, cmap='Pastel1')
    ax2.set_ylabel('Avg. Duration (minutes)', color=color)
    ax2.tick_params(axis='y', labelcolor=color)
    ax2.grid(False)

    # Single combined legend for both axes, placed under the plot.
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1+h2, l1+l2, loc='lower center', ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.45))

    ax1.set_title(f'{gp_type} card:{card_value} dist:{dist_value}')

    # Save before showing: on non-interactive or inline backends plt.show()
    # finalises the figure, so saving afterwards can produce an empty file.
    fig.savefig(f'gp_plots/{gp_type}_card_{card_value}_dist_{dist_value}.pdf', dpi=350, bbox_inches='tight')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

In [117]:
# One PDF per (GP.TYPE, card, dt) combination, with `dist` on the x-axis.
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('gp_plots', exist_ok=True)

for sdf_name, sdf in df.groupby(['GP.TYPE', 'card', 'dt']):
    # Skip combinations with at most two rows.
    if len(sdf) <= 2:
        continue

    gp_type, card_value, dt_value = sdf_name
    # Sort into a new frame instead of mutating the groupby slice in place.
    sdf = sdf.sort_values(by=['dist'])

    fig, ax1 = plt.subplots()
    color = 'black'

    # Left axis: size / velocity / distance averages.
    sdf[['dist', 'AVG.SIZE', 'AVG.VELOCITY', 'AVG.DISTANCE']].plot(x='dist', legend=False, cmap='tab10', ax=ax1)
    # The x-axis here is `dist`, not the cardinality (the old label was a
    # copy-paste leftover). Set it after plotting, because pandas labels the
    # axis with the x column name during the plot call.
    # NOTE(review): wording assumes `dist` is the distance threshold - confirm.
    ax1.set_xlabel('distance threshold (dist)')
    ax1.set_ylabel('', color=color)
    ax1.tick_params(axis='y', labelcolor=color)

    # Right axis: average duration, sharing the same x-axis.
    ax2 = ax1.twinx()
    sdf[['dist', 'AVG.DURATION']].plot(x='dist', legend=False, ax=ax2, cmap='Pastel1')
    ax2.set_ylabel('Avg. Duration (minutes)', color=color)
    ax2.tick_params(axis='y', labelcolor=color)
    ax2.grid(False)

    # Single combined legend for both axes, placed under the plot.
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1+h2, l1+l2, loc='lower center', ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.45))

    ax1.set_title(f'{gp_type} card:{card_value} dt:{dt_value}')

    # Save before showing: on non-interactive or inline backends plt.show()
    # finalises the figure, so saving afterwards can produce an empty file.
    fig.savefig(f'gp_plots/{gp_type}_card_{card_value}_dt_{dt_value}.pdf', dpi=350, bbox_inches='tight')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)