#### Imports

In [1]:
import numpy as np
import shapely
import bisect
import shapely.wkt
import matplotlib.pyplot as plt
import math
import geopandas as gpd
import json
import pandas as pd
import seaborn as sns
from enum import Enum
import plotly.graph_objects as go
from plotly.subplots import make_subplots


import intersection.first_bin_search
import intersection.chunk_bbox_intersection
from bench_utils import parse_intersection_data 
common_bbox = intersection.chunk_bbox_intersection.common_bbox
chunk_bbox_is_intersecting = intersection.chunk_bbox_intersection.is_intersecting
chunk_bbox_intersection = intersection.chunk_bbox_intersection.intersection



from intersection.plotting import *
import algos.fpd_extended_lib.cfg as cfg
from algos.alg_fpd_extended import FpdExtended


pd.options.mode.chained_assignment = None  # default='warn'

#### Import data

In [4]:
# Compressor instance shared by the helper functions further down.
fpd = FpdExtended()

# Geometries
# Each call returns a (data, stats) pair; the stats are discarded with `_`
# where they are not kept. NOTE(review): the numeric second argument looks
# like a cap on the number of loaded geometry pairs -- confirm in bench_utils.
manual_data, _ = parse_intersection_data("manual")
special_cases, _ = parse_intersection_data("latest_export.json", strip_precision=True)
world_data, world_data_stats = parse_intersection_data("world.json", 1000, strip_precision=True)
lund_data, lund_data_stats = parse_intersection_data("lund.json", 5000)
sweden_data, sweden_data_stats = parse_intersection_data("sweden-places-a.json", 1000)
new_york_data, new_york_data_stats = parse_intersection_data("new-york-natural.json", 1000)
waterways_data, waterways_data_stats = parse_intersection_data("nord-est-waterways.json", 1000)

# Every dataset that the evaluation loops below iterate over.
datasets = [
    manual_data, special_cases,
    world_data, lund_data, sweden_data,
    new_york_data, waterways_data,
]


#### Constants

In [9]:
# Number of total iterations = NBR_ITER * N * number_of_datasets
NBR_ITER = -1 # Number of iterations for each dataset. NOTE(review): not referenced by the cells shown here; the meaning of -1 should be confirmed.
N = 1 # Number of times get_time_stat re-runs the algorithm on each geometry pair

# Exclusive upper bound of the delta sizes swept in the MAX_NUM_DELTAS analysis (range(0, MAX_DELTA_SIZE))
MAX_DELTA_SIZE = 100

# Geometry classification thresholds, compared against coordinate counts in get_size_category:
# size < MEDIUM_THRESHOLD -> "Small", MEDIUM_THRESHOLD <= size < LARGE_THRESHOLD -> "Medium", otherwise "Large"
MEDIUM_THRESHOLD = 50
LARGE_THRESHOLD = 100

## Functions Section

#### Config Setter Functions

In [10]:
def set_max_num_delta(val):
    """Update the delta-count settings in the shared `cfg` module.

    Stores `val` in `cfg.MAX_NUM_DELTAS`, then stores the result of
    `cfg.required_bits(val)` in `cfg.D_CNT_SIZE`.
    """
    cfg.MAX_NUM_DELTAS = val
    counter_bits = cfg.required_bits(val)
    cfg.D_CNT_SIZE = counter_bits

def act_deact_baseline(activate=True):
    """Turn the baseline configuration on or off in the shared `cfg` module.

    `cfg.BASELINE_ON` receives `activate` unchanged. The three
    `DISABLE_OPTIMIZED_*` switches are set to True when `activate` is truthy
    and to False otherwise.
    """
    cfg.BASELINE_ON = activate
    optimizations_disabled = bool(activate)
    for switch_name in (
        "DISABLE_OPTIMIZED_INTERSECTION",
        "DISABLE_OPTIMIZED_ADD_VERTEX",
        "DISABLE_OPTIMIZED_BOUNDING_BOX",
    ):
        setattr(cfg, switch_name, optimizations_disabled)

#### Stats Collector Functions

In [11]:
def get_size_category(size):
    """Classify a geometry size as Small, Medium or Large.

    Returns a `(label, rank)` tuple, where rank is 0 for "Small", 1 for
    "Medium" and 2 for "Large". The boundaries are the module constants
    MEDIUM_THRESHOLD and LARGE_THRESHOLD (both inclusive lower bounds).
    """
    if size >= LARGE_THRESHOLD:
        return "Large", 2
    if size >= MEDIUM_THRESHOLD:
        return "Medium", 1
    return "Small", 0
    
def get_intersection_category(size1, size2):
    """Build the "<smaller>/<larger>" size label for a pair of geometries.

    Both sizes are classified with get_size_category and the two labels are
    joined with "/", the lower-ranked category first.
    """
    first = get_size_category(size1)
    second = get_size_category(size2)
    # sorted() is stable, so equal ranks keep the (size1, size2) order.
    ordered = sorted((first, second), key=lambda category: category[1])
    return "/".join(label for label, _rank in ordered)


def get_context_category(g1, g2, bins=None):
    """Classify how two geometries relate to each other.

    Args:
        g1, g2: The two geometries (passed to shapely and, when `bins` is not
            given, to `fpd.compress`).
        bins: Optional pair of already compressed geometries
            `(b1_fpde, b2_fpde)`. When omitted, both geometries are compressed
            here.

    Returns:
        One of the labels
        "No Overlap (FALSE)"        - `common_bbox` returned a falsy value,
        "Overlap (FALSE)"           - the shapely intersection is falsy,
        "Partial Overlap (TRUE)"    - they intersect, neither contains the other,
        "Fully Inside Other (TRUE)" - one geometry contains the other.
    """
    if bins is None:
        _, b1_fpde = fpd.compress(g1)
        _, b2_fpde = fpd.compress(g2)
    else:
        b1_fpde, b2_fpde = bins

    # The checks run from cheapest to most expensive, and each one is only
    # evaluated if the previous one did not already decide the category.
    if not common_bbox((b1_fpde, b2_fpde)):
        return "No Overlap (FALSE)"
    if not shapely.intersection(g1, g2):
        return "Overlap (FALSE)"
    if shapely.contains(g1, g2) or shapely.contains(g2, g1):
        return "Fully Inside Other (TRUE)"
    return "Partial Overlap (TRUE)"

def get_category_stat(g1, g2):
    """Return `[size_category, context_category]` for a geometry pair.

    The size category is derived from the coordinate counts of both
    geometries, the context category from get_context_category.
    """
    coord_count_1 = shapely.get_num_coordinates(g1)
    coord_count_2 = shapely.get_num_coordinates(g2)
    return [
        get_intersection_category(coord_count_1, coord_count_2),
        get_context_category(g1, g2),
    ]


#For taking mean of all stats, not just total time
def get_time_stat(bins, predicate=False):
    """Run the intersection algorithm N times and return the mean statistics.

    Args:
        bins: Pair of compressed geometries handed to the algorithm.
        predicate: When True the is-intersecting predicate is measured,
            otherwise the full intersection.

    Returns:
        A list with the per-run mean of the four statistics
        ['decomp', 'nbr_recieved_chks', 'nbr_total_chks', 'total_time'].
        If N is not positive no run is made and zeros are returned.
    """
    alg = chunk_bbox_is_intersecting if predicate else chunk_bbox_intersection
    # 'decomp', 'nbr_recieved_chks', 'nbr_total_chks', 'total_time'
    totals = [0, 0, 0, 0]
    if N <= 0:
        return totals
    for _ in range(N):
        stat, _ = alg(bins, get_stats=True)
        totals = [acc + value for acc, value in zip(totals, stat)]
    # Divide by the number of runs: previously the raw sums were returned,
    # so every statistic scaled with N instead of being a mean.
    return [total / N for total in totals]


#### Dataset Evaluation

In [13]:
def evaluate_dataset(data, stats_df, delta_size=None, not_predicate=False, add_size=False):
    """Measure every geometry pair of a dataset and append the rows to `stats_df`.

    Args:
        data: Iterable of `(g1, g2)` geometry pairs.
        stats_df: DataFrame that is extended in place, one row per pair and
            predicate mode. Its columns must match the row layout:
            the four values from get_time_stat, size category, context
            category, delta size, predicate flag, pair index, cfg.BASELINE_ON
            and, if `add_size` is set, the summed length of both compressed
            geometries.
        delta_size: If given, applied through set_max_num_delta before each
            pair is compressed and recorded in the row; otherwise the current
            cfg.MAX_NUM_DELTAS is recorded.
        not_predicate: When True only the full intersection is measured,
            otherwise both the predicate and the full intersection.
        add_size: Append the summed compressed size as a last column.
    """
    predicate_modes = [False] if not_predicate else [True, False]

    for idx, (g1, g2) in enumerate(data):
        if delta_size is not None:
            set_max_num_delta(delta_size)
        bins = (fpd.compress(g1)[1], fpd.compress(g2)[1])
        # The categories depend only on the geometry pair, so they are
        # computed once here instead of once per predicate mode.
        category_stat = get_category_stat(g1, g2)

        for is_predicate in predicate_modes:
            stats = []
            stats.extend(get_time_stat(bins=bins, predicate=is_predicate))
            stats.extend(category_stat)
            stats.append(delta_size if delta_size is not None else cfg.MAX_NUM_DELTAS)
            stats.append(is_predicate)
            stats.append(idx)
            stats.append(cfg.BASELINE_ON)
            if add_size:
                stats.append(len(bins[0]) + len(bins[1]))
            stats_df.loc[len(stats_df)] = stats

# Statistics Visualization

#### Best MAX_NUM_DELTAS per Dataset Analysis

In [14]:
from tqdm import tqdm

# One row per (delta size, dataset, baseline mode, geometry pair); the last
# column holds the summed compressed size requested through add_size=True.
max_deltas_df = pd.DataFrame(
    columns=[
        'decomp', 'nbr_recieved_chks', 'nbr_total_chks', 'total_time',
        "sizes", "context", "delta_size", "predicate", "dataset_idx",
        "baseline", "sum_size",
    ]
)

# Sweep every delta size below MAX_DELTA_SIZE, measuring only the full
# intersection (not_predicate=True) with the baseline switched on and off.
for max_delta_size in tqdm(range(MAX_DELTA_SIZE)):
    for dataset in datasets:
        for baseline_on in (True, False):
            act_deact_baseline(activate=baseline_on)
            evaluate_dataset(
                data=dataset,
                stats_df=max_deltas_df,
                delta_size=max_delta_size,
                not_predicate=True,
                add_size=True,
            )

  1%|          | 1/100 [04:21<7:11:41, 261.64s/it]

In [None]:
# Keep only the columns that the max-delta-size plot groups and averages over.
max_deltas_filt_df = max_deltas_df.loc[
    :, ['total_time', "sizes", "delta_size", "predicate", "baseline"]
]

In [None]:
def plot_max_delta_size_speed(df, is_predicate=False):
    """Plot mean execution time against the max delta size, one subplot per size category.

    Args:
        df: DataFrame with the columns 'predicate', 'delta_size', 'sizes',
            'baseline' and 'total_time'.
        is_predicate: Selects the rows of the predicate (True) or of the full
            intersection (False).

    Each subplot holds two lines, one for the baseline and one for FPDE.
    The figure is shown with `fig.show()`; nothing is returned.
    """
    df = df[df['predicate'] == is_predicate]
    df = df.groupby(['delta_size', 'sizes', 'baseline']).mean().reset_index()

    size_labels = list(df.sizes.unique())
    n_cols = 3
    # At least the original 2x3 grid, with more rows if further size
    # categories ever show up.
    n_rows = max(2, -(-len(size_labels) // n_cols))
    fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=size_labels, horizontal_spacing=0.1, x_title="Max Chunk Size", y_title="Mean Execution Time (s)")

    for position, size in enumerate(size_labels):
        curr_context_df = df[df['sizes'] == size]
        for is_baseline in [True, False]:
            curr_df = curr_context_df[curr_context_df['baseline'] == is_baseline]
            baseline_formatting = "FPDE" if not is_baseline else "Baseline"
            fig.add_trace(go.Scatter(x=curr_df.delta_size.values, y=curr_df.total_time.values,
                                     mode='lines',
                                     name=f"{baseline_formatting}"),
                          row=position // n_cols + 1, col=position % n_cols + 1)

    intersection_format = "Intersection" if not is_predicate else "IsIntersection"
    # The figure contains both baseline and FPDE traces, so the title carries
    # no baseline suffix. The old code read the leaked loop variable here,
    # which was always False and undefined for an empty frame.
    fig.update_layout(title=f"Execution Time for {intersection_format} by Max Chunk Size", title_x=0.5)
    fig.show()

In [None]:
# The max-delta sweep only records the full intersection (it calls
# evaluate_dataset with not_predicate=True), so there is just one figure to
# draw; the second identical call was a copy-paste duplicate.
plot_max_delta_size_speed(max_deltas_filt_df, is_predicate=False)

#### Dataset Stats Collection

In [None]:
# One row per (baseline mode, dataset, geometry pair, predicate mode).
stats_df = pd.DataFrame(
    columns=[
        'decomp', 'nbr_recieved_chks', 'nbr_total_chks', 'total_time',
        "sizes", "context", "delta_size", "predicate", "dataset_idx",
        "baseline",
    ]
)

for baseline_on in (True, False):
    act_deact_baseline(activate=baseline_on)
    for dataset in tqdm(datasets):
        # The second argument is the frame that receives the result rows.
        evaluate_dataset(dataset, stats_df)

KeyboardInterrupt: 

In [None]:
# Persist the collected statistics next to the notebook (row index omitted).
stats_df.to_csv('stats_df.csv', index=False)


#### Total Execution Time by Context

In [None]:
import plotly.graph_objs as objs

# Columns needed by the total-execution-time box and bar plots.
total_time_df = stats_df.loc[
    :, ['context', 'total_time', 'sizes', 'baseline', 'predicate']
]
def make_general_facet_title(fig, x_title, y_title):
    """Replace the per-facet axis titles of `fig` with one shared title per axis.

    Args:
        fig: Plotly figure whose layout is modified in place.
        x_title: Text of the shared x-axis title, centered below the plot area.
        y_title: Text of the shared y-axis title, rotated and placed to the
            left of the plot area.
    """
    # Remove the current axis titles of every x and y axis in the layout.
    axis_types = (objs.layout.XAxis, objs.layout.YAxis)
    for key in fig.layout:
        if isinstance(fig.layout[key], axis_types):
            fig.layout[key].title.text = ''

    # Common x-axis title below the facets (paper coordinates).
    fig.add_annotation(
        dict(
            x=0.5,
            y=-0.2,  # adjust as needed
            showarrow=False,
            text=x_title,
            xref="paper",
            yref="paper",
            font=dict(size=14),
            xanchor="center",
            yanchor="top",
        )
    )
    # Common y-axis title on the left side of the facets.
    fig.add_annotation(
        dict(
            x=-0.04,  # adjust as needed
            y=0.5,
            showarrow=False,
            text=y_title,
            textangle=-90,  # rotate the text for the vertical y-axis
            xref="paper",
            yref="paper",
            font=dict(size=14),
            xanchor="center",
            yanchor="middle",
        )
    )

##### Boxes

In [None]:
import plotly.express as px
def plot_total_time_box(df, is_predicate=False, size_cat_excl=(), log_scale=True):
    """Show box plots of the execution time per context, faceted by size category.

    Args:
        df: DataFrame with the columns 'predicate', 'sizes', 'context',
            'total_time' and 'baseline'.
        is_predicate: Selects the rows of the predicate (True) or of the full
            intersection (False).
        size_cat_excl: Patterns; rows whose 'sizes' value contains one of them
            are left out.
        log_scale: Use a logarithmic y axis.

    The figure is shown with `fig.show()`; nothing is returned.
    """
    df = df[df['predicate'] == is_predicate]

    for cat in size_cat_excl:
        df = df[df['sizes'].str.contains(cat) == False]

    # Work on a fresh frame instead of resetting the index of a slice in place.
    df = df.reset_index()
    intersection_formatting = "Intersection" if not is_predicate else "Is_Intersection"
    fig = px.box(df,
                 x="context",
                 y="total_time",
                 facet_col="sizes",
                 color="baseline",
                 boxmode="group",
                 log_y=log_scale,
                 facet_col_spacing=0.1,
                 facet_row_spacing=0.1,
                 title=f"Execution Time for {intersection_formatting} in Different Contexts & Sizes",
                 labels=dict(sizes="Sizes", context="Context", baseline="Baseline"),
                 facet_col_wrap=3,
                 height=700
                 )
    make_general_facet_title(fig, "Bounding Box Context", "Mean Execution Time (s)")
    # Every facet gets its own y range and visible tick labels.
    fig.update_yaxes(matches=None, showticklabels=True)

    fig.show()

##### Bars

In [None]:
def plot_total_time_bars(df, is_predicate=False, size_cat_excl=(), log_scale=True):
    """Show grouped bars of the average execution time per context, faceted by size category.

    Args:
        df: DataFrame with the columns 'predicate', 'sizes', 'context',
            'total_time' and 'baseline'.
        is_predicate: Selects the rows of the predicate (True) or of the full
            intersection (False).
        size_cat_excl: Patterns; rows whose 'sizes' value contains one of them
            are left out.
        log_scale: Use a logarithmic y axis.

    The figure is shown with `fig.show()`; nothing is returned.
    """
    df = df[df['predicate'] == is_predicate]

    for cat in size_cat_excl:
        df = df[df['sizes'].str.contains(cat) == False]

    # Work on a fresh frame instead of resetting the index of a slice in place.
    df = df.reset_index()
    intersection_formatting = "Intersection" if not is_predicate else "Is_Intersection"
    # histfunc="avg" turns the histogram into a bar chart of mean total_time.
    fig = px.histogram(df,
                       x="context",
                       y="total_time",
                       color='baseline',
                       facet_col="sizes",
                       barmode='group',
                       log_y=log_scale,
                       facet_col_spacing=0.1,
                       histfunc="avg",
                       title=f"Execution Time for {intersection_formatting} in Different Contexts & Sizes",
                       labels=dict(sizes="Sizes", context="Context", baseline="Baseline"),
                       facet_col_wrap=3,
                       facet_row_spacing=0.1,
                       height=700,
                       )

    make_general_facet_title(fig, "Bounding Box Context", "Mean Execution Time (s)")

    # Every facet gets its own y range and visible tick labels.
    fig.update_yaxes(matches=None, showticklabels=True, visible=True)

    fig.show()


#### Chunk Unfolding Analysis

In [None]:
# Share of chunks that were unfolded, in percent of all chunks of the pair.
chunk_unfolded_df = stats_df[
    ['nbr_recieved_chks', 'nbr_total_chks', 'predicate', 'baseline', 'context', 'sizes']
].assign(
    chk_fraction_unfolded=lambda frame: frame["nbr_recieved_chks"].div(frame.nbr_total_chks, axis=0) * 100
)

In [None]:
def plot_chunk_unfolded_frac(df, is_predicate=False, is_baseline=False):
    """Show a horizontal bar chart of the mean unfolded-chunk percentage.

    Args:
        df: DataFrame with the columns 'predicate', 'baseline', 'context',
            'sizes' and 'chk_fraction_unfolded'.
        is_predicate: Selects predicate (True) or full-intersection (False) rows.
        is_baseline: Selects baseline (True) or non-baseline (False) rows.

    One bar is drawn per (context, size category) group. The figure is shown
    with `fig.show()`; nothing is returned.
    """
    selected = df[(df['predicate'] == is_predicate) & (df['baseline'] == is_baseline)]
    grouped = selected.groupby(['context', 'sizes']).mean().reset_index()

    # Two-level category axis: context first, size category second.
    group_labels = [grouped.context.values, grouped.sizes.values]

    fig = go.Figure()
    fig.add_bar(
        y=group_labels,
        x=grouped.chk_fraction_unfolded.values,
        name="Chunk Fraction Unfolded",
        orientation='h',
    )

    if is_predicate:
        intersection_format = "IsIntersection"
    else:
        intersection_format = "Intersection"
    baseline_format = "(BASELINE)" if is_baseline else ""

    fig.update_layout(
        title=f"Fraction of Chunks Unfolded for {intersection_format} {baseline_format}",
        title_x=0.5,
        barmode="relative",
        xaxis_title="Chunks Unfolded (%)",
        yaxis_title="Context + Geometry Size",
    )

    fig.show()

#### Fraction of Time for Partial Decompression Analysis

In [None]:
# 'decomp' becomes the decompression share of total_time in percent;
# 'algo_time' is the remaining share.
partial_frac_df = (
    stats_df[['decomp', 'total_time', 'predicate', 'baseline', 'context', 'sizes']]
    .assign(decomp=lambda frame: frame["decomp"].div(frame.total_time / 100, axis=0))
    .assign(algo_time=lambda frame: frame.apply(lambda row: 100 - row['decomp'], axis=1))
)

In [None]:
def plot_partial_decomp_frac(df, is_predicate=False, is_baseline=False):
    """Show stacked horizontal bars splitting execution time into decompression and algorithm.

    Args:
        df: DataFrame with the columns 'predicate', 'baseline', 'context',
            'sizes', 'decomp' and 'algo_time' (both in percent).
        is_predicate: Selects predicate (True) or full-intersection (False) rows.
        is_baseline: Selects baseline (True) or non-baseline (False) rows.

    One stacked bar is drawn per (context, size category) group. The figure
    is shown with `fig.show()`; nothing is returned.
    """
    df = df[df['predicate'] == is_predicate]
    df = df[df['baseline'] == is_baseline]

    df = df.groupby(['context', 'sizes']).mean().reset_index()
    # Two-level category axis: context first, size category second.
    group_labels = [
        df.context.values,
        df.sizes.values
    ]

    intersection_format = "Intersection" if not is_predicate else "IsIntersection"
    baseline_format = "" if not is_baseline else "(Baseline)"

    fig = go.Figure()
    fig.add_bar(y=group_labels, x=df.decomp.values, name="Decompression Stage", orientation='h')
    fig.add_bar(y=group_labels, x=df.algo_time.values, name=intersection_format, orientation='h')
    fig.update_layout(title=f"Execution Time Distribution for {intersection_format} {baseline_format}", title_x=0.5)
    fig.update_layout(barmode="relative", xaxis_title="Execution Time (%)", yaxis_title="Context + Geometry Size")
    fig.update_layout(legend=dict(yanchor="top", y=1.2, xanchor="right", x=0.997))
    fig.show()

## Results

#### Total time Analysis

In [None]:
# Box plot first, then the bar chart, both for the full intersection on a
# linear y axis and without excluding any size category.
for plot_total_time in (plot_total_time_box, plot_total_time_bars):
    plot_total_time(total_time_df, is_predicate=False, size_cat_excl=[], log_scale=False)

#### (%) Unfolding of chunks

In [None]:
# Non-baseline runs: full intersection first, then the predicate.
for predicate_mode in (False, True):
    plot_chunk_unfolded_frac(chunk_unfolded_df, is_predicate=predicate_mode, is_baseline=False)

#### Partial Decompression Fraction of Execution

In [None]:

# For the full intersection and then the predicate: baseline first, FPDE second.
for predicate_mode in (False, True):
    for baseline_mode in (True, False):
        plot_partial_decomp_frac(partial_frac_df, is_predicate=predicate_mode, is_baseline=baseline_mode)

#### MAX_NUM_DELTAS Analysis