# In [None]:
## Standard Libraries
import os
import sys
import time
import math
import random
import signal
from pathlib import Path
import pickle
from functools import cache, partial
import multiprocessing as mp

## Graph Manipulation
import igraph as ig

## Data Manipulation
from geopandas import GeoDataFrame
import sqlite3

## Data Visualization
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import matplotlib.patches as mpatches

## Geometry Manipulation
from shapely.geometry import Polygon, MultiPolygon, LineString, MultiLineString, Point, MultiPoint
from shapely.ops import unary_union
from shapely import Geometry

## Intra-package imports
# Facilitate in-file testing: when `data_processing` has not been imported yet,
# append this file's grandparent directory to sys.path so that the imports
# below can locate the package.
if 'data_processing' not in sys.modules:
    # we are in (root)/cli, data_processing is at (root)/data_processing
    sys.path.append(str(Path(__file__).parent.parent))
# Look up the module registered under this module's dotted parent name
# (everything before the last '.'); when __name__ has no dotted prefix the
# lookup falls back to the '__main__' entry.
parent_module = sys.modules['.'.join(__name__.split('.')[:-1]) or '__main__']
# Both branches bind the same four names (file_paths, graph_utils, gpkg_utils,
# subset); they differ only in the form of the import statement used.
if __name__ == '__main__' or parent_module.__name__ == '__main__':
    from data_processing.file_paths import file_paths
    import data_processing.graph_utils as graph_utils
    import data_processing.gpkg_utils as gpkg_utils
    import data_processing.subset as subset
else:
    from data_processing.file_paths import file_paths
    from data_processing import graph_utils
    from data_processing import gpkg_utils
    from data_processing import subset

# One-element tuple containing True.
# NOTE(review): nothing in this section reads ONCE -- confirm its intended use.
ONCE = (True,)

def heavy_union(geoms:list)->Geometry:
    """Placeholder with no implementation; always returns ``None``.

    NOTE(review): the signature suggests this is meant to combine ``geoms``
    into a single geometry, but no such logic exists here yet -- confirm the
    intended behavior before relying on the return value.
    """
    return None

def mp_is_child():
    """Report whether the current process is something other than the main one.

    The check is purely name-based: any process whose multiprocessing name
    differs from ``'MainProcess'`` is treated as a child.
    """
    current_name = mp.current_process().name
    return current_name != 'MainProcess'

def mp_init_shared_mem_generic(**_data):
    """Ignore SIGINT in this process and publish the keyword arguments.

    Installs ``SIG_IGN`` as the SIGINT handler of the calling process, then
    rebinds the module-level name ``data`` to a new dict holding the supplied
    keyword arguments (any previous ``data`` binding is replaced, not merged).

    NOTE(review): presumably intended as a multiprocessing pool initializer --
    confirm against the call sites.
    """
    global data
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    data = dict(_data)

@cache
def all_wbids():
    """Return the set of ``divides`` ids that are strings containing "wb".

    Opens the SQLite database at ``file_paths.conus_hydrofabric()``, reads
    every ``id`` from the ``divides`` table and keeps those that are ``str``
    instances containing the substring ``"wb"``.

    The result is memoized with ``functools.cache``: the database is read at
    most once per process and every caller receives the same set object, so
    callers should treat it as read-only. A failed call is not cached and
    will be retried on the next invocation.

    Returns:
        set: the matching id strings.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    db = sqlite3.connect(file_paths.conus_hydrofabric())
    try:
        rows = db.execute("SELECT id FROM divides").fetchall()
    finally:
        # Close the connection even when the query fails, so a missing table
        # or corrupt file does not leak an open handle.
        db.close()
    return {row[0] for row in rows if isinstance(row[0], str) and "wb" in row[0]}

def data_bisect(data: list, bounds: tuple, path="") -> list[tuple[list, tuple, str]]:
    """Split ``data`` into two halves along the longer axis of ``bounds``.

    Items are ordered by the centroid coordinate on the split axis and cut at
    the median index. Each half is returned together with the sub-rectangle
    of ``bounds`` it occupies and a path label extended by one direction
    letter.

    Args:
        data: objects exposing ``.centroid.x`` and ``.centroid.y``. The list
            is sorted IN PLACE along the split axis.
        bounds: ``(min_x, min_y, max_x, max_y)`` of the region holding
            ``data``.
        path: label prefix; ``"L"``/``"R"`` is appended for a split across x,
            ``"D"``/``"U"`` for a split across y.

    Returns:
        ``[]`` when ``data`` is empty, otherwise two tuples
        ``(items, bounds, path)``: first the lower half (left/down), then the
        upper half (right/up). The dividing coordinate is the centroid
        coordinate of the first item of the upper half.
    """
    if not data:
        return []

    # Split across the wider dimension; a tie goes to the y axis.
    axis = 0 if bounds[2] - bounds[0] > bounds[3] - bounds[1] else 1

    def sortby(geom):
        # Read the centroid once per item and pick the split-axis coordinate.
        centroid = geom.centroid
        return centroid.x if axis == 0 else centroid.y

    dirs = ("L", "R") if axis == 0 else ("D", "U")
    data.sort(key=sortby)
    div = len(data) // 2
    div_val = sortby(data[div])

    def clip(keep_side):
        # keep_side 0 keeps the min bound on the split axis and moves the max
        # bound to div_val; keep_side 1 does the opposite. Bounds on the other
        # axis are left untouched. (Previously every bound except the kept one
        # was overwritten with div_val, yielding degenerate rectangles.)
        replaced = axis + 2 * (1 - keep_side)
        return tuple(div_val if k == replaced else b for k, b in enumerate(bounds))

    return [
        (data[:div], clip(0), path + dirs[0]),
        (data[div:], clip(1), path + dirs[1]),
    ]

def geometric_partition(
        data: list,
        partition_size: int,
        bounds: tuple,
        path=""
        ) -> list[list, tuple]:
    """Return ``data`` as one partition, or bisect it when it is too large.

    ``bounds`` must satisfy ``bounds[0] < bounds[2]`` and
    ``bounds[1] < bounds[3]``; this is enforced with ``assert`` statements.

    When ``len(data) <= partition_size`` the result is the single-entry list
    ``[(data, bounds, path)]``. Otherwise the result is whatever
    ``data_bisect(data, bounds, path)`` returns.

    NOTE(review): only one bisection is performed, so the returned halves may
    still hold more than ``partition_size`` items -- confirm whether recursive
    splitting was intended.
    """
    assert bounds[0] < bounds[2]
    assert bounds[1] < bounds[3]
    fits_in_one = len(data) <= partition_size
    return [(data, bounds, path)] if fits_in_one else data_bisect(data, bounds, path)

# Script entry point: currently a no-op when this file is run directly.
if __name__ == '__main__':
    pass
    