# Parsl Worflow Breakdown

Working through the parsl workflow for PDG datasets in chunks. Sample dataset is for lake change in the Arctic, provided by Ingmar Nitze.

In [50]:
# file paths
import os
from pathlib import Path
from datetime import datetime

# visualization
import pandas as pd
import geopandas as gpd
from shapely.geometry import box

# PDG packages
import pdgstaging
import pdgraster
import py3dtiles
import viz_3dtiles
from viz_3dtiles import TreeGenerator, BoundingVolumeRegion
from viz_3dtiles import leaf_tile_from_gdf, parent_tile_from_children_json
#import pdgpy3dtiles
#from StagedTo3DConverter import StagedTo3DConverter

# logging and configuration
from datetime import datetime
import logging
import logging.config
import argparse
import json

# Parsl
import parsl
from parsl import python_app
from parsl.config import Config
from parsl.channels import LocalChannel
from parsl.executors import HighThroughputExecutor
from parsl.providers import LocalProvider
#from parsl.executors.threads import ThreadPoolExecutor
#from parsl.providers import LocalProvider
#from parsl.providers import KubernetesProvider
#from parsl.addresses import address_by_route
#from kubernetes import client, config

### Set configuration and data path


In [None]:
# newer & larger data sample downloaded from google drive

# workflow_config = '/home/jcohen/viz-workflow/workflow_configs/ingmar-config.json'
# logging_config = '/home/jcohen/viz-workflow/workflow_configs/logging.json'
# base_dir = Path('/home/jcohen/gpkg_files_expanded')
# filename = 'lake_change_*.gpkg'
# input = [p.as_posix() for p in base_dir.glob('**/' + filename)]

In [None]:
#input

In [2]:
workflow_config = '/home/jcohen/viz-workflow/workflow_configs/ingmar-config.json'
logging_config = '/home/jcohen/viz-workflow/workflow_configs/logging.json'

base_dir = Path('/home/pdg/data/nitze_lake_change/data_sample_2022-09-09')
subdirs = ['32607', '32608', '32609']
filename = 'lake_change.gpkg'
# to define each .gpkg file within each UTM subdir as a string representation with forward slashes, use as_posix() for each iteration
# of base_dir + filename. The ** represents that any subdir string can be present between the base_dir and the filename, meaning I do not
# think that we needed to create the object subdirs above
input = [p.as_posix() for p in base_dir.glob('**/' + filename)]
input

['/home/pdg/data/nitze_lake_change/data_sample_2022-09-09/32609/05_Lake_Dataset_Raster_02_final/lake_change.gpkg',
 '/home/pdg/data/nitze_lake_change/data_sample_2022-09-09/32608/05_Lake_Dataset_Raster_02_final/lake_change.gpkg',
 '/home/pdg/data/nitze_lake_change/data_sample_2022-09-09/32607/05_Lake_Dataset_Raster_02_final/lake_change.gpkg']

### Set up `parsl` HighThroughputExecutor with LocalProvider

This will configure how we distribute the parallelization across our workers for staging, rasterizing, etc.

In [3]:
# skeleton code copied from ADC Scalable Computing Workshop

# bash command to activate virtual environment
activate_env = 'source /home/jcohen/.bashrc; conda activate pdgviz'

htex_config_local = Config(
  executors = [
      HighThroughputExecutor(
        label = "htex_Local",
        cores_per_worker = 2, 
        max_workers = 2, # why would this be so low? because just testing with small amount of data ?
          # worker_logdir_root = '/' only necessary if the file system is remote, which is not the case for this lake change sample
          # address not necessary because we are not using kubernetes
        worker_debug = False, # don't need this because we have logging setup
          # provider is local for this run thru, kubernetes would use KubernetesProvider()
        provider = LocalProvider(
          channel = LocalChannel(),
          worker_init = activate_env,
          init_blocks = 1, # default I think
          max_blocks = 10 # changed from deafult of 1
        ),
      )
    ],
  )

parsl.clear() # first clear the current configuration since we will likely run this script multiple times
parsl.load(htex_config_local) # load the config we just outlined

<parsl.dataflow.dflow.DataFlowKernel at 0x7f616754d810>

For reference, [here](https://github.com/PermafrostDiscoveryGateway/viz-workflow/blob/0beb3b14239f2dd8cd4329026dc8d9a41aece7d7/pdg_workflow/pdg_workflow.py#L32) is the HighThroughputExecutor used in the `parsl-workflow` branch.



### Explicitly define StagedTo3DConverter class & its methods rather than sourcing it in

In [4]:
logger = logging.getLogger(__name__)

class StagedTo3DConverter():
    """
        Processes staged vector data into Cesium 3D tiles according to the
        settings in a config file or dict. This class acts as the orchestrator
        of the other viz-3dtiles classes, and coordinates the sending and
        receiving of information between them.
    """

    def __init__(
        self,
        config
    ):
        """
            Initialize the StagedTo3DConverter class.
            Parameters
            ----------
            config : dict or str
                A dictionary of configuration settings or a path to a config
                JSON file. (See help(pdgstaging.ConfigManager))
        """

        self.config = pdgstaging.ConfigManager(config)
        self.tiles = pdgstaging.TilePathManager(
            **self.config.get_path_manager_config())

    def all_staged_to_3dtiles(
        self
    ):
        """
            Process all staged vector tiles into 3D tiles.
        """

        # Get the list of staged vector tiles
        paths = self.tiles.get_filenames_from_dir('staged')
        # Process each tile
        for path in paths:
            self.staged_to_3dtile(path)

    def staged_to_3dtile(self, path):
        """
            Convert a staged vector tile into a B3DM tile file and a matching
            JSON tileset file.
            Parameters
            ----------
            path : str
                The path to the staged vector tile.
            Returns
            -------
            tile, tileset : Cesium3DTile, Tileset
                The Cesium3DTiles and Cesium3DTileset objects
        """

        try:

            # Get information about the tile from the path
            tile = self.tiles.tile_from_path(path)
            out_path = self.tiles.path_from_tile(tile, '3dtiles')

            tile_bv = self.bounding_region_for_tile(tile)

            # Get the filename of the tile WITHOUT the extension
            tile_filename = os.path.splitext(os.path.basename(out_path))[0]
            # Get the base of the path, without the filename
            tile_dir = os.path.dirname(out_path) + os.path.sep

            # Log the event
            logger.info(
                f'Creating 3dtile from {path} for tile {tile} to {out_path}.')

            # Read in the staged vector tile
            gdf = gpd.read_file(path)

            # Check if the gdf is empty
            if len(gdf) == 0:
                logger.warning(
                    f'Vector tile {path} is empty. 3D tile will not be'
                    ' created.')
                return

            # Remove polygons with centroids that are outside the tile boundary
            prop_cent_in_tile = self.config.polygon_prop(
                'centroid_within_tile')
            gdf = gdf[gdf[prop_cent_in_tile]]

            # Check if deduplication should be performed
            dedup_here = self.config.deduplicate_at('3dtiles')
            dedup_method = self.config.get_deduplication_method()

            # Deduplicate if required
            if dedup_here and (dedup_method is not None):
                dedup_config = self.config.get_deduplication_config(gdf)
                dedup = dedup_method(gdf, **dedup_config)
                gdf = dedup['keep']

                # The tile could theoretically be empty after deduplication
                if len(gdf) == 0:
                    logger.warning(
                        f'Vector tile {path} is empty after deduplication.'
                        ' 3D Tile will not be created.')
                    return

            # Create & save the b3dm file
            ces_tile, ces_tileset = TreeGenerator.leaf_tile_from_gdf(
                gdf,
                dir=tile_dir,
                filename=tile_filename,
                z=self.config.get('z_coord'),
                geometricError=self.config.get('geometricError'),
                tilesetVersion=self.config.get('version'),
                boundingVolume=tile_bv
            )

            return ces_tile, ces_tileset

        except Exception as e:
            logger.error(f'Error creating 3D Tile from {path}.')
            logger.error(e)

    def parent_3dtiles_from_children(self, tiles, bv_limit=None):
        """
            Create parent Cesium 3D Tileset json files that point to of child
            JSON files in the tile tree hierarchy. This method will take a list
            of parent tiles and search the 3D tile directory for any children
            tiles to create.
            Parameters
            ----------
            tiles : list of morecantile.Tile
                The list of parent tiles to create.
        """

        tile_manager = self.tiles
        config_manager = self.config

        tileset_objs = []

        # Make the next level of parent tiles
        for parent_tile in tiles:
            # Get the path to the parent tile
            parent_path = tile_manager.path_from_tile(parent_tile, '3dtiles')
            # Get just the base dir without the filename
            parent_dir = os.path.dirname(parent_path)
            # Get the filename of the parent tile, without the extension
            parent_filename = os.path.basename(parent_path)
            parent_filename = os.path.splitext(parent_filename)[0]
            # Get the children paths for this parent tile
            child_paths = tile_manager.get_child_paths(parent_tile, '3dtiles')
            # Remove paths that do not exist
            child_paths = tile_manager.remove_nonexistent_paths(child_paths)
            # Get the parent bounding volume
            parent_bv = self.bounding_region_for_tile(
                parent_tile, limit_to=bv_limit)
            # If the bounding region is outside t
            # Get the version
            version = config_manager.get('version')
            # Get the geometric error
            geometric_error = config_manager.get('geometricError')
            # Create the parent tile
            tileset_obj = TreeGenerator.parent_tile_from_children_json(
                child_paths,
                dir=parent_dir,
                filename=parent_filename,
                geometricError=geometric_error,
                tilesetVersion=version,
                boundingVolume=parent_bv
            )
            tileset_objs.append(tileset_obj)

        return tileset_objs

    def bounding_region_for_tile(self, tile, limit_to=None):
        """
        For a morecantile.Tile object, return a BoundingVolumeRegion object
        that represents the bounding region of the tile.
        Parameters
        ----------
        tile : morecantile.Tile
            The tile object.
        limit_to : list of float
            Optional list of west, south, east, north coordinates to limit
            the bounding region to.
        Returns
        -------
        bv : BoundingVolumeRegion
            The bounding region object.
        """
        tms = self.tiles.tms
        bounds = tms.bounds(tile)
        bounds = gpd.GeoSeries(
            box(bounds.left, bounds.bottom, bounds.right, bounds.top),
            crs=tms.crs)
        if limit_to is not None:
            bounds_limitor = gpd.GeoSeries(
                box(limit_to[0], limit_to[1], limit_to[2], limit_to[3]),
                crs=tms.crs)
            bounds = bounds.intersection(bounds_limitor)
        bounds = bounds.to_crs(BoundingVolumeRegion.CESIUM_EPSG)
        bounds = bounds.total_bounds

        region_bv = {
            'west': bounds[0], 'south': bounds[1],
            'east': bounds[2], 'north': bounds[3],
        }
        return region_bv

    def make_top_level_tileset(self):
        """
        Create a top-level tileset.json file that sets all the min_z level
        tiles as its children. This is needed to display the tiles in Cesium
        when the min_z level has more than one tile.
        Returns
        -------
        tileset : Tileset
            The Cesium3DTileset object
        """

        tile_manager = self.tiles
        config_manager = self.config
        min_z = config_manager.get_min_z()

        # Make a parent tileset.json - this will combine the top level tiles if
        # there are 2, otherwise it will just refer to the top level tile.
        top_level_tiles = tile_manager.get_filenames_from_dir(
            '3dtiles', z=min_z)
        top_level_dir = tile_manager.get_base_dir('3dtiles')['path']

        return TreeGenerator.parent_tile_from_children_json(
            children=top_level_tiles,
            dir=top_level_dir
        )

### Setup logging

In [5]:
def setup_logging(log_json_file):
    """
    Setup logging configuration
    """
    with open(log_json_file, 'r') as f:
        logging_dict = json.load(f)
    logging.config.dictConfig(logging_dict)
    return logging_dict

logging_dict = setup_logging(logging_config)

### Define batch sizes and batching function

In [6]:
batch_size_staging=1 # change this depending on data sample size!!!!!
batch_size_rasterization=30
batch_size_3dtiles=20
batch_size_parent_3dtiles=500
batch_size_geotiffs=200
batch_size_web_tiles=200

In [7]:
def make_batch(items, batch_size):
    """
    Create batches of a given size from a list of items.
    """
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

### Create batches of input files

In [None]:
# chunk written when using original 3 gpkg files
#input
# this already is the paths to the input files, it is not a base dir
# so we do not have to use stager.tiles.get_filenames_from_dir('input')

In [None]:
input_batches = make_batch(input, batch_size_staging)
input_batches # 3 batches, 1 file each

# when batch size is 1, input_batches = input

### Configure the stager, raster tiler, and 3d tiler

Even tho these objects need to be created within the `parsl` functions when we define those, too.

In [8]:
# staging configuration
stager = pdgstaging.TileStager(workflow_config)
tile_manager = stager.tiles
config_manager = stager.config

# zoom levels configuration
min_z = config_manager.get_min_z()
max_z = config_manager.get_max_z()
parent_zs = range(max_z - 1, min_z - 1, -1)

# 3D tiler configuration
tiles3dmaker = StagedTo3DConverter(workflow_config)

# raster tilerconfiguration 
rasterizer = pdgraster.RasterTiler(workflow_config)

### Set up parsl app to stage in parallel

Need to import all necessary packages for the staging step within parsl app for staging.

I am actually not going to stage in parallel for this run thru, because there are so few input files, it messes with batching because there is only 1 gpkg per batch. I will batch for rasterization and web tiles.

In [None]:
# Decorators seem to be ignored as the first line of a cell, so print something first
print("Stage in parallel")

@python_app
def stage(paths, config, logging_dict = logging_dict): 
    """
    Stage files (step 1)
    """
    import pdgstaging
    if logging_dict:
        import logging.config
        logging.config.dictConfig(logging_dict)
    stager = pdgstaging.TileStager(config)
    for path in paths:
        stager.stage(path)
    return True

### Stage input files in parallel 

In [None]:
app_futures = []
for batch in input_batches:
    app_future = stage(batch, workflow_config, logging_dict)
    #print(app_future)
    app_futures.append(app_future)

#Don't continue to step 2 until all files have been staged, only need the next line if running a script
#[a.result() for a in app_futures]

In [None]:
htex_config_local.executors[0].shutdown()
parsl.clear()

### Batch staged filepaths

Now the staged file dir is complete. Moving onto preparing the staged files for rasterization. 

In [9]:
# Get paths to all the newly staged tiles
staged_paths = stager.tiles.get_filenames_from_dir('staged')
staged_paths

['staged/WorldCRS84Quad/11/406/228.gpkg',
 'staged/WorldCRS84Quad/11/407/228.gpkg',
 'staged/WorldCRS84Quad/11/407/229.gpkg',
 'staged/WorldCRS84Quad/11/407/230.gpkg',
 'staged/WorldCRS84Quad/11/407/231.gpkg',
 'staged/WorldCRS84Quad/11/407/233.gpkg',
 'staged/WorldCRS84Quad/11/407/234.gpkg',
 'staged/WorldCRS84Quad/11/407/238.gpkg',
 'staged/WorldCRS84Quad/11/407/239.gpkg',
 'staged/WorldCRS84Quad/11/407/240.gpkg',
 'staged/WorldCRS84Quad/11/407/241.gpkg',
 'staged/WorldCRS84Quad/11/407/242.gpkg',
 'staged/WorldCRS84Quad/11/407/243.gpkg',
 'staged/WorldCRS84Quad/11/407/244.gpkg',
 'staged/WorldCRS84Quad/11/407/245.gpkg',
 'staged/WorldCRS84Quad/11/407/250.gpkg',
 'staged/WorldCRS84Quad/11/407/251.gpkg',
 'staged/WorldCRS84Quad/11/407/254.gpkg',
 'staged/WorldCRS84Quad/11/407/262.gpkg',
 'staged/WorldCRS84Quad/11/407/263.gpkg',
 'staged/WorldCRS84Quad/11/407/264.gpkg',
 'staged/WorldCRS84Quad/11/407/265.gpkg',
 'staged/WorldCRS84Quad/11/407/267.gpkg',
 'staged/WorldCRS84Quad/11/407/268

In [10]:
# check how many staged files, is the batch size 30 reasonable? 19088... sure 30 sounds fine
len(staged_paths) # matches the terminal count

18992

In [11]:
# batch staged files
staged_batches = make_batch(staged_paths, batch_size_rasterization)
len(staged_batches) # 634 batches

634

In [12]:
# see what is within 1 batch
staged_batches[0]

['staged/WorldCRS84Quad/11/406/228.gpkg',
 'staged/WorldCRS84Quad/11/407/228.gpkg',
 'staged/WorldCRS84Quad/11/407/229.gpkg',
 'staged/WorldCRS84Quad/11/407/230.gpkg',
 'staged/WorldCRS84Quad/11/407/231.gpkg',
 'staged/WorldCRS84Quad/11/407/233.gpkg',
 'staged/WorldCRS84Quad/11/407/234.gpkg',
 'staged/WorldCRS84Quad/11/407/238.gpkg',
 'staged/WorldCRS84Quad/11/407/239.gpkg',
 'staged/WorldCRS84Quad/11/407/240.gpkg',
 'staged/WorldCRS84Quad/11/407/241.gpkg',
 'staged/WorldCRS84Quad/11/407/242.gpkg',
 'staged/WorldCRS84Quad/11/407/243.gpkg',
 'staged/WorldCRS84Quad/11/407/244.gpkg',
 'staged/WorldCRS84Quad/11/407/245.gpkg',
 'staged/WorldCRS84Quad/11/407/250.gpkg',
 'staged/WorldCRS84Quad/11/407/251.gpkg',
 'staged/WorldCRS84Quad/11/407/254.gpkg',
 'staged/WorldCRS84Quad/11/407/262.gpkg',
 'staged/WorldCRS84Quad/11/407/263.gpkg',
 'staged/WorldCRS84Quad/11/407/264.gpkg',
 'staged/WorldCRS84Quad/11/407/265.gpkg',
 'staged/WorldCRS84Quad/11/407/267.gpkg',
 'staged/WorldCRS84Quad/11/407/268

### Define parsl function to rasterize in parallel

In [13]:
# Decorators seem to be ignored as the first line of a cell, so print something first
print("Rasterize in parallel")

@python_app
def rasterize(staged_paths, config, logging_dict = logging_dict):
    """
    Rasterize a batch of vector files (step 2)
    """
    import pdgraster
    if logging_dict:
        import logging.config
        logging.config.dictConfig(logging_dict)
    rasterizer = pdgraster.RasterTiler(config)
    raster = rasterizer.rasterize_vectors(staged_paths, make_parents=True)
    # print(tile)
    # print(bounds)
    # print(raster_opts)
    return raster

Rasterize in parallel


### Rasterize all staged tiles (only highest z-level)

First, reload parsl config because already cleared it after staging in parallel. 

In [None]:
# bash command to activate virtual environment
activate_env = 'source /home/jcohen/.bashrc; conda activate pdgviz'

htex_config_local = Config(
  executors = [
      HighThroughputExecutor(
        label = "htex_Local",
        cores_per_worker = 2, 
        max_workers = 2, # why would this be so low? because just testing with small amount of data ?
          # worker_logdir_root = '/' only necessary if the file system is remote, which is not the case for this lake change sample
          # address not necessary because we are not using kubernetes
        worker_debug = False, # don't need this because we have logging setup
          # provider is local for this run thru, kubernetes would use KubernetesProvider()
        provider = LocalProvider(
          channel = LocalChannel(),
          worker_init = activate_env,
          init_blocks = 1, # default I think
          max_blocks = 10 # changed from deafult of 1
        ),
      )
    ],
  )

parsl.clear() # first clear the current configuration since we will likely run this script multiple times
parsl.load(htex_config_local) # load the config we just outlined

In [14]:
app_futures = []
for batch in staged_batches:
    app_future = rasterize(batch, workflow_config, logging_dict)
    app_futures.append(app_future)

# Don't continue to step 3 until all tiles have been rasterized
[a.result() for a in app_futures]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [15]:
htex_config_local.executors[0].shutdown()
parsl.clear()

In [16]:
# ensure we have the same number of GeoTIFF files in z-level 11 as we do staged vector tiles (which are only z-level 11)
#geotiff_paths = rasterizer.tiles.get_filenames_from_dir('/home/jcohen/viz-workflow/geotiff/WorldCRS84Quad/11')
# just want geotiff paths from z-level 11 because all other z-levels are parent tiles, but just count number of tiles in terminal cause it's easier
len_geotiff_paths = 18991
len_geotiff_paths == len(staged_paths)
# False with 1 file erroring, ambigious message, gonna ignore that file for now

False

In [17]:
len(staged_paths)

18992

### Create Web Tiles from geoTIFF's

In [21]:
# Update ranges
rasterizer.update_ranges()

In [26]:
# Process web tiles in batches
geotiff_paths = tile_manager.get_filenames_from_dir('geotiff')
geotiff_batches = make_batch(geotiff_paths, batch_size_web_tiles)


In [27]:
len(geotiff_batches) # 134 batches

134

In [28]:
len(geotiff_batches[0]) # 200 in each batch, a huge increase from the batch size of 30 when rasterizing

200

### Re-initialize parsl

Because I shut it down after rasterization

In [29]:
# bash command to activate virtual environment
activate_env = 'source /home/jcohen/.bashrc; conda activate pdgviz'

htex_config_local = Config(
  executors = [
      HighThroughputExecutor(
        label = "htex_Local",
        cores_per_worker = 2, 
        max_workers = 2, # why would this be so low? because just testing with small amount of data ?
          # worker_logdir_root = '/' only necessary if the file system is remote, which is not the case for this lake change sample
          # address not necessary because we are not using kubernetes
        worker_debug = False, # don't need this because we have logging setup
          # provider is local for this run thru, kubernetes would use KubernetesProvider()
        provider = LocalProvider(
          channel = LocalChannel(),
          worker_init = activate_env,
          init_blocks = 1, # default I think
          max_blocks = 10 # changed from deafult of 1
        ),
      )
    ],
  )

parsl.clear() # first clear the current configuration since we will likely run this script multiple times
parsl.load(htex_config_local) # load the config we just outlined

<parsl.dataflow.dflow.DataFlowKernel at 0x7f61645ca3e0>

In [30]:
# Create a batch of webtiles from geotiffs (step 4)
@python_app
def create_web_tiles(geotiff_paths, config, logging_dict = logging_dict):
    """
    Create a batch of webtiles from geotiffs
    """
    import pdgraster
    if logging_dict:
        import logging.config
        logging.config.dictConfig(logging_dict)
    rasterizer = pdgraster.RasterTiler(config)
    return rasterizer.webtiles_from_geotiffs(
        geotiff_paths, update_ranges=False) # already manually updates ranges in chunk above, don't need to do it twice

In [31]:
app_futures = []
for batch in geotiff_batches:
    app_future = create_web_tiles(batch, workflow_config, logging_dict)
    app_futures.append(app_future)

# Don't record end time until all web tiles have been created
[a.result() for a in app_futures]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [36]:
# check that the same amount of web tiles and rasters
geotiff_paths = rasterizer.tiles.get_filenames_from_dir('geotiff')
len(geotiff_paths) #26656
# 53312 total, for coverage and polygon_count
53312/2 # perfect! 26656

26656.0

### Create 3D Web Tiles

At [this](https://github.com/PermafrostDiscoveryGateway/viz-workflow/blob/0beb3b14239f2dd8cd4329026dc8d9a41aece7d7/pdg_workflow/pdg_workflow.py#L226) stage in the `parsl` workflow.

We create only the highest z-level. Deduplication does not occur at this step, because we set the config for this workflow to occur at staging. 

In [39]:
staged_batches = make_batch(staged_paths, batch_size_3dtiles) # batch size = 20

Manually define 3 things within the `parsl` app:

1. class for StagedTo3DConverter
2. first function from TreeGenerator.py to create leaf tile in a Cesium 3D tileset tree: `leaf_tile_from_gdf()`
3. second function from TreeGenerator.py to create a parent tile in a Cesium 3D tileset tree

The latter 2 were copied from [this script](https://github.com/PermafrostDiscoveryGateway/viz-3dtiles/blob/5597407f74cb4200d776dcc0185b7a67b73693fa/viz_3dtiles/TreeGenerator.py).

In [51]:
# print("create 3d webtiles in parallel")

# @python_app
# def create_leaf_3dtiles(staged_paths, config, logging_dict = logging_dict):
#     """
#     Create a batch of leaf 3d tiles from staged vector tiles
#     """
#     import pdgstaging
#     # from pdg_workflow import StagedTo3DConverter manually define it instead:

#     # ------------------------------------------------------------------------

#     class StagedTo3DConverter():
#         """
#         Processes staged vector data into Cesium 3D tiles according to the
#         settings in a config file or dict. This class acts as the orchestrator
#         of the other viz-3dtiles classes, and coordinates the sending and
#         receiving of information between them.
#         """

#         def __init__(
#             self,
#             config
#         ):
#             """
#                 Initialize the StagedTo3DConverter class.
#                 Parameters
#                 ----------
#                 config : dict or str
#                     A dictionary of configuration settings or a path to a config
#                     JSON file. (See help(pdgstaging.ConfigManager))
#             """

#             self.config = pdgstaging.ConfigManager(config)
#             self.tiles = pdgstaging.TilePathManager(
#                 **self.config.get_path_manager_config())

#         def all_staged_to_3dtiles(
#             self
#         ):
#             """
#                 Process all staged vector tiles into 3D tiles.
#             """

#             # Get the list of staged vector tiles
#             paths = self.tiles.get_filenames_from_dir('staged')
#             # Process each tile
#             for path in paths:
#                 self.staged_to_3dtile(path)

#         def staged_to_3dtile(self, path):
#             """
#                 Convert a staged vector tile into a B3DM tile file and a matching
#                 JSON tileset file.
#                 Parameters
#                 ----------
#                 path : str
#                     The path to the staged vector tile.
#                 Returns
#                 -------
#                 tile, tileset : Cesium3DTile, Tileset
#                     The Cesium3DTiles and Cesium3DTileset objects
#             """

#             try:

#                 # Get information about the tile from the path
#                 tile = self.tiles.tile_from_path(path)
#                 out_path = self.tiles.path_from_tile(tile, '3dtiles')

#                 tile_bv = self.bounding_region_for_tile(tile)

#                 # Get the filename of the tile WITHOUT the extension
#                 tile_filename = os.path.splitext(os.path.basename(out_path))[0]
#                 # Get the base of the path, without the filename
#                 tile_dir = os.path.dirname(out_path) + os.path.sep

#                 # Log the event
#                 logger.info(
#                     f'Creating 3dtile from {path} for tile {tile} to {out_path}.')

#                 # Read in the staged vector tile
#                 gdf = gpd.read_file(path)

#                 # Check if the gdf is empty
#                 if len(gdf) == 0:
#                     logger.warning(
#                         f'Vector tile {path} is empty. 3D tile will not be'
#                         ' created.')
#                     return

#                 # Remove polygons with centroids that are outside the tile boundary
#                 prop_cent_in_tile = self.config.polygon_prop(
#                     'centroid_within_tile')
#                 gdf = gdf[gdf[prop_cent_in_tile]]

#                 # Check if deduplication should be performed
#                 dedup_here = self.config.deduplicate_at('3dtiles')
#                 dedup_method = self.config.get_deduplication_method()

#                 # Deduplicate if required
#                 if dedup_here and (dedup_method is not None):
#                     dedup_config = self.config.get_deduplication_config(gdf)
#                     dedup = dedup_method(gdf, **dedup_config)
#                     gdf = dedup['keep']

#                     # The tile could theoretically be empty after deduplication
#                     if len(gdf) == 0:
#                         logger.warning(
#                             f'Vector tile {path} is empty after deduplication.'
#                             ' 3D Tile will not be created.')
#                         return

#                 # Create & save the b3dm file
#                 ces_tile, ces_tileset = TreeGenerator.leaf_tile_from_gdf(
#                     gdf,
#                     dir=tile_dir,
#                     filename=tile_filename,
#                     z=self.config.get('z_coord'),
#                     geometricError=self.config.get('geometricError'),
#                     tilesetVersion=self.config.get('version'),
#                     boundingVolume=tile_bv
#                 )

#                 return ces_tile, ces_tileset

#             except Exception as e:
#                 logger.error(f'Error creating 3D Tile from {path}.')
#                 logger.error(e)

#         def parent_3dtiles_from_children(self, tiles, bv_limit=None):
#             """
#                 Create parent Cesium 3D Tileset json files that point to of child
#                 JSON files in the tile tree hierarchy. This method will take a list
#                 of parent tiles and search the 3D tile directory for any children
#                 tiles to create.
#                 Parameters
#                 ----------
#                 tiles : list of morecantile.Tile
#                     The list of parent tiles to create.
#             """

#             tile_manager = self.tiles
#             config_manager = self.config

#             tileset_objs = []

#             # Make the next level of parent tiles
#             for parent_tile in tiles:
#                 # Get the path to the parent tile
#                 parent_path = tile_manager.path_from_tile(parent_tile, '3dtiles')
#                 # Get just the base dir without the filename
#                 parent_dir = os.path.dirname(parent_path)
#                 # Get the filename of the parent tile, without the extension
#                 parent_filename = os.path.basename(parent_path)
#                 parent_filename = os.path.splitext(parent_filename)[0]
#                 # Get the children paths for this parent tile
#                 child_paths = tile_manager.get_child_paths(parent_tile, '3dtiles')
#                 # Remove paths that do not exist
#                 child_paths = tile_manager.remove_nonexistent_paths(child_paths)
#                 # Get the parent bounding volume
#                 parent_bv = self.bounding_region_for_tile(
#                     parent_tile, limit_to=bv_limit)
#                 # If the bounding region is outside t
#                 # Get the version
#                 version = config_manager.get('version')
#                 # Get the geometric error
#                 geometric_error = config_manager.get('geometricError')
#                 # Create the parent tile
#                 tileset_obj = TreeGenerator.parent_tile_from_children_json(
#                     child_paths,
#                     dir=parent_dir,
#                     filename=parent_filename,
#                     geometricError=geometric_error,
#                     tilesetVersion=version,
#                     boundingVolume=parent_bv
#                 )
#                 tileset_objs.append(tileset_obj)

#             return tileset_objs

#         def bounding_region_for_tile(self, tile, limit_to=None):
#             """
#             For a morecantile.Tile object, return a BoundingVolumeRegion object
#             that represents the bounding region of the tile.
#             Parameters
#             ----------
#             tile : morecantile.Tile
#                 The tile object.
#             limit_to : list of float
#                 Optional list of west, south, east, north coordinates to limit
#                 the bounding region to.
#             Returns
#             -------
#             bv : BoundingVolumeRegion
#                 The bounding region object.
#             """
#             tms = self.tiles.tms
#             bounds = tms.bounds(tile)
#             bounds = gpd.GeoSeries(
#                 box(bounds.left, bounds.bottom, bounds.right, bounds.top),
#                 crs=tms.crs)
#             if limit_to is not None:
#                 bounds_limitor = gpd.GeoSeries(
#                     box(limit_to[0], limit_to[1], limit_to[2], limit_to[3]),
#                     crs=tms.crs)
#                 bounds = bounds.intersection(bounds_limitor)
#             bounds = bounds.to_crs(BoundingVolumeRegion.CESIUM_EPSG)
#             bounds = bounds.total_bounds

#             region_bv = {
#                 'west': bounds[0], 'south': bounds[1],
#                 'east': bounds[2], 'north': bounds[3],
#             }
#             return region_bv

#         def make_top_level_tileset(self):
#             """
#             Create a top-level tileset.json file that sets all the min_z level
#             tiles as its children. This is needed to display the tiles in Cesium
#             when the min_z level has more than one tile.
#             Returns
#             -------
#             tileset : Tileset
#                 The Cesium3DTileset object
#             """

#             tile_manager = self.tiles
#             config_manager = self.config
#             min_z = config_manager.get_min_z()

#             # Make a parent tileset.json - this will combine the top level tiles if
#             # there are 2, otherwise it will just refer to the top level tile.
#             top_level_tiles = tile_manager.get_filenames_from_dir(
#                 '3dtiles', z=min_z)
#             top_level_dir = tile_manager.get_base_dir('3dtiles')['path']

#             return TreeGenerator.parent_tile_from_children_json(
#                 children=top_level_tiles,
#                 dir=top_level_dir
#             )


#         # ------------------------------------------------------------------------

#         # also import other function:
#     def leaf_tile_from_gdf(
#         gdf,
#         dir='',
#         filename='tileset',
#         crs=None,
#         z=0,
#         geometricError=None,
#         tilesetVersion=None,
#         boundingVolume=None,
#         minify_json=True
#     ):
#         """
#         Create a leaf tile in a Cesium 3D tileset tree. Convert a GeoDataFrame of
#         polygons into a Cesium3DTile B3DM file and Cesium3DTileset JSON file.
#         Parameters
#         ----------
#         gdf : GeoDataFrame
#             A GeoDataFrame containing polygons to be converted to a Cesium tile.
#         dir : str
#             The directory to save both the JSON and B3DM files to. If the directory
#             does not exist, it will be created.
#         filename : str
#             The base filename for the tile, excluding base directory and extension.
#             The JSON and B3DM files will be saved as <filename>.json and
#             <filename>.b3dm. Default is 'tileset'.
#         crs : str
#             The coordinate reference system of the GeoDataFrame, if the GeoDataFrame
#             does not have a CRS set.
#         z : int
#             If the GeoDataFrame does not have a Z coordinate, then the Z coordinate
#             will be set to this value. Default is 0.
#         geometricError : float
#             The geometric error of the tile. If None (default), the geometric error
#             will be the max_width calculated when creating the Cesium3DTile (B3DM).
#         tilesetVersion : str
#             An application specific version for the tileset (optional).
#         boundingVolume : list or dict
#             A root bounding volume for the tile. If None (default), the bounding
#             volume will be the calculated oriented bounding box (OBB) of the
#             GeoDataFrame. The OBB will be used for the content bounding volume in
#             either case.
#         minify_json : bool
#             Whether to minify the JSON file. Default is True.
#         Returns
#         -------
#         tile, tileset : Cesium3DTile, Tileset
#             The Cesium3DTiles and Cesium3DTileset objects
#         """
#         tile = Cesium3DTile()
#         tile.save_to = dir
#         tile.save_as = filename
#         tile.from_geodataframe(gdf, crs=crs, z=z)
#         gdf = tile.geodataframe
#         tile_bounding_volume = BoundingVolume.from_gdf(gdf)
#         tile.get_filename()

#         # Only set the optional content bounding volume if it differs from the root
#         # tile bounding volume
#         root_bounding_volume = tile_bounding_volume
#         content_bounding_volume = None
#         if(boundingVolume):
#             root_bounding_volume = BoundingVolume(boundingVolume)
#             content_bounding_volume = tile_bounding_volume

#         asset = Asset(tilesetVersion=tilesetVersion)

#         content = Content(
#             uri=tile.get_filename(),
#             boundingVolume=content_bounding_volume
#         )

#         root_tile_data = {
#             'boundingVolume': root_bounding_volume,
#             'geometricError': geometricError or tile.max_width,
#             'content': content
#         }
#         tileset_data = {
#             'asset': asset,
#             'geometricError': geometricError or tile.max_width,
#             'root': root_tile_data
#         }
#         tileset = Tileset(**tileset_data)
#         json_path = os.path.join(dir, filename + '.json')
#         tileset.to_file(json_path, minify=minify_json)
#         return tile, tileset

# # ------------------------------------------------------------------------

# # and import final 3rd function 

# # ------------------------------------------------------------------------

#     def parent_tile_from_children_json(
#         children,
#         dir='',
#         filename='tileset',
#         geometricError=None,
#         tilesetVersion=None,
#         boundingVolume=None,
#         boundingVolumeSource="content",
#         minify_json=True
#     ):
#         """
#         Create a parent tile in a Cesium 3D tileset tree. The parent tile will
#         inherit properties such as extensionsUsed, extras, properties and
#         root.refine, root.transform, etc. from the first child tile. Other
#         properties are calculated or can be specified with the geometricError,
#         tilesetVersion, and boundingVolume parameters.
#         Parameters
#         ----------
#         children : list of str or list of Tileset
#             A list of JSON files or Cesium3DTiles that the parent tile should point
#             to. All child tiles must be saved to files, and their file paths must
#             be in the same format as the dir parameter for this function. This is
#             because the method calculates the relative path the child JSON files
#             from the starting from the path where the parent JSON file will be
#             saved.
#         dir : str
#             The directory to save the parent JSON file to. If the directory does
#             not exist, it will be created. If the path is relative, then the
#             children file paths must also be relative. If the path is absolute,
#             then the children file paths must be absolute.
#         filename : str
#             The base filename for the tile, excluding base directory and extension.
#             The JSON file will be saved as <filename>.json. Default is 'tileset'.
#         geometricError : float
#             The geometric error of the tile. If None (default), the max of the
#             child geometric errors will be used.
#         tilesetVersion : str
#             An application specific version for the tileset (optional). If None,
#             the tilesetVersion from the first child tile will be used.
#         boundingVolume : list or dict
#             A root bounding volume for the tile. If None (default), the bounding
#             volume will be the calculated as the union of the child bounding
#             volumes.
#         boundingVolumeSource : "root" or "content"
#             When a boundingVolume is not set, then which of each child's bounding
#             volumes should be used to calculate the parent tile's bounding volume.
#             This method is passed to bv_source parameter in the Tile.add_children
#             method. If set to "content" (default), then the method will first
#             search for a child's content bounding volume, and will add it to the
#             tile's root bounding volume if they exist. If a child has no content
#             bounding volume, then the root bounding volume will be added instead.
#         minify_json : bool
#             Whether to minify the JSON file. Default is True.
#         Returns
#         -------
#         tileset : Tileset
#             The Cesium3DTileset object
#         """

#         if not isinstance(children, (list, tuple)):
#             children = [children]

#         # Check the tileset children
#         child_paths = []
#         if all(isinstance(child, str) for child in children):
#             child_paths = children
#         elif all(isinstance(child, Tileset) for child in children):
#             if any(child.file_path is None for child in children):
#                 raise ValueError(
#                     'Child tilesets must all be saved to a file before '
#                     'being added to a parent tile. This is required because the parent '
#                     'tile needs relative paths to the child tileset JSON.')
#             child_paths = [child.file_path for child in children]
#         else:
#             raise ValueError(
#                 'Children must be a list of paths or Tileset objects.')

#         # Check that all the child JSON files exist
#         if any(not os.path.exists(child_path) for child_path in child_paths):
#             raise ValueError('One or more child JSON files does not exist.')

#         child_geo_errors = []
#         child_tilesets = []
#         child_root_tiles = []
#         rel_child_paths = []

#         for i in range(len(child_paths)):
#             # Read in the relevant parts of the child data
#             cp = child_paths[i]
#             child_tileset = Tileset.from_file(cp)
#             child_root = child_tileset.root
#             rel_path_to_child = os.path.relpath(cp, dir)
#             geometric_error = child_tileset.geometricError
#             child_root.children = None
#             # Append child data parts to lists
#             child_geo_errors.append(geometric_error)
#             child_tilesets.append(child_tileset)
#             child_root_tiles.append(child_root)
#             rel_child_paths.append(rel_path_to_child)

#         # Use the first child's tileset info to create the parent tileset
#         new_tileset = child_tilesets[0].copy()
#         new_tileset.root.content = None
#         new_tileset.root.children = None

#         # Add the children to the parent tileset
#         bv_method = 'replace' if boundingVolume is None else None
#         bv_source = boundingVolumeSource
#         new_tileset.add_children(child_root_tiles, bv_method, bv_source)

#         # All bv info from children is now in parent. Update the children content
#         # to only contain the URI for the child json, relative to the new parent
#         # json
#         for i in range(len(child_root_tiles)):
#             child = new_tileset.root.children[i]
#             child.content = Content(uri=rel_child_paths[i])

#         # Update other parameters to the parent tileset
#         if boundingVolume:
#             new_tileset.root.boundingVolume = BoundingVolume(boundingVolume)

#         if tilesetVersion:
#             new_tileset.asset.tilesetVersion = tilesetVersion

#         if geometricError is not None:
#             new_tileset.geometricError = geometricError
#         else:
#             new_tileset.geometricError = max(child_geo_errors)

#         # make output directory if it doesn't exist, then save
#         if not os.path.exists(dir):
#             os.makedirs(dir, exist_ok=True)
#         out_path = os.path.join(dir, filename + '.json')
#         new_tileset.to_file(out_path, minify=minify_json)
#         return new_tileset

#     if logging_dict:
#         import logging.config
#         logging.config.dictConfig(logging_dict)
#     converter3d = StagedTo3DConverter(config)
#     tilesets = []
#     for path in staged_paths:
#         ces_tile, ces_tileset = converter3d.staged_to_3dtile(path)
#         tilesets.append(ces_tileset)
#     return tilesets

create 3d webtiles in parallel


In [53]:
print("create 3d webtiles in parallel")

@python_app
def create_leaf_3dtiles(staged_paths, config, logging_dict = logging_dict):
    """
    Create a batch of leaf 3d tiles from staged vector tiles
    """
    import pdgstaging
    from viz_3dtiles import TreeGenerator, BoundingVolumeRegion
    
    # from pdg_workflow import StagedTo3DConverter manually define it instead:

    # ------------------------------------------------------------------------

    class StagedTo3DConverter():
        """
        Processes staged vector data into Cesium 3D tiles according to the
        settings in a config file or dict. This class acts as the orchestrator
        of the other viz-3dtiles classes, and coordinates the sending and
        receiving of information between them.
        """

        def __init__(
            self,
            config
        ):
            """
                Initialize the StagedTo3DConverter class.
                Parameters
                ----------
                config : dict or str
                    A dictionary of configuration settings or a path to a config
                    JSON file. (See help(pdgstaging.ConfigManager))
            """

            self.config = pdgstaging.ConfigManager(config)
            self.tiles = pdgstaging.TilePathManager(
                **self.config.get_path_manager_config())

        def all_staged_to_3dtiles(
            self
        ):
            """
                Process all staged vector tiles into 3D tiles.
            """

            # Get the list of staged vector tiles
            paths = self.tiles.get_filenames_from_dir('staged')
            # Process each tile
            for path in paths:
                self.staged_to_3dtile(path)

        def staged_to_3dtile(self, path):
            """
                Convert a staged vector tile into a B3DM tile file and a matching
                JSON tileset file.
                Parameters
                ----------
                path : str
                    The path to the staged vector tile.
                Returns
                -------
                tile, tileset : Cesium3DTile, Tileset
                    The Cesium3DTiles and Cesium3DTileset objects
            """

            try:

                # Get information about the tile from the path
                tile = self.tiles.tile_from_path(path)
                out_path = self.tiles.path_from_tile(tile, '3dtiles')

                tile_bv = self.bounding_region_for_tile(tile)

                # Get the filename of the tile WITHOUT the extension
                tile_filename = os.path.splitext(os.path.basename(out_path))[0]
                # Get the base of the path, without the filename
                tile_dir = os.path.dirname(out_path) + os.path.sep

                # Log the event
                logger.info(
                    f'Creating 3dtile from {path} for tile {tile} to {out_path}.')

                # Read in the staged vector tile
                gdf = gpd.read_file(path)

                # Check if the gdf is empty
                if len(gdf) == 0:
                    logger.warning(
                        f'Vector tile {path} is empty. 3D tile will not be'
                        ' created.')
                    return

                # Remove polygons with centroids that are outside the tile boundary
                prop_cent_in_tile = self.config.polygon_prop(
                    'centroid_within_tile')
                gdf = gdf[gdf[prop_cent_in_tile]]

                # Check if deduplication should be performed
                dedup_here = self.config.deduplicate_at('3dtiles')
                dedup_method = self.config.get_deduplication_method()

                # Deduplicate if required
                if dedup_here and (dedup_method is not None):
                    dedup_config = self.config.get_deduplication_config(gdf)
                    dedup = dedup_method(gdf, **dedup_config)
                    gdf = dedup['keep']

                    # The tile could theoretically be empty after deduplication
                    if len(gdf) == 0:
                        logger.warning(
                            f'Vector tile {path} is empty after deduplication.'
                            ' 3D Tile will not be created.')
                        return

                # Create & save the b3dm file
                ces_tile, ces_tileset = TreeGenerator.leaf_tile_from_gdf(
                    gdf,
                    dir=tile_dir,
                    filename=tile_filename,
                    z=self.config.get('z_coord'),
                    geometricError=self.config.get('geometricError'),
                    tilesetVersion=self.config.get('version'),
                    boundingVolume=tile_bv
                )

                return ces_tile, ces_tileset

            except Exception as e:
                logger.error(f'Error creating 3D Tile from {path}.')
                logger.error(e)

        def parent_3dtiles_from_children(self, tiles, bv_limit=None):
            """
                Create parent Cesium 3D Tileset json files that point to of child
                JSON files in the tile tree hierarchy. This method will take a list
                of parent tiles and search the 3D tile directory for any children
                tiles to create.
                Parameters
                ----------
                tiles : list of morecantile.Tile
                    The list of parent tiles to create.
            """

            tile_manager = self.tiles
            config_manager = self.config

            tileset_objs = []

            # Make the next level of parent tiles
            for parent_tile in tiles:
                # Get the path to the parent tile
                parent_path = tile_manager.path_from_tile(parent_tile, '3dtiles')
                # Get just the base dir without the filename
                parent_dir = os.path.dirname(parent_path)
                # Get the filename of the parent tile, without the extension
                parent_filename = os.path.basename(parent_path)
                parent_filename = os.path.splitext(parent_filename)[0]
                # Get the children paths for this parent tile
                child_paths = tile_manager.get_child_paths(parent_tile, '3dtiles')
                # Remove paths that do not exist
                child_paths = tile_manager.remove_nonexistent_paths(child_paths)
                # Get the parent bounding volume
                parent_bv = self.bounding_region_for_tile(
                    parent_tile, limit_to=bv_limit)
                # If the bounding region is outside t
                # Get the version
                version = config_manager.get('version')
                # Get the geometric error
                geometric_error = config_manager.get('geometricError')
                # Create the parent tile
                tileset_obj = TreeGenerator.parent_tile_from_children_json(
                    child_paths,
                    dir=parent_dir,
                    filename=parent_filename,
                    geometricError=geometric_error,
                    tilesetVersion=version,
                    boundingVolume=parent_bv
                )
                tileset_objs.append(tileset_obj)

            return tileset_objs

        def bounding_region_for_tile(self, tile, limit_to=None):
            """
            For a morecantile.Tile object, return a BoundingVolumeRegion object
            that represents the bounding region of the tile.
            Parameters
            ----------
            tile : morecantile.Tile
                The tile object.
            limit_to : list of float
                Optional list of west, south, east, north coordinates to limit
                the bounding region to.
            Returns
            -------
            bv : BoundingVolumeRegion
                The bounding region object.
            """
            tms = self.tiles.tms
            bounds = tms.bounds(tile)
            bounds = gpd.GeoSeries(
                box(bounds.left, bounds.bottom, bounds.right, bounds.top),
                crs=tms.crs)
            if limit_to is not None:
                bounds_limitor = gpd.GeoSeries(
                    box(limit_to[0], limit_to[1], limit_to[2], limit_to[3]),
                    crs=tms.crs)
                bounds = bounds.intersection(bounds_limitor)
            bounds = bounds.to_crs(BoundingVolumeRegion.CESIUM_EPSG)
            bounds = bounds.total_bounds

            region_bv = {
                'west': bounds[0], 'south': bounds[1],
                'east': bounds[2], 'north': bounds[3],
            }
            return region_bv

        def make_top_level_tileset(self):
            """
            Create a top-level tileset.json file that sets all the min_z level
            tiles as its children. This is needed to display the tiles in Cesium
            when the min_z level has more than one tile.
            Returns
            -------
            tileset : Tileset
                The Cesium3DTileset object
            """

            tile_manager = self.tiles
            config_manager = self.config
            min_z = config_manager.get_min_z()

            # Make a parent tileset.json - this will combine the top level tiles if
            # there are 2, otherwise it will just refer to the top level tile.
            top_level_tiles = tile_manager.get_filenames_from_dir(
                '3dtiles', z=min_z)
            top_level_dir = tile_manager.get_base_dir('3dtiles')['path']

            return TreeGenerator.parent_tile_from_children_json(
                children=top_level_tiles,
                dir=top_level_dir
            )

create 3d webtiles in parallel


In [54]:
app_futures = []
for batch in staged_batches:
    app_future = create_leaf_3dtiles(batch, workflow_config, logging_dict)
    app_futures.append(app_future)

# Don't continue to step 6 until all max-zoom level 3d tilesets have been created
[a.result() for a in app_futures]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

The above chunk ran, but it did not create a dir for 3Dtiles. When this occurred with rasterization, I switched the make_parents argument from False to True and it created the dir. I don't think that is possible with the 3d tiles creation, tho.