In [None]:
import os
import shutil
import glob
import subprocess
import pandas as pd
import geopandas as gpd
import dask
from dask.distributed import Client, progress, LocalCluster
from pyFIRS.wrappers import lastools

Launch a parallel computing cluster. 

In [None]:
# start a local cluster: scheduler on port 7001, diagnostics dashboard on port 7002
cluster=LocalCluster(scheduler_port=7001, diagnostics_port=7002)
# client used below to submit work to that cluster
c = Client(cluster)
# NOTE(review): despite its name this is the number of entries returned by
# c.ncores(), i.e. presumably the number of workers rather than CPU cores — confirm
num_cores = len(c.ncores()) # identify how many workers we have

At this point, you should also be able to view an interactive dashboard on port 7002. If you're executing this on a remote server, you'll need to set up port forwarding so you can view the dashboard in your local machine's browser. Once you've done that, or if you're processing on your own machine, you can view the dashboard at [http://localhost:7002/status](http://localhost:7002/status).

In [None]:
# wrapper object used below to call LAStools commands (e.g. las.lasboundary);
# the argument is presumably the folder holding the LAStools executables — confirm
las = lastools.useLAStools('/storage/lidar/LAStools/bin')

In [None]:
# where the imported lidar data is currently stored (absolute path on the processing machine)
workdir = os.path.abspath('/storage/lidar/odf_northwest_2015/wilkerson/')
# define data handling directories: every merge step below reads from and
# writes to sub-folders of `processed`
processed = os.path.join(workdir,'processed')

# the coordinate reference system we'll be working with (EPSG code)
target_epsg = 26910 # utm 10 N

In [None]:
# push our working directories and wrapper classes to the workers on the cluster as well
# NOTE(review): the futures returned by scatter are not kept (the trailing `;`
# only suppresses the cell output) — confirm the scattered data is actually
# needed and retained on the workers
c.scatter([processed, las, target_epsg, num_cores], broadcast=True);

# Merge tiled derivative outputs together
Merge all the tiled GeoTiffs and Shapefiles into single overview files.

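We'll produce a single shapefile showing the layout of the non-buffered tiles. This is a single process that takes a few seconds to run, so there is no need to split it across workers; it is still wrapped with `dask.delayed` so that it can be scheduled as one task alongside the other merge steps.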

In [None]:
@dask.delayed
def tiles_overview(*args, **kwargs):
    """Build one shapefile outlining every point-cloud tile.

    Runs ``las.lasboundary`` over all ``*.laz`` files in
    ``<processed>/points`` and writes ``<processed>/vectors/tiles.shp``.
    Nothing is done when that shapefile already exists.

    Parameters
    ----------
    *args, **kwargs
        Ignored. Accepted so that upstream results can be passed in purely
        to express task dependencies.

    Returns
    -------
    None
    """
    odir = os.path.join(processed, 'vectors')
    outfile = os.path.join(odir, 'tiles.shp')

    # the overview was already produced by an earlier run
    if os.path.exists(outfile):
        return

    # make sure the output folder exists before lasboundary writes into it
    os.makedirs(odir, exist_ok=True)

    las.lasboundary(i=os.path.join(processed, 'points', '*.laz'),
                    use_bb=True,  # use bounding box of tiles
                    overview=True,
                    labels=True,
                    cores=num_cores,  # use parallel processing
                    oshp=True,
                    o=outfile)
    return

Merge the bare earth tiles into a single GeoTiff.

In [None]:
@dask.delayed
def merge_dem(*args, **kwargs):
    """Merge the bare-earth DEM tiles into ``<processed>/rasters/dem.tif``.

    Calls the ``rio merge`` command line tool on every ``*.tif`` in
    ``<processed>/rasters/DEM_tiles``, writing an LZW-compressed GeoTiff
    tiled in 256x256 blocks. Nothing is done when the merged file already
    exists.

    Parameters
    ----------
    *args, **kwargs
        Ignored. Accepted so that upstream results can be passed in purely
        to express task dependencies.

    Returns
    -------
    subprocess.CompletedProcess or None
        The finished ``rio merge`` call with stdout and stderr captured, or
        None when the merged file already exists. The return code is not
        checked here.

    Raises
    ------
    FileNotFoundError
        If there are no DEM tiles to merge.
    """
    infiles = os.path.join(processed, 'rasters', 'DEM_tiles', '*.tif')
    outfile = os.path.join(processed, 'rasters', 'dem.tif')

    if os.path.exists(outfile):
        return None

    # glob returns matches in arbitrary order; sort them so the order handed
    # to `rio merge` (and therefore the result) is reproducible
    tiles = sorted(glob.glob(infiles))
    if not tiles:
        # without inputs `rio merge` would be called with only the output
        # path and fail without anyone noticing
        raise FileNotFoundError('no DEM tiles found matching {}'.format(infiles))

    return subprocess.run(['rio', 'merge', *tiles, outfile,
                           '--co', 'compress=LZW',
                           '--co', 'tiled=true',
                           '--co', 'blockxsize=256',
                           '--co', 'blockysize=256'],
                          stderr=subprocess.PIPE, stdout=subprocess.PIPE)

Now merge the hillshade tiles into a single raster formatted as GeoTiff.

In [None]:
@dask.delayed
def merge_hillshade(*args, **kwargs):
    """Merge the hillshade tiles into ``<processed>/rasters/hillshade.tif``.

    Calls the ``rio merge`` command line tool on every ``*.tif`` in
    ``<processed>/rasters/hillshade_tiles``, writing an LZW-compressed
    GeoTiff tiled in 256x256 blocks. Nothing is done when the merged file
    already exists.

    Parameters
    ----------
    *args, **kwargs
        Ignored. Accepted so that upstream results can be passed in purely
        to express task dependencies.

    Returns
    -------
    subprocess.CompletedProcess or None
        The finished ``rio merge`` call with stdout and stderr captured, or
        None when the merged file already exists. The return code is not
        checked here.

    Raises
    ------
    FileNotFoundError
        If there are no hillshade tiles to merge.
    """
    infiles = os.path.join(processed, 'rasters', 'hillshade_tiles', '*.tif')
    outfile = os.path.join(processed, 'rasters', 'hillshade.tif')

    if os.path.exists(outfile):
        return None

    # glob returns matches in arbitrary order; sort them so the order handed
    # to `rio merge` (and therefore the result) is reproducible
    tiles = sorted(glob.glob(infiles))
    if not tiles:
        # without inputs `rio merge` would be called with only the output
        # path and fail without anyone noticing
        raise FileNotFoundError('no hillshade tiles found matching {}'.format(infiles))

    return subprocess.run(['rio', 'merge', *tiles, outfile,
                           '--co', 'compress=LZW',
                           '--co', 'tiled=true',
                           '--co', 'blockxsize=256',
                           '--co', 'blockysize=256'],
                          stderr=subprocess.PIPE, stdout=subprocess.PIPE)

Merge the trimmed canopy height model tiles into a single raster.

In [None]:
@dask.delayed
def merge_chm(*args, **kwargs):
    """Merge the canopy height model tiles into ``<processed>/rasters/chm.tif``.

    Calls the ``rio merge`` command line tool on every ``*.tif`` in
    ``<processed>/rasters/chm_tiles``, writing an LZW-compressed GeoTiff
    tiled in 256x256 blocks. Nothing is done when the merged file already
    exists.

    Parameters
    ----------
    *args, **kwargs
        Ignored. Accepted so that upstream results can be passed in purely
        to express task dependencies.

    Returns
    -------
    subprocess.CompletedProcess or None
        The finished ``rio merge`` call with stdout and stderr captured, or
        None when the merged file already exists. Returned (rather than
        discarded) so this function matches ``merge_dem`` and
        ``merge_hillshade``. The return code is not checked here.

    Raises
    ------
    FileNotFoundError
        If there are no canopy height model tiles to merge.
    """
    infiles = os.path.join(processed, 'rasters', 'chm_tiles', '*.tif')
    outfile = os.path.join(processed, 'rasters', 'chm.tif')

    if os.path.exists(outfile):
        return None

    # glob returns matches in arbitrary order; sort them so the order handed
    # to `rio merge` (and therefore the result) is reproducible
    tiles = sorted(glob.glob(infiles))
    if not tiles:
        # without inputs `rio merge` would be called with only the output
        # path and fail without anyone noticing
        raise FileNotFoundError('no CHM tiles found matching {}'.format(infiles))

    return subprocess.run(['rio', 'merge', *tiles, outfile,
                           '--co', 'compress=LZW',
                           '--co', 'tiled=true',
                           '--co', 'blockxsize=256',
                           '--co', 'blockysize=256'],
                          stderr=subprocess.PIPE, stdout=subprocess.PIPE)

Merge the cleaned tiles of building footprints together into a single shapefile. We'll use `geopandas` to concatenate all the polygons together into a single geodataframe and then write out to a new shapefile.

In [None]:
@dask.delayed
def merge_bldgs(*args, **kwargs):
    """Merge the building-footprint tiles into ``<processed>/vectors/buildings.shp``.

    Reads every ``*.shp`` in ``<processed>/vectors/building_tiles`` with
    geopandas, concatenates them into one GeoDataFrame and writes the result
    as a new shapefile. Nothing is done when the merged shapefile already
    exists.

    Parameters
    ----------
    *args, **kwargs
        Ignored. Accepted so that upstream results can be passed in purely
        to express task dependencies.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If there are no building tiles to merge.
    """
    outfile = os.path.join(processed, 'vectors', 'buildings.shp')

    if os.path.exists(outfile):
        return

    pattern = os.path.join(processed, 'vectors', 'building_tiles', '*.shp')
    # sorted so the order of features in the merged file is reproducible
    building_tiles = sorted(glob.glob(pattern))
    if not building_tiles:
        # pd.concat would otherwise fail with an unhelpful
        # "No objects to concatenate" message
        raise ValueError('no building footprint tiles found matching {}'.format(pattern))

    # create a list of geodataframes containing the tiles of building footprints
    gdflist = [gpd.read_file(tile) for tile in building_tiles]
    # merge them all together
    merged = gpd.GeoDataFrame(pd.concat(gdflist, ignore_index=True))
    # using pandas' concat caused us to lose projection information, so let's add that back in
    merged.crs = gdflist[0].crs
    # and write the merged data to a new shapefile
    merged.to_file(outfile)

    return

A single state that will depend upon the completion of the merged rasters and vectors.

In [None]:
@dask.delayed
def merge_done(*args, **kwargs):
    """Marker task for the end of the merge stage.

    Performs no work: it accepts any positional and keyword arguments,
    ignores them, and returns None. It exists so that a single key can
    depend on all of the merge tasks.
    """
    return None

In [None]:
# build the computation recipe: each key maps to (function, [inputs]);
# 'merge_done' takes the five other keys as its inputs.
# NOTE(review): 'tiles_done' is not a key of this dict — presumably a
# placeholder for an upstream stage; confirm how it is meant to be resolved.
merge_dsk = {
    'tiles_over': (tiles_overview, ['tiles_done']),
    'merge_bldgs': (merge_bldgs, ['tiles_done']),
    'merge_hill': (merge_hillshade, ['tiles_done']),
    'merge_dem': (merge_dem, ['tiles_done']),
    'merge_chm': (merge_chm, ['tiles_done']),
    'merge_done': (merge_done, ['tiles_over', 'merge_bldgs', 'merge_hill',
                                'merge_dem', 'merge_chm']),
}

In [None]:
# resolve the 'merge_done' key of the recipe through the cluster client
merge_graph = c.get(merge_dsk, 'merge_done') # build the computation graph
# hand the result back to the cluster for execution
merge_results = c.persist(merge_graph) # this might take a while...
# display a progress indicator for the persisted work
progress(merge_results)

In [None]:
# c.cancel(merge_results)

In [None]:
# shut down the client connection first, then the local cluster it was attached to
c.close()
cluster.close()