In [None]:
import os
import glob
import numpy as np
import subprocess
import geopandas as gpd
import pandas as pd
import dask
from dask.distributed import LocalCluster, Client, progress

from pyFIRS.wrappers import lastools
from pyFIRS.utils import (make_buffered_fishnet, get_intersecting_tiles, 
                          PipelineError, inspect_failures)

from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.patches import Patch
import seaborn as sns
sns.set_style('darkgrid')
%matplotlib inline

In [None]:
# Root folder for this project's data, resolved to an absolute path
WORKDIR = os.path.abspath('F:/willamette-valley_2009/')
# EPSG code of the target coordinate reference system
TARGET_EPSG = 6339 # UTM 10N
# Distance added on every side of a new tile, in the same units as the tile coordinates
BUFFER = 50

In [None]:
# Wrapper around the LAStools command-line tools found in the given bin folder
# NOTE(review): the wrapper's API is defined in pyFIRS.wrappers.lastools, not shown here
las = lastools.useLAStools('C:/Program Files/LAStools/bin')

In [None]:
# Sub-folders of WORKDIR: RAW holds the original tile index and input files,
# INTERIM receives the retiled outputs and the failure logs
RAW = os.path.join(WORKDIR, 'raw')
INTERIM = os.path.join(WORKDIR, 'interim')

In [None]:
# Read the tile index that describes the footprint of each raw tile
tile_index_path = os.path.join(RAW, 'raw_tileindex.shp')
orig_tiles = gpd.read_file(tile_index_path)
orig_tiles.head()

In [None]:
# Bounding box of all original tiles merged into one geometry
extent = orig_tiles.unary_union.bounds

# Lay a regular grid of buffered tiles over that extent
new_tiles = make_buffered_fishnet(
    *extent,
    orig_tiles.crs,
    buffer=BUFFER,
    spacing=1000,
)
new_tiles.head()

In [None]:
def _prepend_raw_dir(file_names):
    """Turn a space-separated string of file names into one of paths under RAW."""
    return ' '.join([os.path.join(RAW, name) for name in file_names.split(' ')])


# For every new tile, list the original files that overlap it, as full paths
intersecting_tiles = get_intersecting_tiles(orig_tiles, new_tiles)
intersecting_tiles['intersecting_files'] = (
    intersecting_tiles['intersecting_files'].apply(_prepend_raw_dir)
)
intersecting_tiles.head()

### Setting up parallel computing using `dask.distributed`
`LAStools` offers native multi-core processing as an optional argument (`cores`) supplied to its command-line tools. `FUSION` command line tools do not. To enable parallel processing of `FUSION` commands, we'll use `dask.distributed` to schedule the processing of tiles in asynchronous parallel batches. This approach also offers us the ability to track progress using a progress bar.

You'll first need to launch a parallel computing cluster. 

In [None]:
# Start a local cluster on fixed scheduler and dashboard ports, then connect a client to it
cluster = LocalCluster(
    scheduler_port=7001,
    dashboard_address=7002,
)
c = Client(cluster)

At this point, you should also be able to view an interactive dashboard on port 7002. If you're executing this on a remote server, you'll need to set up port forwarding so that you can view the dashboard in your local machine's browser. Once you've done that, or if you're processing on your own machine, you can view the dashboard at [http://localhost:7002/status](http://localhost:7002/status).

In [None]:
# push our working directories and wrapper classes to the workers on the cluster as well
# push our working directories and wrapper classes to the workers on the cluster as well
shared_objects = [WORKDIR, RAW, INTERIM, las, intersecting_tiles, TARGET_EPSG]
c.scatter(shared_objects, broadcast=True);

In [None]:
def log_error(tile_id, process, error_msg):
    """Record a processing failure for one tile in its own text file.

    The log is written to ``<INTERIM>/retiled/failed/<tile_id>.txt``; the folder
    is created if needed and any earlier log for the same tile is overwritten.

    Parameters
    ----------
    tile_id : string
        identifier of the tile that failed, used as the log file name
    process : string
        name of the processing step that failed
    error_msg : object
        description of the failure, formatted into the log line
    """
    log_path = os.path.join(INTERIM, 'retiled', 'failed', tile_id + '.txt')
    log_dir, _ = os.path.split(log_path)
    os.makedirs(log_dir, exist_ok=True)

    # '+w' truncates an existing file before writing
    with open(log_path, '+w') as handle:
        handle.write('{} | {}: {}'.format(tile_id, process, error_msg))


def has_error(tile_id):
    """Report whether a failure log exists for the given tile.

    Checks for ``<tile_id>.txt`` in ``<INTERIM>/retiled/failed``, which is the
    folder ``log_error`` writes its logs to.

    Parameters
    ----------
    tile_id : string
        identifier of the tile to look up

    Returns
    -------
    bool
        True if a failure log for the tile exists, False otherwise
    """
    logfile = os.path.join(INTERIM, 'retiled', 'failed', tile_id + '.txt')
    return os.path.isfile(logfile)

In [None]:
def parse_coords_from_tileid(tile_id, default_length=1000):
    """Get the coordinates of the lower left corner of the tile, assuming the tile
    has been named in the pattern {XMIN}_{YMIN} or {XMIN}_{YMIN}_{RASTERLENGTH}.

    Parameters
    ----------
    tile_id : string
        assumed tile_id follows the naming convention of {LLX}_{LLY}_{LENGTH} where
        LLX = x-coordinate of lower-left corner of tile (in projected units)
        LLY = y-coordinate of lower-left corner of tile (in projected units)
        LENGTH = length of the raster (in projected units), assumed to be a square tile shape
        The LENGTH part may be omitted.
    default_length : int, optional
        length to report when the tile_id has no LENGTH part (default 1000)

    Returns
    -------
    llx, lly, length : int
        x- and y- coordinates of lower-left corner and length of raster

    Raises
    ------
    ValueError
        if tile_id does not have two or three '_'-separated parts, or if any
        part is not an integer
    """
    tile_parts = tile_id.split('_')
    if len(tile_parts) not in (2, 3):
        # fail with a clear message instead of reaching the return with unset names
        raise ValueError(
            'tile_id {!r} should look like LLX_LLY or LLX_LLY_LENGTH'.format(tile_id))

    coords = [int(part) for part in tile_parts]
    if len(coords) == 2:
        coords.append(default_length)  # tile width is not explicit in tile_id

    llx, lly, length = coords
    return llx, lly, length

In [None]:
@dask.delayed
def make_tile(tile_id):
    """Clip and merge the raw files overlapping one new tile into a single LAZ file.

    The tile extent is taken from tile_id and grown by BUFFER on every side.
    las2las is asked to keep only the points inside that buffered square from
    all input files listed for the tile, and to write them merged to
    ``<INTERIM>/retiled/<tile_id>.laz``. A tile whose output file already
    exists is skipped. Failures are recorded with ``log_error`` rather than
    raised, so one bad tile does not stop the rest of the batch.

    Parameters
    ----------
    tile_id : string
        index label of the tile in ``intersecting_tiles``, named as expected
        by ``parse_coords_from_tileid``

    Returns
    -------
    tile_id : string
        the tile_id that was passed in, whether or not the tile was produced
    """
    llx, lly, length = parse_coords_from_tileid(tile_id)
    # read the file list by column name rather than by column position
    INFILES = intersecting_tiles.loc[tile_id, 'intersecting_files'].split(' ')
    ODIR = os.path.join(INTERIM, 'retiled')
    OUTFILE = os.path.join(ODIR, tile_id + '.laz')

    # grow the tile by BUFFER on every side
    llx_buff, lly_buff = llx - BUFFER, lly - BUFFER
    buff_length = length + 2*BUFFER

    if os.path.exists(OUTFILE):
        # already produced by an earlier run
        return tile_id

    try:
        os.makedirs(ODIR, exist_ok=True)
        proc_clip = subprocess.run(['las2las',
                                    '-keep_tile',
                                    str(llx_buff), str(lly_buff), str(buff_length),
                                    '-i', *INFILES,
                                    '-merged',
                                    '-o', OUTFILE,
                                    '-olaz'],
                                   stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    except Exception as e:
        # e.g. las2las is not on the PATH; exceptions have no .message in Python 3
        log_error(tile_id, 'make_tile', str(e))
        return tile_id

    if proc_clip.returncode != 0:
        # subprocess.run does not raise on a non-zero exit status unless asked to,
        # so a failed las2las call has to be detected and logged explicitly
        stderr_text = proc_clip.stderr.decode(errors='replace').strip()
        log_error(tile_id, 'make_tile',
                  'las2las exited with code {}: {}'.format(proc_clip.returncode,
                                                           stderr_text))
    return tile_id

In [None]:
# One delayed make_tile job per new tile
jobs = list(map(make_tile, intersecting_tiles.index))

In [None]:
# Hand the delayed jobs to the cluster client so the tiles are processed in parallel
res = c.persist(jobs)

In [None]:
# Track the submitted jobs with a progress bar
progress(res)

In [None]:
# Inspect the folder where log_error writes one text file per failed tile
inspect_failures(os.path.join(INTERIM, 'retiled', 'failed'))

## Display tile coverage

In [None]:
# Tiles that already have an output file, identified by file name without extension
finished = [os.path.splitext(os.path.basename(x))[0]
            for x in glob.glob(os.path.join(INTERIM, 'retiled', '*.laz'))]
# set membership keeps the comparison fast when there are many tiles
finished_ids = set(finished)
not_done = [x for x in intersecting_tiles.index if x not in finished_ids]

fig, ax = plt.subplots(1, figsize=(20,20))
orig_tiles.plot(ax=ax, facecolor='blue',
                edgecolor='none', alpha=0.3, lw=0)
if not_done:
    # only draw the missing tiles when there are any; nothing to plot otherwise
    new_tiles.reindex(not_done).plot(ax=ax, facecolor='black',
                                     edgecolor='none', lw=0)

legend_elements = [Patch(facecolor='blue', edgecolor='none', alpha=0.3, lw=0, label='Original Tiles'),
                   Patch(facecolor='black', edgecolor='none', lw=0, label='New Tiles Not Produced')]
ax.legend(handles=legend_elements)
ax.set_title('Tile Coverage ({:,d} new tiles not produced)'.format(len(not_done)))

# place ticks every 10,000 units while keeping the axis limits chosen by the plot
loc = ticker.MultipleLocator(base=10000)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
ax.set_xticks(loc.tick_values(*xlim))
ax.set_yticks(loc.tick_values(*ylim))
ax.set_xlim(*xlim)
ax.set_ylim(*ylim)

# show coordinates with thousands separators and no decimals
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_ylabel('UTM Northings', labelpad=10)
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('UTM Eastings', labelpad=10)

plt.show();

In [None]:
# Uncomment to cancel the submitted jobs:
# c.cancel(res)

In [None]:
# Uncomment to shut down the cluster when you are finished:
# c.shutdown()