In [None]:
import os
import shutil
import glob
import subprocess
import pandas as pd
import geopandas as gpd
import dask
from dask.distributed import Client, progress, LocalCluster
from pyFIRS.wrappers import lastools, fusion
from pyFIRS.utils import clean_dir, clip_tile_from_shp, convert_project, PipelineError

Launch a parallel computing cluster. 

In [None]:
# start a local cluster; the scheduler and diagnostics ports are fixed so
# they can be reached (or forwarded) predictably
cluster = LocalCluster(
    scheduler_port=7001,
    diagnostics_port=7002,
)
c = Client(cluster)

# identify how many workers we have
num_cores = len(c.ncores())

At this point, you should also be able to view an interactive dashboard on port 7002. If you're executing this on a remote server, you'll need to set up port forwarding so you can view the dashboard in your local machine's browser. Once you've done that, or if you're processing on your own machine, you can view the dashboard at [http://localhost:7002/status](http://localhost:7002/status).

In [None]:
# instantiate the pyFIRS wrappers for LAStools and FUSION
# NOTE(review): the path arguments presumably locate each tool's executables
# on this machine — adjust them for your own installation
las = lastools.useLAStools('/storage/lidar/LAStools/bin')
fus = fusion.useFUSION('/storage/lidar/FUSION/')

In [None]:
# the coordinate reference system we'll be working with (UTM 10 N)
target_epsg = 26910

# root folder where the imported lidar data is currently stored
workdir = os.path.abspath('/storage/lidar/odf_northwest_2015/wilkerson/')

# data handling directories, all nested beneath the working directory
interim, processed = (os.path.join(workdir, name) for name in ('interim', 'processed'))
layers = os.path.join(interim, 'layers')

In [None]:
def log_error(tile_id, process, error_msg):
    """Write a failure record for a tile to ``<interim>/failed/<tile_id>.txt``.

    Parameters
    ----------
    tile_id : str
        Identifier of the tile; also used as the base name of the log file.
    process : str
        Name of the processing step that failed.
    error_msg : str
        Description of the failure.

    Notes
    -----
    The parent directory of the log file is created when missing. The file is
    opened in a truncating mode, so an existing record for the same tile is
    replaced rather than appended to.
    """
    failure_log = os.path.join(interim, 'failed', tile_id + '.txt')
    parent_dir = os.path.dirname(failure_log)
    os.makedirs(parent_dir, exist_ok=True)

    with open(failure_log, '+w') as handle:
        handle.write('{} | {}: {}'.format(tile_id, process, error_msg))

def has_error(tile_id):
    """Report whether a failure has been logged for a tile.

    Checks for the exact file that ``log_error`` writes,
    ``<interim>/failed/<tile_id>.txt``, instead of listing the whole directory
    and comparing truncated base names. This keeps tile ids that contain a
    ``.`` working and avoids false positives from files that merely share a
    prefix with ``tile_id`` (e.g. ``foo.bar.txt`` for tile ``foo``).

    Parameters
    ----------
    tile_id : str
        Identifier of the tile to look up.

    Returns
    -------
    bool
        True if a failure log exists for ``tile_id``, otherwise False.
    """
    return os.path.exists(os.path.join(interim, 'failed', tile_id + '.txt'))

In [None]:
# push our working directories and wrapper classes to the workers on the cluster as well
# NOTE(review): the futures returned by scatter() are not kept here — confirm
# that the workers actually depend on this call to see these objects
c.scatter([interim, processed, layers, las, fus, target_epsg, num_cores, has_error, log_error], broadcast=True);

## Create Canopy GridMetrics
Calculate forest attributes using the FUSION `gridmetrics` tool.

In [None]:
@dask.delayed
def make_gridmetrics(tile_id):
    """Run FUSION ``gridmetrics`` on one classified tile.

    Reads ``<interim>/classified/<tile_id>.laz`` together with the ground model
    ``<interim>/dtm_ground_tiles/<tile_id>.dtm`` and writes results to
    ``<interim>/gridmetrics/<tile_id>.csv``.

    The tile is skipped when the output file already exists or when a failure
    has previously been logged for it (see ``has_error``). A ``PipelineError``
    raised by the wrapper is recorded with ``log_error`` rather than
    propagated, so one bad tile does not abort the rest of the run.

    Parameters
    ----------
    tile_id : str
        Identifier of the tile to process.

    Returns
    -------
    str
        ``tile_id``, whether the tile was processed, skipped, or failed.
    """
    infile = os.path.join(interim, 'classified', tile_id + '.laz')
    odir = os.path.join(interim, 'gridmetrics')
    outfile = os.path.join(odir, tile_id + '.csv')

    # nothing to do if the output already exists or the tile failed earlier
    if os.path.exists(outfile) or has_error(tile_id):
        return tile_id

    try:
        fus.gridmetrics(groundfile=os.path.join(interim, 'dtm_ground_tiles', tile_id + '.dtm'),
                        heightbreak=1.37, # breast height, in meters
                        cellsize=10, # in units of lidar data
                        outputfile=outfile,
                        datafiles=infile,
                        strata=(0.15, 1.37, 5.0, 10.0, 20.0, 30.0),
                        intstrata=(0.15, 1.37, 5.0, 10.0, 20.0, 30.0),
                        las_class=(0,1,2,3,4,5),
                        odir=odir) # will make sure output directory is created if doesn't already exist
    except PipelineError as e:
        # Python 3 exceptions have no built-in ``message`` attribute; fall back
        # to str(e) so the handler itself cannot fail with AttributeError
        log_error(tile_id, 'make_gridmetrics', getattr(e, 'message', str(e)))

    return tile_id

In [None]:
# @dask.delayed
# def make_rasters_from_csvs(tile_id):
#     # CSV 2 GRID    
#     infile = os.path.join(interim, 'gridmetrics', tile_id + '_all_returns_strata_stats.csv')
#     odir = os.path.join(interim, 'gridmetrics')
#     outfile = os.path.join(odir, tile_id + '.csv')
    
#     if not os.path.exists(outfile):
#         if not has_error(tile_id):
#             try:
#                 proc = fus.
                
#             except PipelineError as e:
#                         log_error(tile_id, 'make_gridmetrics', e.message)
#     else: # output file already exists
#         pass
                
#     return tile_id

In [None]:
# glob.glob(os.path.join(interim, 'gridmetrics', '*.*'))

In [None]:
# ! head /storage/lidar/olc_metro_2014/interim/gridmetrics/489000_5026000_all_returns_strata_stats_ascii_header.txt

In [None]:
# strata_cols_to_grid = {'Elev strata (below 0.15) return proportion':'strat0_return-proportion',
#                        'Elev strata (0.15 to 1.37) return proportion':'strat1_return-proportion',
#                        'Elev strata (5.00 to 10.00) return proportion':'strat2_return-proportion',
#                        'Elev strata (10.00 to 20.00) return proportion':'strat3_return-proportion',
#                        'Elev strata (20.00 to 30.00) return proportion':'strat4_return-proportion',
#                        'Elev strata (above 30.00) return proportion':'strat5_return-proportion',
#                        'Int strata (below 0.15) median':'strat0_intensity-median',
#                        'Int strata (0.15 to 1.37) median':'strat1_intensity-median',
#                        'Int strata (1.37 to 5.00) median':'strat2_intensity-median',
#                        'Int strata (5.00 to 10.00) median':'strat3_intensity-median',
#                        'Int strata (10.00 to 20.00) median':'strat4_intensity-median',
#                        'Int strata (above 30.00) median':'strat5_intensity-median', 
#                        'Int strata (below 0.15) CV':'strat0_intensity-cv',
#                        'Int strata (0.15 to 1.37) CV':'strat1_intensity-cv',
#                        'Int strata (1.37 to 5.00) CV':'strat2_intensity-cv',
#                        'Int strata (5.00 to 10.00) CV':'strat3_intensity-cv',
#                        'Int strata (10.00 to 20.00) CV':'strat4_intensity-cv',
#                        'Int strata (above 30.00) CV':'strat5_intensity-cv'}

# elevation_cols_to_grid = {'Elev P05':'height_05-percentile',
#                           'Elev P25':'height_25-percentile',
#                           'Elev P50':'height_50-percentile',
#                           'Elev P75':'height_75-percentile',
#                           'Elev P95':'height_95_percentile',
#                           'Elev maximum':'height_max'}

In [None]:
# # get column numbers (starting at 1) for each variable of interest
# strata = pd.read_csv('/storage/lidar/olc_metro_2014/interim/gridmetrics/489000_5026000_all_returns_strata_stats.csv')
# strata_column_indexes = [strata.columns.get_loc(col)+1 for col in strata.columns if col in strata_cols_to_grid.keys()]
# print(strata_column_indexes)

# # get column numbers (starting at 1) for each variable of interest
# elevation = pd.read_csv('/storage/lidar/olc_metro_2014/interim/gridmetrics/489000_5026000_all_returns_elevation_stats.csv')
# elevation_column_indexes = [elevation.columns.get_loc(col)+1 for col in elevation.columns if col in elevation_cols_to_grid.keys()]
# print(elevation_column_indexes)

In [None]:
# TODO 

# CSV2GRID
# Will generate ASCII files from Gridmetrics CSV outputs
# FOR Height Percentiles, Stratum Percentiles (all returns), Intensity, Canopy Cover

# ASC2TIF
# Convert ASCII files to GeoTiffs, define their projections

# CLIP 
# Clip all the geotiffs to unbuffered tile boundaries

In [None]:
# close the client connection first, then the cluster it was attached to
c.close()
cluster.close()