In [None]:
import os
import glob
import multiprocessing
from pyFIRS.wrappers import lastools

In [None]:
# Root directory of the lidar acquisition processed by this notebook.
WORKDIR = '/storage/lidar/hoh-river_2012/'

# Sub-directories of WORKDIR: RAW is where the input *.laz tiles are read
# from, INTERIM is where the re-tiled outputs are written.
RAW, INTERIM = (os.path.join(WORKDIR, subdir) for subdir in ('raw', 'interim'))

# Number of cores handed to LAStools via its `cores` argument.
NUM_CORES = 32

# Maximum number of points to allow in each tile (50 million).
POINT_CAPACITY = 50 * 10**6

In [None]:
# Directory passed to the pyFIRS LAStools wrapper
# (presumably the folder holding the LAStools executables -- confirm).
LASTOOLS_BIN = '/storage/lidar/LAStools/bin'

# Wrapper object through which the LAStools commands below (e.g. lastile)
# are invoked.
las = lastools.useLAStools(LASTOOLS_BIN)

## Retile the data to add buffers for avoiding edge effects during processing.

In practice, executing the `lastile` command on individual tiles in parallel is likely to corrupt your output files. I suspect this is because dynamic re-tiling means that many output tiles draw on several input files, so parallel processes launched outside of LAStools can collide when writing points from different inputs to the same output tile. For this step we therefore let `lastile` handle the parallelism under the hood. We won't have a progress bar, but this shouldn't take more than 5–10 minutes per ~100 tiles (with vendor tiles of ~1000 × 1000 m at 4–8 pts/m²).

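**THERE ARE ARGUMENTS IN THE FOLLOWING COMMAND THAT DEPEND UPON THE UNITS OF THE DATA:** `tile_size` and `buffer` are given in the units of the lidar data, and the values used below (1000 and 50) assume meters.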

In [None]:
%%time
# The wildcard pattern itself (not the expanded list) is what gets passed to
# lastile; the Python-side glob is only used to report the input tile count.
INFILE_STR = os.path.join(RAW, '*.laz')
INFILES = glob.glob(INFILE_STR)
ODIR = os.path.join(INTERIM, 'retiled')
print('Retiling {:,d} tiles'.format(len(INFILES)))

# Initial pass: cut all inputs into buffered tiles written to ODIR.
initial_tiling_args = dict(
    i=INFILE_STR,
    tile_size=1000,                # in units of lidar data
    buffer=50,                     # assumes units are in meters
    flag_as_withheld=True,         # flag buffer points as "withheld"
    refine_tiling=POINT_CAPACITY,
    olaz=True,
    odir=ODIR,
    cores=NUM_CORES,
)
tile_proc = las.lastile(**initial_tiling_args)

print('First pass done. Now moving to adaptive tiling.')

# Arguments shared by every refinement pass: refine the tiles to ensure no
# tile contains more than the desired number of points.
refine_args = dict(
    flag_as_withheld=True,         # flag buffer points as "withheld"
    refine_tiles=POINT_CAPACITY,
    olaz=True,
    cores=NUM_CORES,
)

# Refinement pass over the tiles whose names end in _1000.
tile_proc2 = las.lastile(i=os.path.join(ODIR, '*_1000.laz'), **refine_args)

print('Done with first pass, moving onto second.')
# Refinement pass over the tiles whose names end in _500.
tile_proc3 = las.lastile(i=os.path.join(ODIR, '*_500.laz'), **refine_args)

print('Done with second pass, moving onto third.')
# Refinement pass over the tiles whose names end in _250.
tile_proc4 = las.lastile(i=os.path.join(ODIR, '*_250.laz'), **refine_args)

print('Done.')

In [None]:
ODIR = os.path.join(INTERIM, 'retiled')


def _tile_ids(size):
    """Return the IDs of the tiles in ODIR whose file names end in `_<size>.laz`.

    The ID is the file name with the `_<size>.laz` suffix stripped (e.g.
    `x_y_1000.laz` -> `x_y`), so it also works if tile names carry a prefix.
    """
    suffix = '_{}.laz'.format(size)
    pattern = os.path.join(ODIR, '*' + suffix)
    return [os.path.basename(path)[:-len(suffix)] for path in glob.glob(pattern)]


tiles_1000 = _tile_ids(1000)
tiles_500 = _tile_ids(500)
tiles_250 = _tile_ids(250)

# A 1000-unit tile is redundant when a smaller tile shares its ID. Use sets so
# that a tile matched by BOTH a 500 and a 250 tile is listed (and deleted)
# only once -- listing it twice made the second os.remove raise
# FileNotFoundError and inflated the reported count.
refined_ids = set(tiles_500) | set(tiles_250)
redundant_ids = sorted(set(tiles_1000) & refined_ids)
redundant_tiles = [os.path.join(ODIR, tile_id + '_1000.laz') for tile_id in redundant_ids]

# NOTE(review): 500-unit tiles that share an ID with a 250-unit tile are left
# in place, as in the original logic -- confirm whether lastile keeps them and
# whether they should be removed as well.

print('Produced {:,d} original tiles'.format(len(tiles_1000)))
print('Removing {:,d} redundant original tiles covered by smaller tiles'.format(len(redundant_tiles)))

# Deleting files is cheap; a plain loop avoids spawning a process pool and
# surfaces any error directly in the notebook.
for tile_path in redundant_tiles:
    os.remove(tile_path)

OUTFILES = glob.glob(os.path.join(ODIR, '*.laz'))
print('Produced {:,d} new tiles, each with <= {:,d} points'.format(len(OUTFILES), POINT_CAPACITY))