In this notebook, we will clip 1-hectare and smaller plot-sized excerpts from lidar point cloud tiles retrieved from the Oregon State University lidar server. 

In [1]:
import geopandas as gpd
import pandas as pd
import os
import glob
import subprocess
import dask
from dask.distributed import LocalCluster, Client, progress

In [2]:
# Read the plot shapefile and replace each feature's geometry with its
# centroid, so every plot is represented by a single point from here on.
# (The file name suggests the features are 1-hectare plot footprints.)
PLOTS = '../data/processed/blm_usfs_wadnr_plots_1ha.shp'
plots = gpd.read_file(PLOTS)
plots['geometry'] = plots.centroid
plots.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 5089 entries, 0 to 5088
Data columns (total 10 columns):
comments     496 non-null object
lat          5089 non-null float64
lon          5089 non-null float64
meas_date    3866 non-null object
orig_id      5089 non-null object
plot_id      3866 non-null object
source       5089 non-null object
meas_yr      5089 non-null int64
uuid         5089 non-null object
geometry     5089 non-null object
dtypes: float64(2), int64(1), object(7)
memory usage: 397.7+ KB


In [3]:
# Display the coordinate reference system of the plot layer.
plots.crs

{'init': 'epsg:4326'}

In [4]:
# Read the lidar tile index shapefile; its `file_name` and `acq_name`
# columns are used below to locate the tiles that cover each plot.
TILE_IDX = '../data/raw/lidar/osu_tiles/osu_server_tiles_with_plots.shp'
tile_idx = gpd.read_file(TILE_IDX)
tile_idx.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 378 entries, 0 to 377
Data columns (total 13 columns):
file_name     378 non-null object
version       378 non-null object
num_points    378 non-null int64
point_type    378 non-null int64
point_size    378 non-null int64
min_x         378 non-null float64
max_x         378 non-null float64
min_y         378 non-null float64
max_y         378 non-null float64
min_z         378 non-null float64
max_z         378 non-null float64
acq_name      378 non-null object
geometry      378 non-null object
dtypes: float64(6), int64(3), object(4)
memory usage: 38.5+ KB


In [5]:
# Display the coordinate reference system of the tile index.
tile_idx.crs

{'init': 'epsg:4326'}

In [6]:
# Read the two UTM zone shapefiles and stack them into a single layer that
# keeps only the geometry and the zone label.
UTM_10 = '../data/external/utm_zone10_epsg4326.shp'
UTM_11 = '../data/external/utm_zone11_epsg4326.shp'
utm_10, utm_11 = (gpd.read_file(zone_path) for zone_path in (UTM_10, UTM_11))
utm_zones = pd.concat([utm_10, utm_11], axis=0, ignore_index=True)
utm_zones = utm_zones[['geometry', 'ZONE']]
utm_zones.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
geometry    2 non-null object
ZONE        2 non-null object
dtypes: object(2)
memory usage: 112.0+ bytes


In [7]:
# Tag every plot with the UTM zone polygon it falls in, index the result by
# plot uuid, and derive the EPSG code used to project plots in that zone
# (6339 for ZONE '10', 6340 for anything else).
plots_with_zone = gpd.sjoin(plots, utm_zones)
plot_utm = plots_with_zone[['uuid', 'ZONE', 'source', 'geometry']].set_index('uuid')
plot_utm['epsg'] = plot_utm['ZONE'].apply(lambda zone: 6339 if zone == '10' else 6340)
len(plot_utm)

5089

In [8]:
# Define the UTM coordinates of the centroid of each plot in its own zone.
# The row mask and the reprojection are computed once per EPSG code and
# reused for both the x and the y column.
for epsg in [6339, 6340]:
    in_zone = plot_utm.epsg == epsg
    zone_centroids = plot_utm.loc[in_zone].to_crs({'init': 'epsg:{}'.format(epsg)}).centroid
    plot_utm.loc[in_zone, 'utm_x'] = zone_centroids.x
    plot_utm.loc[in_zone, 'utm_y'] = zone_centroids.y

In [9]:
# Attach to every plot the file name and acquisition name of each lidar tile
# it intersects; the join's bookkeeping column is dropped afterwards.
tile_attrs = tile_idx[['geometry', 'file_name', 'acq_name']]
joined = gpd.sjoin(plot_utm, tile_attrs)
joined = joined.drop('index_right', axis=1)

In [10]:
# Summarize the plot/tile matches (one row per plot-tile pair).
joined.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 314 entries, b983d10e-c3ae-4e0e-90e4-e5018bb47860 to 0691746f-e133-4091-a40a-fb96a2685efb
Data columns (total 8 columns):
ZONE         314 non-null object
source       314 non-null object
geometry     314 non-null object
epsg         314 non-null int64
utm_x        314 non-null float64
utm_y        314 non-null float64
file_name    314 non-null object
acq_name     314 non-null object
dtypes: float64(2), int64(1), object(5)
memory usage: 22.1+ KB


In [11]:
# we'll need to group by forest data source (USFS, BLM, DNR)
# as well as lidar acquisition name so that each combo can be processed separately
# we'll need to do this for both UTM 10 and 11

In [12]:
# Build the lookup of unique (acquisition, source, plot, tile) combinations,
# indexed by plot uuid.  Only the distinct key combinations are needed, so
# they are deduplicated directly rather than aggregated with groupby().sum(),
# which summed unrelated numeric columns and may raise on non-numeric or
# geometry columns in newer pandas versions.
GROUP_COLS = ['acq_name', 'source', 'uuid', 'file_name']
tile_keys = joined.reset_index().rename({'index': 'uuid'}, axis=1)[GROUP_COLS]
matching_tiles = (
    tile_keys.dropna()                 # groupby ignored rows with missing keys
    .drop_duplicates()
    .sort_values(by=GROUP_COLS)        # same ordering groupby produced
    .set_index('uuid')
)

In [13]:
# Keep only the plots that matched at least one lidar tile.
plots_with_tiles = pd.unique(joined.index)
plot_utm = plot_utm.loc[plots_with_tiles]

In [23]:
@dask.delayed
def clip_1ha_plots(uuid):
    """Clip a circle around one plot from every lidar acquisition covering it.

    For each acquisition listed for ``uuid`` in ``matching_tiles``, the
    matching tiles are passed to LAStools ``las2las`` (run through wine) with
    ``-merged`` and ``-keep_circle`` centered on the plot's ``utm_x``/``utm_y``
    from ``plot_utm``.  One ``<uuid>_<acq_name>.laz`` file is written per
    acquisition into the output directory for the plot's source.

    Parameters
    ----------
    uuid : str
        Plot identifier; must be an index label of both ``plot_utm`` and
        ``matching_tiles``.

    Returns
    -------
    list of subprocess.CompletedProcess
        One entry per acquisition, with stdout and stderr captured.  Exit
        codes are not checked here; inspect ``returncode`` on each entry.

    Raises
    ------
    ValueError
        If the plot's ``source`` is neither a BLM nor a USFS source, because
        no output directory is defined for it.
    """
    TILE_DIR = '../data/raw/lidar/osu_tiles/'
    PLOT_RADIUS_M = 56.418  # radius of 1-hectare plot (185.1 ft)

    # look the plot up once and reuse the row for source and coordinates
    plot = plot_utm.loc[uuid]
    source = plot['source']

    if 'BLM' in source:
        ODIR = '../data/interim/lidar/plot_clips/osu/blm_plots/hectare_clips/'
    elif 'USFS' in source:
        ODIR = '../data/interim/lidar/plot_clips/osu/usfs_plots/hectare_clips/'
    else:
        # fail with a clear message instead of an UnboundLocalError on ODIR
        raise ValueError(
            'no output directory defined for plot {} with source {!r}'.format(uuid, source))

    # las2las is given a path inside ODIR, so make sure the directory exists
    os.makedirs(ODIR, exist_ok=True)

    # grab the coordinates of the plot for clipping
    x, y = plot[['utm_x', 'utm_y']]

    # find all matching tiles
    all_matching_tiles = matching_tiles.loc[[uuid]]

    # loop through each lidar acquisition to clip the plots
    procs = []
    for acq in pd.unique(all_matching_tiles['acq_name']):
        out_name = uuid + '_' + acq + '.laz'
        outfile = os.path.join(ODIR, out_name)

        tiles = all_matching_tiles.loc[all_matching_tiles['acq_name'] == acq]['file_name'].values
        tiles = [os.path.join(TILE_DIR, acq, 'utm_laz', t) for t in tiles]

        proc = subprocess.run(['wine', '/storage/lidar/LAStools/bin/las2las.exe',
                               '-i', *tiles, '-merged',
                               '-o', outfile,
                               '-keep_circle', str(x), str(y), str(PLOT_RADIUS_M)],
                              stderr=subprocess.PIPE,
                              stdout=subprocess.PIPE)
        procs.append(proc)
    return procs

In [24]:
@dask.delayed
def clip_smaller_plots(ha_clip):
    """Clip a smaller, source-specific circle out of an existing hectare clip.

    The plot uuid is taken from the part of the file name before the first
    underscore.  Its ``utm_x``/``utm_y`` and ``source`` are read from
    ``plot_utm``, and LAStools ``las2las`` (run through wine) is called with
    ``-keep_circle`` using the radius that matches the source.  The output
    keeps the input's file name and is written to a ``plot_clips`` directory
    that is a sibling of the directory holding ``ha_clip``.

    Parameters
    ----------
    ha_clip : str
        Path to a hectare clip named ``<uuid>_<...>.laz``.

    Returns
    -------
    subprocess.CompletedProcess
        Result of the ``las2las`` call with stdout and stderr captured.  The
        exit code is not checked here.

    Raises
    ------
    ValueError
        If the plot's ``source`` is not a WA-DNR, USFS or BLM source, because
        no plot radius is defined for it.
    """
    dirname, basename = os.path.split(ha_clip)
    outfile = os.path.abspath(os.path.join(dirname, '..', 'plot_clips', basename))

    uuid = basename.split('_')[0]

    # look the plot up once and reuse the row for coordinates and source
    plot = plot_utm.loc[uuid]

    # grab the coordinates of the plot for clipping
    x, y = plot[['utm_x', 'utm_y']]

    source = plot['source']
    if 'WA-DNR' in source:
        # radius of 1/10th acre plot (37.2 ft)
        PLOT_RADIUS_M = 11.35
    elif 'USFS' in source:
        # radius of 1/4 acre plot (58.9 ft)
        PLOT_RADIUS_M = 17.9454
    elif 'BLM' in source:
        # radius of 1/8th acre plot (41.6 ft)
        PLOT_RADIUS_M = 12.69
    else:
        # fail with a clear message instead of an UnboundLocalError on the radius
        raise ValueError(
            'no plot radius defined for plot {} with source {!r}'.format(uuid, source))

    # las2las is given a path inside this directory, so make sure it exists
    os.makedirs(os.path.dirname(outfile), exist_ok=True)

    proc = subprocess.run(['wine', '/storage/lidar/LAStools/bin/las2las.exe',
                           '-i', ha_clip,
                           '-o', outfile,
                           '-keep_circle', str(x), str(y), str(PLOT_RADIUS_M)],
                          stderr=subprocess.PIPE,
                          stdout=subprocess.PIPE)
    return proc

In [22]:
# Start a local dask cluster on fixed ports and connect a client to it; the
# client `c` is what the later cells use to submit the delayed clipping jobs.
# NOTE(review): `diagnostics_port` may be deprecated or removed in newer
# versions of `distributed` -- confirm against the installed version.
cluster=LocalCluster(scheduler_port=7001, diagnostics_port=7002)
c = Client(cluster)

In [17]:
# c.scatter([joined, plot_utm, matching_tiles])

[<Future: status: finished, type: GeoDataFrame, key: GeoDataFrame-5070d4700031bc29c0dcc93f5e897a4a>,
 <Future: status: finished, type: GeoDataFrame, key: GeoDataFrame-eb28ce3dacd7fd61ea1f0c599ae43c5d>,
 <Future: status: finished, type: DataFrame, key: DataFrame-6b89590235e6aa0112f474a808882143>]

In [25]:
# Unique plot uuids to process (plot_utm is indexed by uuid), and how many
# there are.
plot_ids = pd.unique(plot_utm.index)
len(plot_ids)

313

In [29]:
# Build one delayed hectare-clipping task per plot, submit them all to the
# cluster, and show a progress bar while they run.
hectare_jobs = [clip_1ha_plots(plot_id) for plot_id in plot_ids]
res = c.persist(hectare_jobs)
progress(res)

VBox()

In [30]:
# Collect every hectare clip written for USFS and BLM plots.
usfs_ha = glob.glob('../data/interim/lidar/plot_clips/osu/usfs_plots/hectare_clips/*.laz')
blm_ha = glob.glob('../data/interim/lidar/plot_clips/osu/blm_plots/hectare_clips/*.laz')
ha_clips = usfs_ha + blm_ha

# Build one delayed plot-sized clipping task per hectare clip, submit them
# to the cluster, and show a progress bar while they run.
smaller_jobs = [clip_smaller_plots(ha_path) for ha_path in ha_clips]
res = c.persist(smaller_jobs)
progress(res)

VBox()

In [32]:
# Count the plot-sized clips that were actually written to disk.
usfs_plot = glob.glob('../data/interim/lidar/plot_clips/osu/usfs_plots/plot_clips/*.laz')
blm_plot = glob.glob('../data/interim/lidar/plot_clips/osu/blm_plots/plot_clips/*.laz')
plot_clips = [*usfs_plot, *blm_plot]
len(plot_clips)

308

In [33]:
@dask.delayed
def get_boundary(infile, odir):
    """Run LAStools ``lasboundary`` (through wine) on a single input file.

    The tool is invoked with ``-odir odir`` plus the ``-oshp`` and ``-labels``
    flags.  stdout and stderr are captured and the exit code is not checked.

    Parameters
    ----------
    infile : str
        Path of the point cloud file passed to ``-i``.
    odir : str
        Directory passed to ``-odir``.

    Returns
    -------
    subprocess.CompletedProcess
        Result of the ``lasboundary`` call.
    """
    command = [
        'wine', '/storage/lidar/LAStools/bin/lasboundary.exe',
        '-i', infile,
        '-odir', odir,
        '-oshp',
        '-labels',
    ]
    return subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

In [34]:
# Gather every hectare clip and plot-sized clip produced for the OSU tiles.
USFS_HA = glob.glob('../data/interim/lidar/plot_clips/osu/usfs_plots/hectare_clips/*.laz')
USFS_PLOT = glob.glob('../data/interim/lidar/plot_clips/osu/usfs_plots/plot_clips/*.laz')
BLM_HA = glob.glob('../data/interim/lidar/plot_clips/osu/blm_plots/hectare_clips/*.laz')
BLM_PLOT = glob.glob('../data/interim/lidar/plot_clips/osu/blm_plots/plot_clips/*.laz')

jobs_to_do = []
PLOT_SETS = [USFS_HA, USFS_PLOT, BLM_HA, BLM_PLOT]
# Each boundary is written into the same OSU directory its input clips were
# read from (entries are paired with PLOT_SETS by position).  Writing them
# into the 'noaa' tree would mix OSU boundaries into another dataset's output.
ODIRS = ['../data/interim/lidar/plot_clips/osu/usfs_plots/hectare_clips/',
         '../data/interim/lidar/plot_clips/osu/usfs_plots/plot_clips/',
         '../data/interim/lidar/plot_clips/osu/blm_plots/hectare_clips/',
         '../data/interim/lidar/plot_clips/osu/blm_plots/plot_clips/']

# One delayed boundary task per clip file.
for plot_set, odir in zip(PLOT_SETS, ODIRS):
    jobs_to_do += [get_boundary(p, odir=odir) for p in plot_set]

In [35]:
# Submit all boundary tasks to the cluster and show a progress bar while
# they run.
res = c.persist(jobs_to_do)
progress(res)

VBox()

In [20]:
# c.cancel(res)

In [36]:
# c.close()
# cluster.close()