In this notebook, we reproject all point clouds from the geographic coordinate system in which the NOAA lidar server delivers them into UTM. The NOAA tiles need to be in a projected coordinate system before we can clip plots from them.

In [1]:
import geopandas as gpd
import pandas as pd
import os
import glob
import subprocess
import dask
from dask.distributed import Client, LocalCluster, progress

We will need to know which UTM zone each tile is in.

If you haven't created a tile index already, you can do so like this:

In [4]:
# folder holding the original NOAA tiles, and every .laz file found in it
TILE_DIR = '../data/raw/lidar/noaa_tiles/orig_laz/'
laz_pattern = os.path.join(TILE_DIR, '*.laz')
tiles = glob.glob(laz_pattern)

@dask.delayed
def get_boundary(infile, odir):
    """Run LAStools' lasboundary (through wine) on a single lidar tile.

    Parameters
    ----------
    infile : str
        Path of the tile passed to lasboundary via ``-i``.
    odir : str
        Directory passed to lasboundary via ``-odir``.

    Returns
    -------
    subprocess.CompletedProcess
        The finished process, with stdout and stderr captured. The return
        code is not checked here; inspect it on the returned object.
    """
    # NOTE(review): -oshp / -use_bb / -labels presumably request shapefile
    # output, a bounding-box footprint and attribute labels -- confirm
    # against the lasboundary README.
    lasboundary_cmd = [
        'wine', '/storage/lidar/LAStools/bin/lasboundary.exe',
        '-i', infile,
        '-odir', odir,
        '-oshp',
        '-use_bb',
        '-labels',
    ]
    return subprocess.run(lasboundary_cmd,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)

In [2]:
# start a local dask cluster on fixed ports, then connect a client to it
cluster = LocalCluster(
    scheduler_port=7001,
    diagnostics_port=7002,
)
c = Client(cluster)

In [None]:
# queue one lasboundary job per tile; output goes to the tile directory itself
boundary_jobs = [get_boundary(tile, odir=TILE_DIR) for tile in tiles]
res = c.persist(boundary_jobs)
progress(res)

In [None]:
# Optional: uncomment to cancel the futures currently held in `res`.
# c.cancel(res)

In [None]:
# Optional: uncomment to shut down the dask client and the local cluster.
# c.close()
# cluster.close()

In [5]:
# gather coverages of each tile
shps = glob.glob(os.path.join(TILE_DIR, '*.shp'))
len(shps)

3617

In [6]:
# load every tile-boundary shapefile as its own GeoDataFrame
gdfs = list(map(gpd.read_file, shps))
# stack them into a single GeoDataFrame with a fresh 0..n-1 index
gdf = pd.concat(gdfs, ignore_index=True, axis=0)
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3617 entries, 0 to 3616
Data columns (total 12 columns):
file_name     3617 non-null object
version       3617 non-null object
num_points    3617 non-null int64
point_type    3617 non-null int64
point_size    3617 non-null int64
min_x         3617 non-null float64
max_x         3617 non-null float64
min_y         3617 non-null float64
max_y         3617 non-null float64
min_z         3617 non-null float64
max_z         3617 non-null float64
geometry      3617 non-null object
dtypes: float64(6), int64(3), object(3)
memory usage: 339.2+ KB


In [7]:
# coordinate reference system of the combined tile boundaries
gdf.crs

{'init': 'epsg:4269'}

In [14]:
# reproject the tile footprints to EPSG:4326 and save them as the tile index
wgs84 = {'init': 'epsg:4326'}
tile_idx = gdf.to_crs(wgs84)
tile_idx.to_file('../data/raw/lidar/noaa_tiles/noaa_tileindex.shp')

Now we'll grab our UTM zones and do some spatial joins to assign each lidar tile to a UTM zone.

In [9]:
# UTM zone polygons (the file names indicate EPSG:4326)
UTM_10 = '../data/external/utm_zone10_epsg4326.shp'
UTM_11 = '../data/external/utm_zone11_epsg4326.shp'
utm_10, utm_11 = (gpd.read_file(path) for path in (UTM_10, UTM_11))
utm_10.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 7 columns):
SWLON         1 non-null object
SWLAT         1 non-null object
HEMISPHERE    1 non-null object
ZONE          1 non-null object
CM            1 non-null object
Zone_Hemi     1 non-null object
geometry      1 non-null object
dtypes: object(7)
memory usage: 136.0+ bytes


In [10]:
# spatially join the tile index against each UTM zone polygon
utm10_tiles, utm11_tiles = (gpd.sjoin(tile_idx, zone) for zone in (utm_10, utm_11))
# compare the total tile count with the per-zone counts
len(tile_idx), len(utm10_tiles), len(utm11_tiles)

(3617, 3518, 99)

In [11]:
# stack both zone assignments and keep only the zone, indexed by tile file name
zone_tiles = pd.concat((utm10_tiles, utm11_tiles), axis=0, ignore_index=True)
utm_lookup = zone_tiles[['file_name', 'ZONE']].set_index('file_name')
# zone '10' -> EPSG 6339, anything else (here zone '11') -> EPSG 6340
# NOTE(review): presumably NAD83(2011) / UTM 10N and 11N -- confirm in the EPSG registry
utm_lookup['epsg'] = utm_lookup.ZONE.apply(lambda zone: 6339 if zone == '10' else 6340)
utm_lookup.head()

Unnamed: 0_level_0,ZONE,epsg
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1
NOAA2482_124a3317.laz,10,6339
NOAA2482_124h4322.laz,10,6339
NOAA2482_123a5225.laz,10,6339
NOAA2496_q46123h3419.laz,10,6339
NOAA6331_20160318_USGS_LPC_WA_Western_North_2016_q47121H7301_LAS_2018.laz,10,6339


In [12]:
@dask.delayed
def reproject(infile, odir):
    """Reproject one lidar tile to its UTM zone with LAStools' las2las.

    The target EPSG code is looked up in the module-level ``utm_lookup``
    table, which is indexed by tile file name.

    Parameters
    ----------
    infile : str
        Path of the tile to reproject.
    odir : str
        Directory for the reprojected tile; created if it does not exist.
        The output keeps the input's file name.

    Returns
    -------
    subprocess.CompletedProcess
        The finished las2las process, with stdout and stderr captured. The
        return code is not checked here; inspect it on the returned object.

    Raises
    ------
    KeyError
        If the tile's file name is not present in ``utm_lookup``.
    """
    basename = os.path.basename(infile)
    outfile = os.path.join(odir, basename)

    # Make sure the destination exists before las2las is asked to write into
    # it; any failure would otherwise only show up in the captured pipes.
    os.makedirs(odir, exist_ok=True)

    # NOTE(review): a tile intersecting both UTM zones would appear twice in
    # utm_lookup and this lookup would return a Series instead of a scalar --
    # confirm that file names in utm_lookup are unique.
    target_epsg = utm_lookup.loc[basename, 'epsg']

    proc = subprocess.run(['wine', '/storage/lidar/LAStools/bin/las2las.exe',
                           '-i', infile,
                           '-o', outfile,
                           '-target_epsg', str(target_epsg)],
                          stderr=subprocess.PIPE,
                          stdout=subprocess.PIPE)
    return proc

In [13]:
# reprojected tiles are written here under their original file names
ODIR = '../data/raw/lidar/noaa_tiles/utm_laz/'
reproject_jobs = [reproject(tile, odir=ODIR) for tile in tiles]
res = c.persist(reproject_jobs)
progress(res)

VBox()

In [1]:
# Optional: uncomment to shut down the dask client and the local cluster.
# Requires `c` and `cluster` from the cluster-setup cell to exist in this kernel.
# c.close()
# cluster.close()

NameError: name 'c' is not defined