In [None]:
import geopandas as gpd
import pandas as pd
import geoplot as gplt
import laspy
import shapely
import os
from datetime import datetime
import numpy as np
from itertools import product
import math
import pyproj
import re
import pyspark

In [None]:
resolution = 2_000  # edge length of one square grid cell, in meters

In [None]:
def lp(v):
    """Print ``v`` to stdout, prefixed with the current local time in brackets."""
    timestamp = datetime.now()
    print("[{}] {}".format(timestamp, v))


lp("Starting...")

In [None]:
lp("Creating spark context")
conf = (
    pyspark.SparkConf()
    .setAppName("ProcessLidar")
    .setMaster("spark://G-ChaosStation:7077")
)
# getOrCreate() hands back the context already running in this kernel, so
# re-running this cell no longer fails with "Cannot run multiple SparkContexts
# at once"; on a fresh kernel it builds a new context from ``conf``.
sc = pyspark.SparkContext.getOrCreate(conf=conf)

In [None]:
# Relative and absolute locations of the input data. Both end with a path
# separator so file names can be appended by plain string concatenation.
dataPath = os.path.join(os.curdir, "datasets", "")
fullDataPath = f"{os.path.realpath(dataPath)}{os.sep}"
lp(f"Path:  {dataPath}\t\tFullPath:{fullDataPath}")

In [None]:
# os.listdir() returns entries in arbitrary order, and a fixed index such as
# [5] raises IndexError when the folder holds fewer files. Sort the LAS/LAZ
# files and take the first one so the reference tile is deterministic.
# NOTE(review): this assumes every tile shares the same CRS -- confirm.
lazDir = f"{dataPath}laz"
lasFileNames = sorted(
    name for name in os.listdir(lazDir) if name.lower().endswith((".las", ".laz"))
)
if not lasFileNames:
    raise FileNotFoundError(f"No .las/.laz files found in {lazDir}")
testLasFileName = lasFileNames[0]
lp(f"Opening {testLasFileName} to get crs and more")
testLas = laspy.read(f"{lazDir}{os.sep}{testLasFileName}")

In [None]:
# Load the neighborhood polygons and reproject them into the CRS parsed from
# the first VLR of the reference LAS file.
# NOTE(review): assumes vlrs[0] is the VLR that carries the CRS -- confirm.
neighborhoodDf = (
    gpd.read_file(f"{dataPath}Neighborhoods.geojson")
    .to_crs(testLas.vlrs[0].parse_crs())
)

In [None]:
# Quick visual check of the reprojected neighborhood polygons.
neighborhoodDf.plot()

In [None]:
# Overall extent of the neighborhoods as integers; int() drops the fractional
# part of each of the four bound values.
bounds = list(map(int, neighborhoodDf.total_bounds))

In [None]:
# Width and height of the extent expressed in grid cells (may be fractional).
xPixels, yPixels = (
    (bounds[hi] - bounds[lo]) / resolution for lo, hi in ((0, 2), (1, 3))
)
lp(f"Resolution will be {xPixels} x {yPixels}")

In [None]:
# Tile the bounding rectangle with resolution x resolution squares, starting
# from the lower-left corner of ``bounds``.
xOrigins = range(bounds[0], bounds[2], resolution)
yOrigins = range(bounds[1], bounds[3], resolution)
boxes = [
    shapely.box(x, y, x + resolution, y + resolution)
    for x, y in product(xOrigins, yOrigins)
]
boxesDf = gpd.GeoDataFrame(geometry=boxes).set_crs(neighborhoodDf.crs)

# Keep only the squares that intersect the union of the neighborhood polygons.
boxesDf = boxesDf[boxesDf.intersects(neighborhoodDf.union_all())].copy()

# Placeholder result columns; the aggregation loop fills them in per box.
boxesDf['AltitudeTotal'] = np.nan
boxesDf['WaterTotal'] = np.nan
boxesDf['Total'] = np.nan
# The previous placeholder subtracted two now() calls through a misspelled
# name (NameError). An explicit zero timedelta gives the column a timedelta
# dtype without depending on the clock.
boxesDf['RunTime'] = pd.Timedelta(0)


In [None]:
# Quick visual check of the grid squares kept after the intersection filter.
boxesDf.plot()


In [None]:
# Transformer from WGS84 coordinates into the CRS of the neighborhood layer.
wgsToUTM = pyproj.transformer.Transformer.from_crs(
    crs_from=pyproj.CRS.from_string('WGS84'),
    crs_to=neighborhoodDf.crs,
)

# Captures the two digits of a negative integer central meridian written as
# PARAMETER["central_meridian",-NN] in a WKT string.
meridianReg = re.compile(r'PARAMETER\["central_meridian",\-([0-9]{2})\]')


def processLas(las, boxBounds):
    """Aggregate the ground/water returns of one LAS file that fall inside a box.

    Parameters
    ----------
    las
        Point-cloud object exposing ``classification``, ``X``, ``Y`` and ``Z``
        arrays (for example the result of ``laspy.read``).
    boxBounds
        ``(minx, miny, maxx, maxy)`` of the grid square, as returned by a
        shapely geometry's ``.bounds``.

    Returns
    -------
    tuple
        ``(sum of raw Z over the selected points, number of selected points
        with classification 9, number of selected points)``.
    """
    lp(f"Processing for box {boxBounds}")

    # Raw integer X/Y are compared against the box bounds multiplied by 1000.
    # NOTE(review): this presumes a 0.001 scale and a zero offset in the LAS
    # header -- confirm, or compare against the scaled las.x / las.y instead.
    minX, minY, maxX, maxY = (bound * 1000 for bound in boxBounds)

    # Classification 2 is ground and 9 is water in the ASPRS LAS standard.
    groundMask = np.isin(las.classification, [2, 9])
    inBoundsMaskX = (las.X >= minX) & (las.X <= maxX)
    inBoundsMaskY = (las.Y >= minY) & (las.Y <= maxY)

    # np.logical_and accepts only two inputs; a third positional argument is
    # its ``out`` buffer, so passing the Y mask there overwrote it instead of
    # applying it. Combine the three masks explicitly.
    goodPointMask = groundMask & inBoundsMaskX & inBoundsMaskY

    return (
        int(las.Z[goodPointMask].sum()),
        np.count_nonzero(las.classification[goodPointMask] == 9),
        np.count_nonzero(goodPointMask),
    )


In [None]:
# Smoke test: run processLas on one tile against the first grid square; the
# cell displays the resulting tuple.
processLas(
    laspy.read('datasets/laz/USGS_LPC_LA_2021GreaterNewOrleans_C22_w0776n3318.laz'),
    boxesDf['geometry'].iloc[0].bounds,
)

In [None]:
# Output folder next to the notebook; it keeps a trailing separator so file
# names can be appended directly. Created on first use.
outPath = os.path.join(os.curdir, "output", "")
if not os.path.exists(outPath):
    lp(f"Creating output path {outPath}")
    os.makedirs(outPath)

In [None]:
# Absolute path of every entry in the laz folder.
lasPaths = [
    os.path.join(fullDataPath, "laz", entry)
    for entry in os.listdir(f"{dataPath}laz")
]
lasPathRdd = sc.parallelize(lasPaths)
# Left uncached on purpose: append .cache() only if the cluster has enough RAM
# to hold every decoded file.
lasDataRdd = lasPathRdd.map(laspy.read)

In [None]:


def seqOp(x, y):
    """Element-wise sum of two (altitude, water, total) triples."""
    return (x[0] + y[0], x[1] + y[1], x[2] + y[2])


boxCount = boxesDf.shape[0]
total = 0  # stays 0 when there are no boxes to process
for total, (idx, row) in enumerate(boxesDf.iterrows(), start=1):
    startTime = datetime.now()

    # Resolve the bounds once on the driver, so the function shipped to the
    # workers carries a plain tuple instead of the whole row and does not
    # recompute ``.bounds`` for every LAS file.
    boxBounds = row['geometry'].bounds
    lp(f"Processing [{idx}]({total}/{boxCount}) for bounded box {boxBounds}")

    # Binding boxBounds as a default argument pins this iteration's value.
    outputRdd = lasDataRdd.map(
        lambda las, boxBounds=boxBounds: processLas(las, boxBounds)
    )
    # The same function merges values within a partition and across
    # partitions, because both operate on identical triples.
    output = outputRdd.aggregate((0, 0, 0), seqOp, seqOp)

    boxesDf.loc[idx, 'AltitudeTotal'] = output[0]
    boxesDf.loc[idx, 'WaterTotal'] = output[1]
    boxesDf.loc[idx, 'Total'] = output[2]

    runTime = datetime.now() - startTime
    boxesDf.loc[idx, 'RunTime'] = runTime
    lp(f"({total}/{boxCount}) Output {output} for bounded box[{idx}] {boxBounds}  done in {runTime}")
    
    
    
    

In [None]:
# Persist the aggregated grid to the output folder; the file name records the
# grid resolution used for this run.
boxesDf.to_pickle(f"{outPath}AggregateLidarData_{resolution}m.pickle")


In [None]:
# Leave a small marker file recording the final frame shape and the time the
# run finished.
with open(f"{outPath}FinishTime_{resolution}m.txt", 'w') as f:
    print(
        f"Finished shape {boxesDf.shape}  stop time {datetime.now()}",
        end="",
        file=f,
    )

In [None]:
# Final timestamped log line marking the end of the notebook run.
lp("Done!")