# Create JSON from PDAL For STAC
CFolkers
Geospatial Services 
2024 02 12

modified from https://github.com/stac-extensions/pointcloud/blob/main/examples/pdal-to-stac.py

# Further Updates
- instead of downloading .laz files then using PDAL, use PDAL to directly access ,laz files in S3

In [1]:
import sys
import logging
import constants
import boto3
from botocore.exceptions import ClientError
import os
from os import path
from pathlib import Path
import pystac 
import pdal
from osgeo import ogr
from osgeo import osr
import json
from osgeo import ogr, osr


loading dot env...


In [10]:
#list .laz objects in bucket
object_key=r'STAC_LiDAR/PointClouds/'
#pdal .laz download loc- 
download_loc=r'STAC_LiDAR/Download'
#s3 storage location for json files
json_out_loc=r'STAC_LiDAR/JSON/'

In [3]:
# use third party object storage to create an S3 Client
s3_client = boto3.client(
    "s3",
    endpoint_url=constants.AWS_S3_ENDPOINT,
    aws_access_key_id=constants.AWS_ACCESS_KEY_ID,
    aws_secret_access_key=constants.AWS_SECRET_ACCESS_KEY,
)
# for some reason the bucket is adding an extra letter at the end???
bucket = constants.AWS_S3_BUCKET

In [4]:
laz_objects=[]

response = s3_client.list_objects_v2(Bucket=bucket, Prefix=object_key, StartAfter=object_key)

if 'Contents' in response:
    # Iterate over objects and print their names
    for obj in response['Contents']:
        laz_objects.append(obj['Key'])
        print(obj['Key'])
        print(f"Object Size {obj['Size']}")
else:
    print("No objects found in the bucket.")

STAC_LiDAR/PointClouds/bc_092o018_3_2_4_xyes_12_utm10_2018.laz
Object Size 23236093
STAC_LiDAR/PointClouds/bc_092o018_3_4_2_xyes_12_utm10_2018.laz
Object Size 140355729
STAC_LiDAR/PointClouds/bc_092o018_3_4_4_xyes_12_utm10_2018.laz
Object Size 50122462
STAC_LiDAR/PointClouds/bc_092o018_4_1_3_xyes_12_utm10_2018.laz
Object Size 95552259
STAC_LiDAR/PointClouds/bc_092o018_4_1_4_xyes_12_utm10_2018.laz
Object Size 336226672
STAC_LiDAR/PointClouds/bc_092o018_4_3_1_xyes_12_utm10_2018.laz
Object Size 315611463
STAC_LiDAR/PointClouds/bc_092o018_4_3_2_xyes_12_utm10_2018.laz
Object Size 354790466
STAC_LiDAR/PointClouds/bc_092o018_4_3_3_xyes_12_utm10_2018.laz
Object Size 312537985
STAC_LiDAR/PointClouds/bc_092o018_4_3_4_xyes_12_utm10_2018.laz
Object Size 332078905


In [5]:
def capture_date(pdalinfo):
    import datetime
    year = pdalinfo['creation_year']
    day = pdalinfo['creation_doy']
    date = datetime.datetime(int(year), 1, 1) + datetime.timedelta(int(day) - 1 if int(day) > 1 else int(day))
    return date.isoformat()+'Z'

def convertGeometry(geom, srs, crs):
    in_ref = osr.SpatialReference()
    in_ref.SetFromUserInput(srs)
    out_ref = osr.SpatialReference()
    out_ref.SetFromUserInput(crs)

    g = ogr.CreateGeometryFromJson(json.dumps(geom))
    g.AssignSpatialReference(in_ref)
    g.TransformTo(out_ref)
    return json.loads(g.ExportToJson())


def convertBBox(obj):
    output = []
    output.append(float(obj['minx']))
    output.append(float(obj['miny']))
    output.append(float(obj['minz']))
    output.append(float(obj['maxx']))
    output.append(float(obj['maxy']))
    output.append(float(obj['maxz']))
    return output

In [8]:
#download .laz files and read the headers


for laz in laz_objects:
    output = {}
    
    laz_download=f"{download_loc}{laz.split('/')[-1]}"
    download_object=f"{object_key}{laz.split('/')[-1]}"
    
    if not os.path.exists(laz_download):   
        s3_client.download_file(bucket, download_object, laz_download)
    
    
    # fix WKT 
    filename = laz_download
    with open(filename, "rb+") as f:
        f.seek(6)
        f.write(bytes([17, 0, 0, 0]))
    
    #start pdal pipline
    r = pdal.Reader.las(laz_download)
    hb = pdal.Filter.hexbin()
    s = pdal.Filter.stats()
    i = pdal.Filter.info()

    pipeline: pdal.Pipeline = r | hb | s | i

    count = pipeline.execute()

    boundary = pipeline.metadata['metadata'][hb.type]
    stats = pipeline.metadata['metadata'][s.type]
    info = pipeline.metadata['metadata'][i.type]
    copc = pipeline.metadata['metadata'][r.type]
    
    if copc['comp_spatialreference'] == '':
        if 'utm08' in filename or 'utm8' in filename:
            copc['comp_spatialreference'] = 'EPSG:26708'
            srs='EPSG:26708'
        elif 'utm09' in filename or 'utm9' in filename:
            copc['comp_spatialreference'] = 'EPSG:26709'
            srs= 'EPSG:26709'
        elif 'utm10' in filename:
            copc['comp_spatialreference'] = 'EPSG:26910'
            srs= 'EPSG:26910'
        elif 'utm11' in filename:
            copc['comp_spatialreference'] = 'EPSG:26711'
            srs= 'EPSG:26711'
        elif 'utm12' in filename:
            copc['comp_spatialreference'] = 'EPSG:26711'
            srs= 'EPSG:26711'
    
    try:
        output['geometry'] = convertGeometry(
        boundary['boundary_json'],
        copc['comp_spatialreference'],
        srs
        )
    except KeyError:
        output['geometry'] = stats['bbox'][srs]['boundary']

    output['bbox'] = convertBBox(stats['bbox']['native']['bbox'])
    output['id'] = path.basename(filename)
    output['type'] = 'Feature'
    assets = {'data': {'href': filename}}
    
    properties = {}

    properties['pc:schemas'] = info['schema']['dimensions']
    properties['pc:statistics'] = stats['statistic']
    properties['title'] = "LiDAR BC"
    properties['providers'] = [
        {
            "name": "LidarBC",
            "description": "LidarBC is an initiative to provide open public access to Light Detection and Ranging data (LiDAR) and associated datasets collected by the Government of British Columbia.",
            "roles": [
            "producer",
            ],
            "url": "https://lidar.gov.bc.ca/"
        }
    ]
    properties['pc:type'] = 'lidar' # eopc, lidar, radar, sonar
    properties['pc:epsg'] = srs.split(':')[-1]

    try:
        properties['pc:density'] = boundary['avg_pt_per_sq_unit']
    except KeyError:
        properties['pc:density'] = 0
    properties['pc:count'] = count

    properties['datetime'] = capture_date(copc)

    output['properties'] = properties
    output['assets'] = assets
    output['stac_extensions'] = ['https://stac-extensions.github.io/pointcloud/v1.0.0/schema.json']
    output['stac_version'] = '1.0.0'

    
    out_filename = str(f"{json_out_loc}{laz.split('/')[-1].replace('.laz','.json')}")
    print(out_filename)
    
    
    self_link = {'rel':'self',"href":'./example-autzen.json'}
    lic_link = {'rel':'license',"href":'https://github.com/PDAL/data/blob/master/LICENSE'}
    output['links'] = [self_link, lic_link]
    
    s3_client.put_object(
        Body=json.dumps(output, sort_keys=True, indent=2, separators=(',', ': ')), 
        Bucket=bucket,
        Key= out_filename
    )



STAC_LiDAR/JSON/bc_092o018_3_2_4_xyes_12_utm10_2018.json
STAC_LiDAR/JSON/bc_092o018_3_4_2_xyes_12_utm10_2018.json
STAC_LiDAR/JSON/bc_092o018_3_4_4_xyes_12_utm10_2018.json
STAC_LiDAR/JSON/bc_092o018_4_1_3_xyes_12_utm10_2018.json
STAC_LiDAR/JSON/bc_092o018_4_1_4_xyes_12_utm10_2018.json
STAC_LiDAR/JSON/bc_092o018_4_3_1_xyes_12_utm10_2018.json
STAC_LiDAR/JSON/bc_092o018_4_3_2_xyes_12_utm10_2018.json
STAC_LiDAR/JSON/bc_092o018_4_3_3_xyes_12_utm10_2018.json
STAC_LiDAR/JSON/bc_092o018_4_3_4_xyes_12_utm10_2018.json


In [9]:
response = s3_client.list_objects_v2(Bucket=bucket, Prefix=json_out_loc, StartAfter=json_out_loc)

if 'Contents' in response:
    # Iterate over objects and print their names
    for obj in response['Contents']:
        laz_objects.append(obj['Key'])
        print(obj['Key'])
        print(f"Object Size {obj['Size']}")
else:
    print("No objects found in the bucket.")

STAC_LiDAR/JSON/bc_092o018_3_2_4_xyes_12_utm10_2018.json
Object Size 8613
STAC_LiDAR/JSON/bc_092o018_3_4_2_xyes_12_utm10_2018.json
Object Size 9958
STAC_LiDAR/JSON/bc_092o018_3_4_4_xyes_12_utm10_2018.json
Object Size 9234
STAC_LiDAR/JSON/bc_092o018_4_1_3_xyes_12_utm10_2018.json
Object Size 10496
STAC_LiDAR/JSON/bc_092o018_4_1_4_xyes_12_utm10_2018.json
Object Size 7724
STAC_LiDAR/JSON/bc_092o018_4_3_1_xyes_12_utm10_2018.json
Object Size 7735
STAC_LiDAR/JSON/bc_092o018_4_3_2_xyes_12_utm10_2018.json
Object Size 7362
STAC_LiDAR/JSON/bc_092o018_4_3_3_xyes_12_utm10_2018.json
Object Size 8684
STAC_LiDAR/JSON/bc_092o018_4_3_4_xyes_12_utm10_2018.json
Object Size 7514
