# STAC Catalog Setup
CFolkers
Geospatial Services 
2024 02 12

Modified from the PySTAC tutorial https://github.com/stac-utils/pystac/blob/8079dd3c0cbe8f6f9e48f499ea90f6a5798eaeab/docs/tutorials/how-to-create-stac-catalogs.ipynb
and the STAC point cloud extension example https://github.com/stac-extensions/pointcloud/blob/main/examples/pdal-to-stac.py

In [33]:
import logging
import constants
import boto3
from botocore.exceptions import ClientError
import os
from os import path
from pathlib import Path
import pystac 
import pdal
from osgeo import ogr
from osgeo import osr
import json

In [None]:
# Bucket name is read from the local `constants` module.
# Observation from the original author: for some reason the bucket value
# appears to gain an extra letter at the end (unresolved).
bucket = constants.AWS_S3_BUCKET

# Create an S3 client for the third-party object storage, using the
# endpoint and credentials configured in `constants`.
s3_client = boto3.client(
    service_name="s3",
    endpoint_url=constants.AWS_S3_ENDPOINT,
    aws_access_key_id=constants.AWS_ACCESS_KEY_ID,
    aws_secret_access_key=constants.AWS_SECRET_ACCESS_KEY,
)

# Echo the client object and the bucket name as a quick sanity check.
print(f"{s3_client} {bucket}")

In [48]:
# List every object stored under the point-cloud prefix of the bucket.
# NOTE(review): keys are not filtered by extension; the prefix is presumably
# expected to contain only .laz files.
object_key = "STAC_LiDAR/PointClouds/"
laz_objects = []

# A single list_objects_v2 call returns at most 1,000 keys, so walk every
# result page with a paginator instead of reading only the first response.
# StartAfter=object_key is kept from the original call: listing starts after
# that key, so a key equal to the prefix itself is not returned.
paginator = s3_client.get_paginator("list_objects_v2")
for response in paginator.paginate(
    Bucket=bucket, Prefix=object_key, StartAfter=object_key
):
    # 'Contents' is absent from a page that holds no objects.
    for obj in response.get("Contents", []):
        laz_objects.append(obj["Key"])
        print(obj["Key"])

if not laz_objects:
    print("No objects found in the bucket.")
    

STAC_LiDAR/PointClouds/bc_092o018_3_2_4_xyes_12_utm10_2018.laz
STAC_LiDAR/PointClouds/bc_092o018_3_4_2_xyes_12_utm10_2018.laz
STAC_LiDAR/PointClouds/bc_092o018_3_4_4_xyes_12_utm10_2018.laz
STAC_LiDAR/PointClouds/bc_092o018_4_1_3_xyes_12_utm10_2018.laz
STAC_LiDAR/PointClouds/bc_092o018_4_1_4_xyes_12_utm10_2018.laz
STAC_LiDAR/PointClouds/bc_092o018_4_3_1_xyes_12_utm10_2018.laz
STAC_LiDAR/PointClouds/bc_092o018_4_3_2_xyes_12_utm10_2018.laz
STAC_LiDAR/PointClouds/bc_092o018_4_3_3_xyes_12_utm10_2018.laz
STAC_LiDAR/PointClouds/bc_092o018_4_3_4_xyes_12_utm10_2018.laz


In [56]:
# One way to access the objects: build a presigned `get_object` URL for each
# key. `url_dict` maps object key -> URL.
url_dict = {
    object_name: s3_client.generate_presigned_url(
        "get_object",
        Params={"Bucket": bucket, "Key": object_name},
        ExpiresIn=3600,  # expiration time in seconds (1 hour)
    )
    for object_name in laz_objects
}

In [57]:
# Attempt to access a .laz file from S3 via its presigned link and collect
# the hexbin, stats and info metadata with a PDAL pipeline.
# NOTE: the recorded run of this cell failed inside execute() with
# "readers.las: Couldn't read LAS header"; that problem is not addressed here.

for key in url_dict:

    # JSON-style pipeline specification: one reader followed by three filters.
    pipeline = {
        "pipeline": [
            {
                "type": "readers.las",
                "filename": url_dict[key],
            },
            {
                "type": "filters.hexbin"
            },
            {
                "type": "filters.stats"
            },
            {
                "type": "filters.info"
            }
            # Add more processing or filters if needed
        ]
    }
    reader = pdal.Pipeline(json.dumps(pipeline))
    reader.execute()

    # Metadata belongs to the executed pdal.Pipeline object (`reader`).
    # `pipeline` is only the plain dict specification and has no `.metadata`
    # attribute, so reading it there raised AttributeError.
    boundary = reader.metadata['metadata']['filters.hexbin']
    stats = reader.metadata['metadata']['filters.stats']
    info = reader.metadata['metadata']['filters.info']

    # Only the first object is processed while this approach is being tested.
    break


RuntimeError: readers.las: Couldn't read LAS header. File size insufficient.

In [None]:
# Second attempt at reading the .laz files through their presigned S3 links,
# this time composing PDAL stage objects instead of a JSON pipeline.
# Not working.
for presigned in url_dict.values():
    # One reader stage followed by the three metadata-producing filters.
    reader_stage = pdal.Reader.copc(presigned)
    hexbin_stage = pdal.Filter.hexbin()
    stats_stage = pdal.Filter.stats()
    info_stage = pdal.Filter.info()

    pipeline: pdal.Pipeline = (
        reader_stage | hexbin_stage | stats_stage | info_stage
    )

    count = pipeline.execute()

    # Each stage's metadata is stored under that stage's type name.
    boundary = pipeline.metadata['metadata'][hexbin_stage.type]
    stats = pipeline.metadata['metadata'][stats_stage.type]
    info = pipeline.metadata['metadata'][info_stage.type]
    copc = pipeline.metadata['metadata'][reader_stage.type]

In [None]:
# Another potential way: fetch each object with get_object, stream its body
# to a temporary local file, and run the PDAL pipeline on that file.
import shutil
import tempfile

for laz in laz_objects:
    try:
        response = s3_client.get_object(Bucket=bucket, Key=laz)
        # Access the object data
        object_data = response
        print("Object data:",laz,  object_data)

        # A PDAL reader takes a filename, not the boto3 response dict that was
        # passed originally, so copy the streamed body to disk first. The
        # temporary directory is deleted when the `with` block exits, which is
        # why the metadata is extracted inside it.
        with tempfile.TemporaryDirectory() as tmp_dir:
            local_path = os.path.join(tmp_dir, os.path.basename(laz))
            with open(local_path, "wb") as local_file:
                shutil.copyfileobj(response["Body"], local_file)

            # NOTE(review): the keys end in plain ".laz"; if the tiles are not
            # COPC-formatted, readers.copc will reject them and
            # pdal.Reader.las should be used instead. TODO confirm.
            r = pdal.Reader.copc(local_path)
            hb = pdal.Filter.hexbin()
            s = pdal.Filter.stats()
            i = pdal.Filter.info()

            pipeline: pdal.Pipeline = r | hb | s | i

            count = pipeline.execute()

            # Each stage's metadata is stored under that stage's type name.
            boundary = pipeline.metadata['metadata'][hb.type]
            stats = pipeline.metadata['metadata'][s.type]
            info = pipeline.metadata['metadata'][i.type]
            copc = pipeline.metadata['metadata'][r.type]
    except Exception as e:
        # Best-effort exploration: report the failure instead of stopping.
        print("Error:", e)
    # Only the first object is processed while this approach is being tested.
    break

In [None]:
# IPython help query: show the documentation for pystac.Catalog.
?pystac.Catalog

In [4]:
# Create the STAC catalog object used for the LiDAR test.
catalog = pystac.Catalog(
    id="lidar-test",
    description="Test catalog for the potential use of STAC to access open LiDAR Data",
)