# Create STAC Catalog of .laz Files
CFolkers
Geospatial Services 
2024 03 04

Resources: https://github.com/stac-utils/pystac/blob/8079dd3c0cbe8f6f9e48f499ea90f6a5798eaeab/docs/tutorials/how-to-create-stac-catalogs.ipynb
https://stacspec.org/en/tutorials/2-create-stac-catalog-python/
https://github.com/stac-utils/pystac/blob/main/tests/extensions/test_pointcloud.py
https://pystac.readthedocs.io/en/latest/api/extensions/pointcloud.html

In [75]:
import os 
import pystac
from pystac import Link
from pystac.stac_io import DefaultStacIO
from pystac.extensions.pointcloud import (
    AssetPointcloudExtension,
    PhenomenologyType,
    PointcloudExtension,
    Schema,
    SchemaType,
    Statistic,
)
from botocore import UNSIGNED
from botocore.config import Config
import boto3
import constants
import os
import urllib.request
import json
from typing import Union, Any
from urllib.parse import urlparse
from shapely.geometry import Polygon, mapping,shape, MultiPolygon

from datetime import datetime

In [2]:
#list .laz objects in bucket
object_key=r'STAC_LiDAR/PointClouds/'
#s3 storage location for json files
json_key=r'STAC_LiDAR/JSON/'

In [3]:
# use third party object storage to create an S3 Client
s3_client = boto3.client(
    "s3",
    endpoint_url=constants.AWS_S3_ENDPOINT,
    aws_access_key_id=constants.AWS_ACCESS_KEY_ID,
    aws_secret_access_key=constants.AWS_SECRET_ACCESS_KEY,
)
# for some reason the bucket is adding an extra letter at the end???
test_bucket = constants.AWS_S3_BUCKET

In [4]:
#may not be needed 

class CustomStacIO(DefaultStacIO):
    def __init__(self):
        self.s3_client = boto3.resource("s3")
        super().__init__()

    def read_text(self, source: Union[str, Link], *args: Any, **kwargs: Any) -> str:
        parsed = urlparse(source)
        if parsed.scheme == "s3":
            bucket = parsed.netloc
            key = parsed.path[1:]

            obj = self.s3_client.Object(bucket, key)
            return obj.get()["Body"].read().decode("utf-8")
        else:
            return super().read_text(source, *args, **kwargs)

    def write_text(
        self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
    ) -> None:
        parsed = urlparse(dest)
        if parsed.scheme == "s3":
            bucket = parsed.netloc
            key = parsed.path[1:]
            self.s3_client.Object(bucket, key).put(Body=txt, ContentEncoding="utf-8")
        else:
            super().write_text(dest, txt, *args, **kwargs)


In [55]:
#need more dynamic way to create dictionary in case there things are not in order, or missing values 
# also not sure which dict I can keep
#laz dict returns a dict of s3 locations for laz:json, maybe do this the same as below dict of dict with index as first key, then laz:json?
#laz_items_id returns a dict of dicts of index_num:{'type':stac location}

laz_dict={}

laz_response = s3_client.list_objects_v2(Bucket=test_bucket, Prefix=object_key, StartAfter=object_key)
json_repsone= s3_client.list_objects_v2(Bucket=test_bucket, Prefix=json_key, StartAfter=json_key)

if 'Contents' in laz_response and 'Contents' in json_repsone:
    # Iterate over objects and print their names
    for index, (key, value) in enumerate(zip(laz_response['Contents'],json_repsone['Contents'])):
        laz_dict[index]={key['Key']:value['Key']}
else:
    print("No objects found in the bucket.")
print(laz_dict)

{0: {'STAC_LiDAR/PointClouds/bc_092o018_3_2_4_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_3_2_4_xyes_12_utm10_2018.json'}, 1: {'STAC_LiDAR/PointClouds/bc_092o018_3_4_2_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_3_4_2_xyes_12_utm10_2018.json'}, 2: {'STAC_LiDAR/PointClouds/bc_092o018_3_4_4_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_3_4_4_xyes_12_utm10_2018.json'}, 3: {'STAC_LiDAR/PointClouds/bc_092o018_4_1_3_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_1_3_xyes_12_utm10_2018.json'}, 4: {'STAC_LiDAR/PointClouds/bc_092o018_4_1_4_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_1_4_xyes_12_utm10_2018.json'}, 5: {'STAC_LiDAR/PointClouds/bc_092o018_4_3_1_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_3_1_xyes_12_utm10_2018.json'}, 6: {'STAC_LiDAR/PointClouds/bc_092o018_4_3_2_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_3_2_xyes_12_utm10_2018.json'}, 7: {'STAC_LiDAR/PointClouds/bc_092o018_4_3_3_xyes_12_utm10_2018.laz': 'STAC_LiDAR/

In [87]:
laz_id_to_items={}
os.environ["AWS_NO_SIGN_REQUEST"] = "true"
s3 = boto3.resource('s3')

for id, laz_info in laz_dict.items():
    for key in laz_info:
        # print(id)
        # print(key)
        # print(laz_info[key])
        laz_uri=key
        print(f"Processing {laz_uri}")
        #read json for corresponding laz file from s3
        laz_json=s3_client.get_object(Bucket=test_bucket, Key=laz_info[key])
        json_text = laz_json["Body"].read().decode()
        json_content= json.loads(json_text)

        # Extract bbox coordinates
        bbox_coords = json_content['bbox']
        left, bottom, right, top = bbox_coords[0], bbox_coords[1], bbox_coords[3], bbox_coords[4]   
        # Create bounding box
        bbox = [left, bottom, right, top]
        footprint = Polygon([(left, bottom), (right, bottom), (right, top), (left, top), (left, bottom)])
        footprint=mapping(footprint)
        
        point_count=(json_content['properties']['pc:count'])
        point_density=(json_content['properties']["pc:density"])
        point_schemas=(json_content['properties']["pc:schemas"])   # need to fix schema some how https://pystac.readthedocs.io/en/latest/api/extensions/pointcloud.html#pystac.extensions.pointcloud.Schema
        point_type=(json_content['properties']["pc:type"])
        point_stats=(json_content['properties']["pc:statistics"])
        point_epsg=(json_content['properties']["pc:epsg"])
        
        print(point_schemas)
        
        item=pystac.Item(
            id=f"laz{id}", 
            geometry=footprint,
            bbox=bbox,
            datetime=datetime.utcnow(),
            properties={},
        )
        item.validate
        
        #look to see if any of the common_metadata would be good for us
        # maybe license?
        #https://pystac.readthedocs.io/en/latest/api/common_metadata.html
        # item.common_metadata.gsd = 0.3
        # item.common_metadata.platform = "Maxar"
        # item.common_metadata.instruments = ["WorldView3"]
        item.common_metadata.object=key 
        
        
        #set point cloud extension
        pc_ext = PointcloudExtension.ext(item,  add_if_missing=True).apply(
            count= point_count,
            type= point_type,
            encoding= "binary",                                   # not sure what to put for this??????
            schemas= point_schemas,
            density= point_density,
            statistics=point_stats #,
            #epsg= point_epsg
        )
        
        #both json and laz should be the assets 

        
        asset=pystac.Asset(href=laz_info, media_type='application/octet-stream')
        item.add_asset(key=f"laz{id}" , asset=asset)
        ext = AssetPointcloudExtension(asset)
        
        
        #both json and laz should be the assets 
        
        # https://pystac.readthedocs.io/en/latest/api/extensions/pointcloud.html
        
        
        asset = pystac.Asset(href=laz_info[key], media_type=pystac.MediaType.JSON)
        item.add_asset(key=f"json{id}", asset=asset)
        
        
    break

Processing STAC_LiDAR/PointClouds/bc_092o018_3_2_4_xyes_12_utm10_2018.laz
[{'name': 'X', 'size': 8, 'type': 'floating'}, {'name': 'Y', 'size': 8, 'type': 'floating'}, {'name': 'Z', 'size': 8, 'type': 'floating'}, {'name': 'Intensity', 'size': 2, 'type': 'unsigned'}, {'name': 'ReturnNumber', 'size': 1, 'type': 'unsigned'}, {'name': 'NumberOfReturns', 'size': 1, 'type': 'unsigned'}, {'name': 'ScanDirectionFlag', 'size': 1, 'type': 'unsigned'}, {'name': 'EdgeOfFlightLine', 'size': 1, 'type': 'unsigned'}, {'name': 'Classification', 'size': 1, 'type': 'unsigned'}, {'name': 'Synthetic', 'size': 1, 'type': 'unsigned'}, {'name': 'KeyPoint', 'size': 1, 'type': 'unsigned'}, {'name': 'Withheld', 'size': 1, 'type': 'unsigned'}, {'name': 'Overlap', 'size': 1, 'type': 'unsigned'}, {'name': 'ScanAngleRank', 'size': 4, 'type': 'floating'}, {'name': 'UserData', 'size': 1, 'type': 'unsigned'}, {'name': 'PointSourceId', 'size': 2, 'type': 'unsigned'}, {'name': 'GpsTime', 'size': 8, 'type': 'floating'}]


TypeError: PointcloudExtension.apply() missing 1 required positional argument: 'schemas'

In [48]:
#Create collection
footprints = list(map(lambda i: shape(i.geometry).envelope, laz_items_id.values()))
collection_bbox = MultiPolygon(footprints).bounds
spatial_extent = pystac.SpatialExtent(bboxes=[collection_bbox])
datetimes = sorted(list(map(lambda i: i.datetime, laz_items_id.values())))
temporal_extent = pystac.TemporalExtent(intervals=[[datetimes[0], datetimes[-1]]])
collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

collection = pystac.Collection(
    id="Lidar-BC-Test",
    description="Test STAC of BC Open Lidar in STAC",
    extent=collection_extent,
    license="BC Open Data ",
)

collection.add_items(laz_items_id.values())

AttributeError: 'dict' object has no attribute 'geometry'

In [27]:
collection.describe()

* <Collection id=Lidar-BC-Test>
  * <Item id=laz0>
  * <Item id=laz1>
  * <Item id=laz2>
  * <Item id=laz3>
  * <Item id=laz4>
  * <Item id=laz5>
  * <Item id=laz6>
  * <Item id=laz7>
  * <Item id=laz8>


In [28]:
#create STAC 
catalog = pystac.Catalog(id="lidar-test", description="Test catalog for the potential use of STAC to access open LiDAR Data")
catalog.add_child(collection)

In [29]:
catalog.describe()

* <Catalog id=lidar-test>
    * <Collection id=Lidar-BC-Test>
      * <Item id=laz0>
      * <Item id=laz1>
      * <Item id=laz2>
      * <Item id=laz3>
      * <Item id=laz4>
      * <Item id=laz5>
      * <Item id=laz6>
      * <Item id=laz7>
      * <Item id=laz8>
