# Create STAC Catalog of .laz Files
CFolkers
Geospatial Services 
2024 03 04

modified from https://github.com/stac-utils/pystac/blob/8079dd3c0cbe8f6f9e48f499ea90f6a5798eaeab/docs/tutorials/how-to-create-stac-catalogs.ipynb

In [22]:
import os 
import pystac
from pystac import Link
from pystac.stac_io import DefaultStacIO
from botocore import UNSIGNED
from botocore.config import Config
import boto3
import constants
import os
import urllib.request
import json
from typing import Union, Any
from urllib.parse import urlparse
from shapely.geometry import Polygon, mapping,shape, MultiPolygon

from datetime import datetime

In [2]:
# Key prefix used when listing the .laz objects in the bucket.
object_key = "STAC_LiDAR/PointClouds/"
# Key prefix of the S3 storage location that holds the JSON files.
json_key = "STAC_LiDAR/JSON/"

In [3]:
# Create an S3 client for the third-party object storage. The endpoint and
# both credentials are read from the local `constants` module.
_s3_connection_settings = {
    "endpoint_url": constants.AWS_S3_ENDPOINT,
    "aws_access_key_id": constants.AWS_ACCESS_KEY_ID,
    "aws_secret_access_key": constants.AWS_SECRET_ACCESS_KEY,
}
s3_client = boto3.client("s3", **_s3_connection_settings)

# Bucket name taken from `constants`.
# NOTE(review): the original author observed the bucket name picking up an
# extra letter at the end; the cause is unknown and still needs investigating.
test_bucket = constants.AWS_S3_BUCKET

In [4]:
class CustomStacIO(DefaultStacIO):
    """StacIO implementation that serves ``s3://`` hrefs through boto3.

    Hrefs with any other scheme (or no scheme) are handed to
    ``DefaultStacIO`` unchanged.
    """

    def __init__(self):
        # NOTE: despite the attribute name this is a boto3 *resource*, not a
        # client; the name is kept so existing references keep working.
        self.s3_client = boto3.resource("s3")
        super().__init__()

    @staticmethod
    def _resolve_href(target: Union[str, Link]) -> str:
        """Return *target* as a plain href string.

        ``urlparse`` only understands strings, so a ``Link`` has to be
        resolved to its absolute href before it can be inspected.

        Raises:
            OSError: if *target* is a ``Link`` without an absolute href.
        """
        if isinstance(target, Link):
            href = target.get_absolute_href()
            if href is None:
                raise OSError(f"Could not get an absolute HREF from link {target}")
            return href
        # Handles plain strings as well as os.PathLike values.
        return os.fspath(target)

    def read_text(self, source: Union[str, Link], *args: Any, **kwargs: Any) -> str:
        """Read and return the text stored at *source*.

        Args:
            source: href string or ``Link`` to read from.
            *args, **kwargs: forwarded to ``DefaultStacIO.read_text`` for
                non-S3 sources.

        Returns:
            The object body decoded as UTF-8 for ``s3://`` sources, otherwise
            whatever ``DefaultStacIO.read_text`` returns.
        """
        parsed = urlparse(self._resolve_href(source))
        if parsed.scheme == "s3":
            bucket = parsed.netloc
            key = parsed.path[1:]  # drop the leading "/" of the URL path

            obj = self.s3_client.Object(bucket, key)
            return obj.get()["Body"].read().decode("utf-8")
        # Pass the original argument through so the base class resolves it
        # in its own way.
        return super().read_text(source, *args, **kwargs)

    def write_text(
        self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
    ) -> None:
        """Write *txt* to *dest*.

        Args:
            dest: href string or ``Link`` to write to.
            txt: text to store.
            *args, **kwargs: forwarded to ``DefaultStacIO.write_text`` for
                non-S3 destinations.
        """
        parsed = urlparse(self._resolve_href(dest))
        if parsed.scheme == "s3":
            bucket = parsed.netloc
            key = parsed.path[1:]  # drop the leading "/" of the URL path
            # NOTE(review): ContentEncoding is set to "utf-8" as in the
            # original code; confirm this is the intended object metadata.
            self.s3_client.Object(bucket, key).put(Body=txt, ContentEncoding="utf-8")
        else:
            super().write_text(dest, txt, *args, **kwargs)


In [7]:
# Pair every point cloud with its JSON file by file name (the part before the
# extension) instead of by listing position, so an object that is missing or
# out of order on either side cannot shift the pairing of the others.
#   laz_dict     : {laz object key: json object key}
#   laz_items_id : {index as str: {"laz": s3 URI of the laz object}}
# TODO: both dicts are kept for now; decide whether one of them is enough
# (e.g. a single dict keyed by index holding both the laz and json locations).

laz_dict={}
laz_items_id={}

# NOTE(review): a single list_objects_v2 call is made per prefix; confirm the
# prefixes never hold more objects than one response page returns.
laz_response = s3_client.list_objects_v2(Bucket=test_bucket, Prefix=object_key, StartAfter=object_key)
json_repsone= s3_client.list_objects_v2(Bucket=test_bucket, Prefix=json_key, StartAfter=json_key)
if 'Contents' in laz_response and 'Contents' in json_repsone:
    # Look-up table: file name without extension -> JSON object key.
    json_by_stem = {
        os.path.splitext(os.path.basename(entry['Key']))[0]: entry['Key']
        for entry in json_repsone['Contents']
    }
    for entry in laz_response['Contents']:
        laz_key = entry['Key']
        stem = os.path.splitext(os.path.basename(laz_key))[0]
        matching_json = json_by_stem.get(stem)
        if matching_json is None:
            # Without its JSON no item can be built for this tile later on.
            print(f"No JSON found for {laz_key}; skipping.")
            continue
        # Index by number of pairs accepted so far, so ids stay contiguous.
        laz_items_id[str(len(laz_dict))]={"laz":f"s3://{test_bucket}/{laz_key}"}
        laz_dict[laz_key]=matching_json
else:
    print("No objects found in the bucket.")
print(laz_dict)
print(laz_items_id)

{'STAC_LiDAR/PointClouds/bc_092o018_3_2_4_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_3_2_4_xyes_12_utm10_2018.json', 'STAC_LiDAR/PointClouds/bc_092o018_3_4_2_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_3_4_2_xyes_12_utm10_2018.json', 'STAC_LiDAR/PointClouds/bc_092o018_3_4_4_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_3_4_4_xyes_12_utm10_2018.json', 'STAC_LiDAR/PointClouds/bc_092o018_4_1_3_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_1_3_xyes_12_utm10_2018.json', 'STAC_LiDAR/PointClouds/bc_092o018_4_1_4_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_1_4_xyes_12_utm10_2018.json', 'STAC_LiDAR/PointClouds/bc_092o018_4_3_1_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_3_1_xyes_12_utm10_2018.json', 'STAC_LiDAR/PointClouds/bc_092o018_4_3_2_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_3_2_xyes_12_utm10_2018.json', 'STAC_LiDAR/PointClouds/bc_092o018_4_3_3_xyes_12_utm10_2018.laz': 'STAC_LiDAR/JSON/bc_092o018_4_3_3_xyes_12_utm10_201

In [21]:
# Build one pystac.Item per point cloud. For every laz key the matching JSON
# is downloaded from S3, its 'bbox' is turned into a 2D bbox and footprint,
# and the resulting Item replaces the entry in laz_items_id.
from datetime import timezone  # only `datetime` is imported at the top of the file

laz_id_to_items={}
os.environ["AWS_NO_SIGN_REQUEST"] = "true"
s3 = boto3.resource('s3')
# laz_items_id and laz_dict are walked in parallel: the first supplies the
# item index, the second the laz object key.
for item_index, laz in zip(laz_items_id,laz_dict):
    laz_uri=laz
    print(f"Processing {laz_uri}")
    # Read the JSON for the corresponding laz file from S3.
    laz_json=s3_client.get_object(Bucket=test_bucket, Key=laz_dict[laz])
    json_text = laz_json["Body"].read().decode()
    json_content= json.loads(json_text)

    # Extract the horizontal bbox. A 6-value bbox is
    # [minx, miny, minz, maxx, maxy, maxz]; a 4-value bbox is
    # [minx, miny, maxx, maxy].
    bbox_coords = json_content['bbox']
    if len(bbox_coords) == 6:
        left, bottom, right, top = bbox_coords[0], bbox_coords[1], bbox_coords[3], bbox_coords[4]
    elif len(bbox_coords) == 4:
        left, bottom, right, top = bbox_coords
    else:
        raise ValueError(
            f"Unexpected bbox length {len(bbox_coords)} in {laz_dict[laz]}; expected 4 or 6 values"
        )
    bbox = [left, bottom, right, top]
    # Closed ring around the bbox, converted to a GeoJSON-like mapping.
    footprint = Polygon([(left, bottom), (right, bottom), (right, top), (left, top), (left, bottom)])
    footprint=mapping(footprint)

    item=pystac.Item(
        id=f"laz{item_index}",
        geometry=footprint,
        bbox=bbox,
        # Timezone-aware "now" in UTC (datetime.utcnow() is naive and deprecated).
        # NOTE(review): this is the processing time, not the acquisition time of the data.
        datetime=datetime.now(timezone.utc),
        properties={},
    )

    # TODO: check whether any common metadata fields (maybe license?) are worth setting:
    # https://pystac.readthedocs.io/en/latest/api/common_metadata.html

    # TODO: should the asset be the laz file rather than the json? pystac has
    # no media type for laz: https://pystac.readthedocs.io/en/stable/api/media_type.html
    asset = pystac.Asset(href=laz_dict[laz], media_type=pystac.MediaType.JSON)
    item.add_asset(key="bc", asset=asset)

    # Overwrites an existing key only, so the dict size does not change
    # while it is being iterated.
    laz_items_id[item_index] = item
    


Processing STAC_LiDAR/PointClouds/bc_092o018_3_2_4_xyes_12_utm10_2018.laz
Processing STAC_LiDAR/PointClouds/bc_092o018_3_4_2_xyes_12_utm10_2018.laz
Processing STAC_LiDAR/PointClouds/bc_092o018_3_4_4_xyes_12_utm10_2018.laz
Processing STAC_LiDAR/PointClouds/bc_092o018_4_1_3_xyes_12_utm10_2018.laz
Processing STAC_LiDAR/PointClouds/bc_092o018_4_1_4_xyes_12_utm10_2018.laz
Processing STAC_LiDAR/PointClouds/bc_092o018_4_3_1_xyes_12_utm10_2018.laz
Processing STAC_LiDAR/PointClouds/bc_092o018_4_3_2_xyes_12_utm10_2018.laz
Processing STAC_LiDAR/PointClouds/bc_092o018_4_3_3_xyes_12_utm10_2018.laz
Processing STAC_LiDAR/PointClouds/bc_092o018_4_3_4_xyes_12_utm10_2018.laz


In [26]:
# Create the collection: its spatial extent is the bounds of all item
# footprints, its temporal extent runs from the earliest to the latest item
# datetime.
footprints = [shape(entry.geometry).envelope for entry in laz_items_id.values()]
collection_bbox = MultiPolygon(footprints).bounds
spatial_extent = pystac.SpatialExtent(bboxes=[collection_bbox])

datetimes = sorted(entry.datetime for entry in laz_items_id.values())
first_datetime, last_datetime = datetimes[0], datetimes[-1]
temporal_extent = pystac.TemporalExtent(intervals=[[first_datetime, last_datetime]])

collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

# NOTE(review): the license value is free text with a trailing space; confirm
# it is what the STAC `license` field should contain.
collection = pystac.Collection(
    id="Lidar-BC-Test",
    description="Test STAC of BC Open Lidar in STAC",
    extent=collection_extent,
    license="BC Open Data ",
)

# Attach every item to the collection.
collection.add_items(laz_items_id.values())

[<Link rel=item target=<Item id=laz0>>,
 <Link rel=item target=<Item id=laz1>>,
 <Link rel=item target=<Item id=laz2>>,
 <Link rel=item target=<Item id=laz3>>,
 <Link rel=item target=<Item id=laz4>>,
 <Link rel=item target=<Item id=laz5>>,
 <Link rel=item target=<Item id=laz6>>,
 <Link rel=item target=<Item id=laz7>>,
 <Link rel=item target=<Item id=laz8>>]

In [27]:
# Print the collection followed by the items linked under it.
collection.describe()

* <Collection id=Lidar-BC-Test>
  * <Item id=laz0>
  * <Item id=laz1>
  * <Item id=laz2>
  * <Item id=laz3>
  * <Item id=laz4>
  * <Item id=laz5>
  * <Item id=laz6>
  * <Item id=laz7>
  * <Item id=laz8>


In [28]:
# Create the STAC catalog and attach the collection as its child.
catalog = pystac.Catalog(
    id="lidar-test",
    description="Test catalog for the potential use of STAC to access open LiDAR Data",
)
catalog.add_child(collection)

In [29]:
# Print the catalog tree: the catalog, its child collection and that collection's items.
catalog.describe()

* <Catalog id=lidar-test>
    * <Collection id=Lidar-BC-Test>
      * <Item id=laz0>
      * <Item id=laz1>
      * <Item id=laz2>
      * <Item id=laz3>
      * <Item id=laz4>
      * <Item id=laz5>
      * <Item id=laz6>
      * <Item id=laz7>
      * <Item id=laz8>
