In [1]:
import os
import json
import rasterio
import urllib.request
import pystac
from datetime import datetime, timezone
from shapely.geometry import Polygon, mapping
from tempfile import TemporaryDirectory
import logging
import constants
import boto3
from botocore.exceptions import ClientError

loading dot env...


In [2]:
#set up logging 
logging.basicConfig(level=logging.DEBUG)
debug=logging.debug
info=logging.info
warning=logging.warning
error=logging.error

In [3]:
# location for STAC catalogs
stac_loc=r"STAC_LiDAR/catalogs/"

#list DEM objects in bucket
object_key=r"STAC_LiDAR/DEM/"

#s3 storage location for json files
json_out_loc=r'STAC_LiDAR/JSON/'

# use third party object storage to create an S3 Client
s3_client = boto3.client(
    "s3",
    endpoint_url=constants.AWS_S3_ENDPOINT,
    aws_access_key_id=constants.AWS_ACCESS_KEY_ID,
    aws_secret_access_key=constants.AWS_SECRET_ACCESS_KEY,
)
# for some reason the bucket is adding an extra letter at the end???
bucket = constants.AWS_S3_BUCKET



DEBUG:botocore.hooks:Changing event name from creating-client-class.iot-data to creating-client-class.iot-data-plane
DEBUG:botocore.hooks:Changing event name from before-call.apigateway to before-call.api-gateway
DEBUG:botocore.hooks:Changing event name from request-created.machinelearning.Predict to request-created.machine-learning.Predict
DEBUG:botocore.hooks:Changing event name from before-parameter-build.autoscaling.CreateLaunchConfiguration to before-parameter-build.auto-scaling.CreateLaunchConfiguration
DEBUG:botocore.hooks:Changing event name from before-parameter-build.route53 to before-parameter-build.route-53
DEBUG:botocore.hooks:Changing event name from request-created.cloudsearchdomain.Search to request-created.cloudsearch-domain.Search
DEBUG:botocore.hooks:Changing event name from docs.*.autoscaling.CreateLaunchConfiguration.complete-section to docs.*.auto-scaling.CreateLaunchConfiguration.complete-section
DEBUG:botocore.hooks:Changing event name from before-parameter-buil

In [4]:
# find all DEM objects in s3 and add to a list, and check to make sure they have an object size 
DEM_Objects=[]

response = s3_client.list_objects_v2(Bucket=bucket, Prefix=object_key, StartAfter=object_key)

if 'Contents' in response:
    # Iterate over objects and print their names
    for obj in response['Contents']:
        DEM_Objects.append(obj['Key'])
        info(obj['Key'])
        info(f"Object Size {obj['Size']}")
        
else:
    error("No objects found in the bucket.")

DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <function set_list_objects_encoding_type_url at 0x7fb1afa0f7e0>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <function validate_bucket_name at 0x7fb1afa0e340>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <function remove_bucket_from_url_paths_from_model at 0x7fb1afa304a0>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <bound method S3RegionRedirectorv2.annotate_request_context of <botocore.utils.S3RegionRedirectorv2 object at 0x7fb1aea7b510>>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <bound method S3ExpressIdentityResolver.inject_signing_cache_key of <botocore.utils.S3ExpressIdentityResolver object at 0x7fb1af3be3d0>>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <function generate_idempotent_uuid at 0x7fb1afa0e

In [5]:
#find all DEMs 
 
dem_path=r'/home/cfolkers/STAC_LiDAR/DEM/'

og_dems=os.listdir(dem_path)

info(og_dems)

# for dem in og_dems:
#     og_loc=os.path.join(dem_path,og_dems[1])

INFO:root:['bc_092o019_xli1m_utm10_2018_2_cog.tif', 'bc_092o018_xli1m_utm10_2018_cog.tif', 'bc_092o020_xli1m_utm10_2018_2_cog.tif', 'bc_092o029_xli1m_utm10_2019_cog.tif', 'bc_092o020_xli1m_utm10_2018_cog.tif', 'bc_092o018_xli1m_utm10_2018_2_cog.tif', 'bc_092o030_xli1m_utm10_2018_cog.tif', 'bc_092o028_xli1m_utm10_2018_cog.tif', 'bc_092o019_xli1m_utm10_2018_cog.tif']


In [6]:

# create STAC Catalog 
catalog = pystac.Catalog(id='DEM_Test',
                        description='This catalog is a basic demonstration catalog utilizing using nine Digital Elevation Models, in the cloud optimized geotif(COG) format, that overlap with the point cloud test'
                        )

In [7]:
#check properties 
print(json.dumps(catalog.to_dict(), indent=4))

{
    "type": "Catalog",
    "id": "DEM_Test",
    "stac_version": "1.0.0",
    "description": "This catalog is a basic demonstration catalog utilizing using nine Digital Elevation Models, in the cloud optimized geotif(COG) format, that overlap with the point cloud test",
    "links": []
}


In [8]:
#function to get data for json
def get_bbox_and_footprint(raster):
    with rasterio.open(raster) as r:
        bounds = r.bounds
        bbox = [bounds.left, bounds.bottom, bounds.right, bounds.top]
        footprint = Polygon([
            [bounds.left, bounds.bottom],
            [bounds.left, bounds.top],
            [bounds.right, bounds.top],
            [bounds.right, bounds.bottom]
        ])
        
        return (bbox, mapping(footprint))

In [9]:
#loop through local DEM and S3 DEM and create item and assets
# once able to access DEMs directly from S3 change code and remove the local connection
for dem, s3_dem in zip(og_dems, DEM_Objects):
    bbox, footprint = get_bbox_and_footprint(os.path.join(dem_path,dem))
    print("bbox: ", bbox, "\n")
    print("footprint: ", footprint)
    datetime_utc = datetime.now(tz=timezone.utc)
    item = pystac.Item(id=dem,
                    geometry=footprint,
                    bbox=bbox,
                    datetime=datetime_utc,
                    properties={})
    catalog.add_item(item)
 
    # Modify the S3 path to remove the prefix
    s3_dem_key = os.path.relpath(s3_dem, object_key)
    
    #add asset 
    item.add_asset(
    key=dem,
    asset=pystac.Asset(
        href=s3_dem,
        media_type=pystac.MediaType.COG
    )
)

DEBUG:rasterio.env:Entering env context: <rasterio.env.Env object at 0x7fb1aea96c10>
DEBUG:rasterio.env:Starting outermost env
DEBUG:rasterio.env:No GDAL environment exists
DEBUG:rasterio.env:New GDAL environment <rasterio._env.GDALEnv object at 0x7fb23c40b040> created
DEBUG:rasterio._filepath:Installing FilePath filesystem handler plugin...
DEBUG:rasterio._env:GDAL_DATA found in environment.
DEBUG:rasterio._env:PROJ_DATA found in environment.
DEBUG:rasterio._env:Started GDALEnv: self=<rasterio._env.GDALEnv object at 0x7fb23c40b040>.
DEBUG:rasterio.env:Entered env context: <rasterio.env.Env object at 0x7fb1aea96c10>
DEBUG:rasterio._base:Sharing flag: 0
DEBUG:rasterio._base:Nodata success: 1, Nodata value: -32767.000000
DEBUG:rasterio._base:Dataset <open DatasetReader name='/home/cfolkers/STAC_LiDAR/DEM/bc_092o019_xli1m_utm10_2018_2_cog.tif' mode='r'> is started.
DEBUG:rasterio.env:Exiting env context: <rasterio.env.Env object at 0x7fb1aea96c10>
DEBUG:rasterio.env:Cleared existing <rast

bbox:  [541951.0, 5661212.0, 556016.0, 5670352.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((541951.0, 5661212.0), (541951.0, 5670352.0), (556016.0, 5670352.0), (556016.0, 5661212.0), (541951.0, 5661212.0)),)}
bbox:  [533696.0, 5666237.0, 541953.0, 5672238.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((533696.0, 5666237.0), (533696.0, 5672238.0), (541953.0, 5672238.0), (541953.0, 5666237.0), (533696.0, 5666237.0)),)}
bbox:  [555992.0, 5661249.0, 561120.0, 5663233.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((555992.0, 5661249.0), (555992.0, 5663233.0), (561120.0, 5663233.0), (561120.0, 5661249.0), (555992.0, 5661249.0)),)}
bbox:  [542485.0, 5672237.0, 555894.0, 5683491.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((542485.0, 5672237.0), (542485.0, 5683491.0), (555894.0, 5683491.0), (555894.0, 5672237.0), (542485.0, 5672237.0)),)}


DEBUG:botocore.hooks:Changing event name from before-parameter-build.cloudsearchdomain.Search to before-parameter-build.cloudsearch-domain.Search
DEBUG:botocore.hooks:Changing event name from docs.*.cloudsearchdomain.Search.complete-section to docs.*.cloudsearch-domain.Search.complete-section
DEBUG:botocore.utils:IMDS ENDPOINT: http://169.254.169.254/
DEBUG:botocore.credentials:Looking for credentials via: env
INFO:botocore.credentials:Found credentials in environment variables.
DEBUG:rasterio.env:Entering env context: <rasterio.env.Env object at 0x7fb1aea95f10>
DEBUG:rasterio.env:Starting outermost env
DEBUG:rasterio.env:No GDAL environment exists
DEBUG:rasterio.env:New GDAL environment <rasterio._env.GDALEnv object at 0x7fb1ae5fe5c0> created
DEBUG:rasterio._env:GDAL_DATA found in environment.
DEBUG:rasterio._env:PROJ_DATA found in environment.
DEBUG:rasterio._env:Started GDALEnv: self=<rasterio._env.GDALEnv object at 0x7fb1ae5fe5c0>.
DEBUG:rasterio.env:Entered env context: <rasterio.

bbox:  [555893.0, 5661307.0, 567624.0, 5672410.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((555893.0, 5661307.0), (555893.0, 5672410.0), (567624.0, 5672410.0), (567624.0, 5661307.0), (555893.0, 5661307.0)),)}
bbox:  [539056.0, 5663025.0, 541996.0, 5668306.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((539056.0, 5663025.0), (539056.0, 5668306.0), (541996.0, 5668306.0), (541996.0, 5663025.0), (539056.0, 5663025.0)),)}
bbox:  [555803.0, 5672369.0, 559397.0, 5681181.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((555803.0, 5672369.0), (555803.0, 5681181.0), (559397.0, 5681181.0), (559397.0, 5672369.0), (555803.0, 5672369.0)),)}
bbox:  [535246.0, 5672186.0, 541921.0, 5678033.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((535246.0, 5672186.0), (535246.0, 5678033.0), (541921.0, 5678033.0), (541921.0, 5672186.0), (535246.0, 5672186.0)),)}
bbox:  [541919.0, 5663231.0, 555994.0, 5672371.0] 

footprint:  {'type': 'Polygon', 'coordinates': (((541919.0, 5663231.0), 

In [10]:
#view parent data
item.get_parent()

In [11]:
#visualize STAC Architecture, be careful when using it on large catalogs, as it will walk the entire tree of the STAC.
catalog.describe()

* <Catalog id=DEM_Test>
  * <Item id=bc_092o019_xli1m_utm10_2018_2_cog.tif>
  * <Item id=bc_092o018_xli1m_utm10_2018_cog.tif>
  * <Item id=bc_092o020_xli1m_utm10_2018_2_cog.tif>
  * <Item id=bc_092o029_xli1m_utm10_2019_cog.tif>
  * <Item id=bc_092o020_xli1m_utm10_2018_cog.tif>
  * <Item id=bc_092o018_xli1m_utm10_2018_2_cog.tif>
  * <Item id=bc_092o030_xli1m_utm10_2018_cog.tif>
  * <Item id=bc_092o028_xli1m_utm10_2018_cog.tif>
  * <Item id=bc_092o019_xli1m_utm10_2018_cog.tif>


In [12]:
# view the last item added with all of it's properties 
info(json.dumps(item.to_dict(), indent=4))

INFO:root:{
    "type": "Feature",
    "stac_version": "1.0.0",
    "id": "bc_092o019_xli1m_utm10_2018_cog.tif",
    "properties": {
        "datetime": "2024-05-06T20:12:32.124363Z"
    },
    "geometry": {
        "type": "Polygon",
        "coordinates": [
            [
                [
                    541919.0,
                    5663231.0
                ],
                [
                    541919.0,
                    5672371.0
                ],
                [
                    555994.0,
                    5672371.0
                ],
                [
                    555994.0,
                    5663231.0
                ],
                [
                    541919.0,
                    5663231.0
                ]
            ]
        ]
    },
    "links": [
        {
            "rel": "root",
            "href": null,
            "type": "application/json"
        },
        {
            "rel": "parent",
            "href": null,
            "type"

In [17]:
catalog.normalize_hrefs(root_href=os.path.join("catalogs"))

In [18]:
print("Catalog HREF: ", catalog.get_self_href())
print("Item HREF: ", item.get_self_href())

Catalog HREF:  /home/cfolkers/STAC_LiDAR/BC_Webmap_Lidar_STAC/src/STAC/catalogs/catalog.json
Item HREF:  /home/cfolkers/STAC_LiDAR/BC_Webmap_Lidar_STAC/src/STAC/catalogs/bc_092o019_xli1m_utm10_2018_cog.tif/bc_092o019_xli1m_utm10_2018_cog.tif.json


In [29]:
# for now a self contained catalog will be uploaded to s3? 
# https://pystac.readthedocs.io/en/stable/api/pystac.html#catalogtype

In [19]:
#save catalog self contained 
#A ‘self-contained catalog’ is one that is designed for portability. 
# Users may want to download an online catalog from and be able to use it on their local computer, so all links need to be relative.
catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)

In [22]:
#Absolute Published Catalog is a catalog that uses absolute links for everything,
# both in the links objects and in the asset hrefs.

# catalog.save(catalog_type=pystac.CatalogType.ABSOLUTE_PUBLISHED)

In [33]:
# catalog.make_all_asset_hrefs_relative()
# catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)

In [91]:
def uploadDirectory(path,bucketname):
        for root,dirs,files in os.walk(path):
            for file in files:
                s3_client.upload_file(os.path.join(root,file),bucketname,file)

In [70]:
uploadDirectory(r"/home/cfolkers/STAC_LiDAR/BC_Webmap_Lidar_STAC/src/catalogs/dem", bucket)

In [74]:
response = s3_client.list_objects_v2(Bucket=bucket, Prefix="STAC_LiDAR/", StartAfter=object_key)

if 'Contents' in response:
    # Iterate over objects and print their names
    for obj in response['Contents']:
        DEM_Objects.append(obj['Key'])
        info(obj['Key'])
        info(f"Object Size {obj['Size']}")
        
else:
    error("No objects found in the bucket.")

DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <function set_list_objects_encoding_type_url at 0x7f72a18ee980>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <function validate_bucket_name at 0x7f72a18ed4e0>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <function remove_bucket_from_url_paths_from_model at 0x7f72a18ef600>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <bound method S3RegionRedirectorv2.annotate_request_context of <botocore.utils.S3RegionRedirectorv2 object at 0x7f729e71ba90>>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <bound method S3ExpressIdentityResolver.inject_signing_cache_key of <botocore.utils.S3ExpressIdentityResolver object at 0x7f729e59cb90>>
DEBUG:botocore.hooks:Event before-parameter-build.s3.ListObjectsV2: calling handler <function generate_idempotent_uuid at 0x7f72a18ed