In [None]:
import os
import json
import rasterio
from rasterio.transform import from_bounds
import pystac
from datetime import datetime, timezone
from shapely.geometry import Polygon, mapping, shape
from tempfile import TemporaryDirectory
import logging
import constants
import boto3
from botocore.exceptions import ClientError
from glob import glob
from pyproj import Transformer


In [2]:
# Configure the root logger to emit everything from DEBUG upwards, then bind the
# module-level logging helpers to short names that the later cells call directly.
logging.basicConfig(level=logging.DEBUG)
debug, info, warning, error = (
    logging.debug,
    logging.info,
    logging.warning,
    logging.error,
)

In [None]:
# Key prefix under which the generated STAC JSON documents are uploaded.
stac_loc=r"STAC_DEM/Catalog/"

# Key prefix under which the source DEM objects are listed.
object_key=r"STAC_DEM/Data/"


# S3 client for the object store; the endpoint and credentials are read from the
# local `constants` module rather than being written into the notebook.
s3_client = boto3.client(
    "s3",
    endpoint_url=constants.AWS_S3_ENDPOINT,
    aws_access_key_id=constants.AWS_ACCESS_KEY_ID,
    aws_secret_access_key=constants.AWS_SECRET_ACCESS_KEY,
)
# NOTE(review): the original author observed the bucket name gaining an extra
# trailing character; the cause is unresolved - verify constants.AWS_S3_BUCKET.
bucket = constants.AWS_S3_BUCKET



In [14]:

def create_url(bucket_name: str,
            object_name: str):
    """
    Build the path-style URL of an object: <endpoint>/<bucket_name>/<object_name>.

    The URL is assembled from constants.AWS_S3_ENDPOINT by string handling only:
    no request is sent, and the result is neither signed nor time-limited.

    Arguments:
        bucket_name: String of name of the bucket
        object_name: Name of the object (key) that the URL will be pointed to.
            A trailing "/" is kept, so passing a key prefix yields a base URL.

    Returns:
        The URL as a string
    """
    endpoint = constants.AWS_S3_ENDPOINT
    if ':443' in endpoint:
        # Keep only "<scheme>:<//host>", i.e. drop the port part of the endpoint.
        parts = endpoint.split(':')
        endpoint = f"{parts[0]}:{parts[1]}"

    # URLs always use "/". os.path.join would insert "\" on Windows and would
    # discard the endpoint and bucket whenever object_name starts with "/".
    return "/".join((
        endpoint.rstrip('/'),
        bucket_name.strip('/'),
        object_name.lstrip('/'),
    ))

def set_permissions(bucket_name: str,
                    object_name: str,
                    permissions='public-read'):
    """
    Apply a canned ACL to a single object through the module-level s3_client.

    Arguments:
        bucket_name: String of name of the bucket
        object_name: Key of the object whose ACL is being set
        permissions: Canned ACL to apply; defaults to 'public-read'. One of:
        'private'|'public-read'|'public-read-write'|'authenticated-read'|'aws-exec-read'|'bucket-owner-read'|'bucket-owner-full-control'

    Returns:
        Nothing

    Raises:
        Nothing: any exception raised while setting the ACL is caught and printed,
        so the caller is not interrupted.
    """
    acl_request = dict(ACL=permissions, Bucket=bucket_name, Key=object_name)
    try:
        s3_client.put_object_acl(**acl_request)
        print(f'Set permissions on {object_name} success, set to {permissions}')
    except Exception as exc:
        print(f'Error when setting permission: double check permission: {permissions}. Refer to help(set_permissions) for documentation.')
        print(exc)

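# Superseded version of get_bbox_and_footprint, kept for reference only: it returned the
# bounds in the raster's native CRS, without reprojecting them.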
# def get_bbox_and_footprint(raster):
#     with rasterio.open(raster) as r:
#         bounds = r.bounds
#         bbox = [bounds.left, bounds.bottom, bounds.right, bounds.top]
#         footprint = Polygon([
#             [bounds.left, bounds.bottom],
#             [bounds.left, bounds.top],
#             [bounds.right, bounds.top],
#             [bounds.right, bounds.bottom]
#         ])
        
#         return (bbox, mapping(footprint))

#updated bbox and footprint to return coords in wgs84 
def get_bbox_and_footprint(raster):
    """
    Return the bounding box and footprint of a raster in WGS84 (EPSG:4326).

    All four corners of the raster's bounds are reprojected. A rectangle in a
    projected CRS is generally not axis-aligned in lon/lat, so reprojecting only
    the lower-left and upper-right corners can under-report the extent.

    Arguments:
        raster: Anything rasterio.open accepts (the notebook passes object URLs).
            The raster is assumed to carry a CRS.

    Returns:
        Tuple (bbox, footprint):
            bbox: [min_lon, min_lat, max_lon, max_lat] as a list of floats
            footprint: GeoJSON-like dict of the polygon through the four
                reprojected corners
    """
    with rasterio.open(raster) as r:
        bounds = r.bounds
        # always_xy=True fixes the axis order to (x, y) / (lon, lat) on both sides.
        transformer = Transformer.from_crs(r.crs, "EPSG:4326", always_xy=True)

    # Corners listed counter-clockwise, starting at the lower-left.
    native_corners = [
        (bounds.left, bounds.bottom),
        (bounds.right, bounds.bottom),
        (bounds.right, bounds.top),
        (bounds.left, bounds.top),
    ]
    corners = [transformer.transform(x, y) for x, y in native_corners]

    lons = [lon for lon, _ in corners]
    lats = [lat for _, lat in corners]
    # The bbox is the envelope of the reprojected corners.
    bbox = [min(lons), min(lats), max(lons), max(lats)]
    logging.debug("WGS84 bbox: %s", bbox)

    footprint = Polygon(corners)
    return (bbox, mapping(footprint))


In [None]:
# Find every DEM object under the data prefix and keep the keys of the non-empty ones.

DEM_Objects=[]

# A single list_objects_v2 call returns a limited page of keys, so walk all pages.
# StartAfter=object_key leaves out a key that is exactly the prefix itself.
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket, Prefix=object_key, StartAfter=object_key):
    for obj in page.get('Contents', []):
        info(obj['Key'])
        info(f"Object Size {obj['Size']}")
        if obj['Size'] == 0:
            # A zero-byte object cannot be a readable raster; skip it here
            # instead of failing later when it is opened.
            warning(f"Skipping empty object {obj['Key']}")
            continue
        DEM_Objects.append(obj['Key'])

if not DEM_Objects:
    error("No objects found in the bucket.")


In [16]:
# Base URL for all STAC JSON hrefs: the URL of the stac_loc prefix inside the bucket.
base_url=create_url(bucket, stac_loc)
# Id of the root catalog; also used to build the catalog's JSON file name.
catalog_name='DEM_Test'


In [None]:
# Make every DEM object publicly readable and collect its URL.
# dem_url is built in the same order as DEM_Objects, so the two lists stay index-aligned.
dem_url = []
for dem_key in DEM_Objects:
    info(dem_key)
    set_permissions(bucket, dem_key)  # default ACL is public-read
    dem_url.append(create_url(bucket, dem_key))

In [None]:

# Create the root STAC Catalog. Its href is the catalog JSON document located
# directly under base_url; it is joined with "/" because it is a URL, not a
# filesystem path.
catalog_href = f"{base_url.rstrip('/')}/{catalog_name}.json"
catalog = pystac.Catalog(id=catalog_name,
                        description='This catalog is a basic demonstration catalog utilizing using nine Digital Elevation Models, in the cloud optimized geotif(COG) format, that overlap with the point cloud test',
                        href=catalog_href
                        )

print(json.dumps(catalog.to_dict(), indent=4))

# Report the href actually given to the catalog. The previous value,
# create_url(bucket, catalog_name), pointed at <bucket>/DEM_Test, which is not
# where the catalog document lives.
url = catalog_href
print(F"THIS IS THE URL {url}   !!!!!!!!!!!!!!!!!!!!!!!!")

In [None]:
# Base URL of the STAC prefix; item hrefs are built underneath it.
base_url = create_url(bucket, stac_loc)

# Build one STAC Item with a single COG asset per DEM.
# dem is the object's URL and s3_dem is its key; both come from lists built in the same order.
for dem, s3_dem in zip(dem_url, DEM_Objects):
    info(dem)
    info(s3_dem)
    # Item id = the object's file name up to its first ".".
    name = s3_dem.split('/')[-1].split('.')[0]
    info(name)
    bbox, footprint = get_bbox_and_footprint(dem)
    # logging %-formats the message with any extra positional arguments, so the
    # values need a placeholder; passing them bare causes a formatting error.
    info("bbox: %s", bbox)
    info("footprint: %s", footprint)
    # NOTE(review): the item datetime is the time this cell runs, not a date
    # read from the raster - confirm this is what the catalog should publish.
    datetime_utc = datetime.now(tz=timezone.utc)
    # The href is predicted before the JSON exists; the uploaded documents still
    # have to be made public afterwards for it to resolve.
    item_href = f"{base_url.rstrip('/')}/{name}/{name}.json"
    info(f"Item Href is : {item_href}")
    item = pystac.Item(id=name,
                    geometry=footprint,
                    bbox=bbox,
                    datetime=datetime_utc,
                    href=item_href,
                    properties={})
    catalog.add_item(item)

    # Key relative to the data prefix (only logged). Plain string handling is
    # used because S3 keys are not filesystem paths.
    s3_dem_key = s3_dem[len(object_key):] if s3_dem.startswith(object_key) else s3_dem
    info(f"s3 dem key : {s3_dem_key}")
    # Attach the DEM itself as the item's only asset.
    item.add_asset(
        key=name,
        asset=pystac.Asset(
            href=dem,
            media_type=pystac.MediaType.COG
        )
    )

In [20]:
# Derive the collection's spatial extent from the union of all item footprints,
# collecting the items and their datetimes along the way.
unioned_footprint = None
datetime_list=[]
item_list=[]
# `item` stays bound to the last item after the loop; later cells read it.
for item in catalog.get_all_items():
    datetime_list.append(item.datetime)
    item_list.append(item)
    footprint = item.geometry
    # Convert the GeoJSON-like geometry to a Shapely shape
    footprint_shape = shape(footprint)

    if unioned_footprint is None:
        # First item: start the union from its footprint
        unioned_footprint = footprint_shape
    else:
        unioned_footprint = unioned_footprint.union(footprint_shape)

# Without items there is nothing to take bounds of; fail with a clear message
# instead of an AttributeError on None.
if unioned_footprint is None:
    raise ValueError("Catalog contains no items; cannot compute the collection extent")

collection_bbox = list(unioned_footprint.bounds)
spatial_extent = pystac.SpatialExtent(bboxes=[collection_bbox])

In [21]:
# A temporal interval is a [start, end] pair, so use the earliest and latest item
# datetimes rather than the full sorted list (which has one entry per item).
collection_interval = [min(datetime_list), max(datetime_list)]
temporal_extent = pystac.TemporalExtent(intervals=[collection_interval])
collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

In [None]:
# Create the collection that will hold all DEM items. Its href is the collection
# JSON document directly under base_url, joined with "/" because it is a URL.
colection_id='COG-DEM-Test'
collection_href = f"{base_url.rstrip('/')}/{colection_id}.json"

collection = pystac.Collection(id=colection_id,
                               description='Digital Elevation Models for the interior of British Columbia',
                               extent=collection_extent,
                               title='BC-DEM',
                               href=collection_href,
                               license='Apache-2.0')


# Report the href actually given to the collection. The previous value,
# create_url(bucket, "<id>.json"), pointed at the bucket root rather than at
# the catalog prefix.
url = collection_href
print(F"THIS IS THE URL {url}   !!!!!!!!!!!!!!!!!!!!!!!!")

In [None]:
# Add every collected item to the collection, then add the collection as a child of the catalog.
# NOTE(review): the same items were already added to `catalog` directly in the
# item-creation cell, so they end up added to both - confirm this is intended.
collection.add_items(item_list)
catalog.add_child(collection)

In [None]:
# Show the parent of `item`. `item` is not defined in this cell: it is the loop
# variable left over from an earlier cell, i.e. the last item iterated there.
item.get_parent()

In [None]:
# Visualize the STAC structure. Be careful on large catalogs: this walks the entire tree.
catalog.describe()

In [None]:
# Log the full JSON of `item` (the last item bound by an earlier loop) with all of its properties.
info(json.dumps(item.to_dict(), indent=4))

In [None]:
# Print the self hrefs of the catalog, the collection and the last item, to check
# them before the catalog is saved.
print("Catalog HREF: ", catalog.get_self_href())
print("Collection HREF:", collection.get_self_href())
print("Item HREF: ", item.get_self_href())

In [28]:
# Save the catalog to the directory named by ouput_loc, a relative path.
# NOTE(review): the original comment said "self relative", but the catalog type
# passed is RELATIVE_PUBLISHED - confirm which link layout is wanted.

ouput_loc=r"STAC_DEM"
catalog.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED, dest_href=ouput_loc)

In [None]:
# Local directory the catalog was saved into.
in_dir=r"STAC_DEM"

# Upload every file found under in_dir to the catalog prefix in the bucket.
# Key layout: a file whose name contains catalog_name goes directly under
# stac_loc; any other file goes under stac_loc/<file name up to its first ".">/.
for root, dirs, files in os.walk(in_dir):
    # Loop over all files: a directory may hold none (files[0] would raise
    # IndexError) or several (only the first one used to be uploaded).
    for file_name in files:
        in_file = os.path.join(root, file_name)
        if not os.path.isfile(in_file):
            info("file does not exist!!!!")
            continue

        info(f"in path: {in_file}")
        # S3 keys always use "/", so they are joined by hand rather than with os.path.join.
        key_parts = [stac_loc.rstrip('/')]
        if catalog_name not in file_name:
            key_parts.append(file_name.split('.')[0])
        key_parts.append(file_name)
        out_file = "/".join(key_parts)

        info(f"out path: {out_file}")
        # The client's own upload_file is used, so boto3.s3.transfer does not
        # have to be reachable as an attribute of the boto3 package.
        s3_client.upload_file(in_file, bucket, out_file)
        info(f"{file_name} put into {out_file} ")
        
    



In [None]:
# Collect the keys of all uploaded catalog objects.
catalog_list=[]

# A single list_objects_v2 call returns a limited page of keys, so walk all pages.
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket, Prefix="STAC_DEM/Catalog"):
    for obj in page.get('Contents', []):
        catalog_list.append(obj['Key'])
        info(obj['Key'])
        info(f"Object Size {obj['Size']}")

if not catalog_list:
    error("No objects found in the bucket.")

In [None]:
# Make every uploaded catalog object publicly readable and print its URL.
for catalog_key in catalog_list:
    set_permissions(bucket, catalog_key)  # default ACL is public-read
    url = create_url(bucket, catalog_key)
    print(F"THIS IS THE URL {url}   !!!!!!!!!!!!!!!!!!!!!!!!")