In [1]:
import json
import logging
import os
import posixpath
import re
import urllib.request
from datetime import datetime, timezone
from glob import glob
from tempfile import TemporaryDirectory

import boto3
import pystac
import rasterio
from botocore.exceptions import ClientError
from rasterio.crs import CRS
from rasterio.warp import transform_bounds
from shapely.geometry import Polygon, mapping

import constants

loading dot env...


In [2]:
# Configure the root logger at DEBUG verbosity and bind short aliases for the
# module-level logging helpers used throughout the notebook.
logging.basicConfig(level=logging.DEBUG)
debug, info, warning, error = (
    logging.debug,
    logging.info,
    logging.warning,
    logging.error,
)

In [None]:
# Key prefix under which the DEM objects are listed
object_key=r"STAC_DEM/Data/"

# Key prefix used for the STAC JSON documents
stac_loc=r"STAC_DEM/Catalog/"

# S3 client for the third-party object storage; endpoint and credentials are
# read from the `constants` module.
s3_client = boto3.client(
    "s3",
    aws_secret_access_key=constants.AWS_SECRET_ACCESS_KEY,
    aws_access_key_id=constants.AWS_ACCESS_KEY_ID,
    endpoint_url=constants.AWS_S3_ENDPOINT,
)

# NOTE(review): the bucket name was observed with an extra trailing character;
# the cause is not established in this notebook.
bucket = constants.AWS_S3_BUCKET



In [4]:
def create_url(bucket_name: str,
            object_name: str) -> str:
    """
    Build the URL of an object from the configured S3 endpoint, bucket and key.

    The URL is assembled purely by string manipulation: no request is sent to
    S3 and the URL is neither signed nor time-limited.

    Arguments:
        bucket_name: String of name of the bucket
        object_name: Name of the object (key) that the URL will be pointed to.
            A trailing '/' is preserved so that prefixes can be used as base URLs.

    Returns:
        The URL string "<endpoint>/<bucket_name>/<object_name>", where an
        explicit ':443' port has been removed from the endpoint.
    """
    # Drop only a real ':443' port (followed by '/' or end of string), so that
    # ports such as ':4433' are left intact.
    endpoint = re.sub(r":443(?=/|$)", "", constants.AWS_S3_ENDPOINT, count=1)

    # URLs always use '/', independent of the operating system. Surplus slashes
    # at the joints are stripped so a key starting with '/' cannot discard the
    # endpoint and bucket.
    return "/".join((
        endpoint.rstrip("/"),
        bucket_name.strip("/"),
        object_name.lstrip("/"),
    ))

def set_permissions(bucket_name: str,
                    object_name: str,
                    permissions='public-read'):
    """
    Apply a canned ACL to one object through the notebook's `s3_client`.

    Arguments:
        bucket_name: String of name of the bucket
        object_name: Key of the object whose ACL is changed
        permissions: Canned ACL to apply; defaults to 'public-read'. Accepted values:
        'private'|'public-read'|'public-read-write'|'authenticated-read'|'aws-exec-read'|'bucket-owner-read'|'bucket-owner-full-control'

    Returns:
        Nothing

    Raises:
        Nothing: any exception from the ACL call is caught and printed
        together with a hint to check the permission value.
    """
    acl_request = {"ACL": permissions, "Bucket": bucket_name, "Key": object_name}

    try:
        s3_client.put_object_acl(**acl_request)
    except Exception as acl_error:
        print(f'Error when setting permission: double check permission: {permissions}. Refer to help(set_permissions) for documentation.')
        print(acl_error)
    else:
        print(f'Set permissions on {object_name} success, set to {permissions}')

    return None

In [5]:
#function to get data for json
def get_bbox_and_footprint(raster):
    """
    Return the bounding box and rectangular footprint of a raster in WGS 84.

    STAC items carry their bbox and geometry as longitude/latitude, so the
    raster's bounds are reprojected from its own CRS to EPSG:4326 when the two
    differ. A raster without any CRS is passed through unchanged.

    Arguments:
        raster: Path or URL of a raster that rasterio can open

    Returns:
        Tuple (bbox, footprint): bbox is [left, bottom, right, top] and
        footprint is the GeoJSON-like mapping of the rectangle with those corners.
    """
    wgs84 = CRS.from_epsg(4326)

    with rasterio.open(raster) as src:
        left, bottom, right, top = src.bounds
        # Only reproject when there is a CRS to reproject from and it is not
        # already lon/lat WGS 84.
        if src.crs is not None and src.crs != wgs84:
            left, bottom, right, top = transform_bounds(
                src.crs, wgs84, left, bottom, right, top
            )

    bbox = [left, bottom, right, top]
    footprint = Polygon([
        [left, bottom],
        [left, top],
        [right, top],
        [right, bottom]
    ])

    return (bbox, mapping(footprint))

In [None]:
# find all DEM objects in s3 and add to a list, and check to make sure they have an object size

DEM_Objects=[]

# A single list_objects_v2 call returns at most 1000 keys, so page through the
# listing. StartAfter=object_key skips the prefix key itself.
dem_pages = s3_client.get_paginator("list_objects_v2").paginate(
    Bucket=bucket, Prefix=object_key, StartAfter=object_key
)

for page in dem_pages:
    # pages without matches carry no 'Contents' entry
    for obj in page.get('Contents', []):
        if obj['Size'] == 0:
            # a zero-byte object cannot be a readable raster
            warning(f"Skipping zero-byte object {obj['Key']}")
            continue
        DEM_Objects.append(obj['Key'])
        info(obj['Key'])
        info(f"Object Size {obj['Size']}")

if not DEM_Objects:
    error("No objects found in the bucket.")


In [7]:
# Catalog id; also used as the file name of the catalog JSON
catalog_name = 'DEM_Test'

# Base URL from which all STAC JSON hrefs are built
base_url = create_url(bucket, stac_loc)


In [16]:

# create STAC Catalog
catalog = pystac.Catalog(
    id=catalog_name,
    description=(
        'This catalog is a basic demonstration catalog using nine Digital '
        'Elevation Models, in the cloud optimized GeoTIFF (COG) format, that '
        'overlap with the point cloud test'
    ),
    # the href is a URL, so join with '/' regardless of the operating system
    href=posixpath.join(base_url, f"{catalog_name}.json"),
)

In [None]:
# Inspect the catalog by printing its dictionary form as indented JSON
catalog_json = json.dumps(catalog.to_dict(), indent=4)
print(catalog_json)

In [None]:
# Make every DEM object publicly readable and collect its URL. dem_url keeps
# the same order as DEM_Objects.
dem_url = []
for dem_key in DEM_Objects:
    info(dem_key)
    set_permissions(bucket, dem_key)  # default is public-read
    dem_url.append(create_url(bucket, dem_key))

In [None]:
#get base bucket url
base_url=create_url(bucket, stac_loc)

#loop through s3 DEM url and S3 DEM and create item and assets
# NOTE(review): re-running this cell appears to add the same items to the
# catalog a second time - confirm, and rebuild the catalog first if so.
for dem, s3_dem in zip(dem_url, DEM_Objects):
    info(dem)
    info(s3_dem)
    # item id: the object's file name without directory or extension
    name=s3_dem.split('/')[-1].split('.')[0]
    info(name)
    bbox, footprint = get_bbox_and_footprint(dem)
    # logging treats extra positional arguments as %-format values, so the
    # message needs one placeholder per argument.
    info("bbox: %s", bbox)
    info("footprint: %s", footprint)
    # NOTE(review): the item datetime is the time this cell runs, not a
    # property read from the DEM - confirm this is intended.
    datetime_utc = datetime.now(tz=timezone.utc)
    # The item href is predicted before the catalog is saved; the objects at
    # these hrefs still need to be made public afterwards.
    item_href=posixpath.join(base_url, name, f"{name}.json")
    info(f"Item Href is : {item_href}")
    item = pystac.Item(id=name,
                    geometry=footprint,
                    bbox=bbox,
                    datetime=datetime_utc,
                    href= item_href,
                    properties={})
    catalog.add_item(item)

    # Object key relative to the data prefix (only logged)
    s3_dem_key = posixpath.relpath(s3_dem, object_key)
    info(f"s3 dem key : {s3_dem_key}")
    #add asset
    item.add_asset(
        key=name,
        asset=pystac.Asset(
            href=dem,
            media_type=pystac.MediaType.COG
        )
    )

In [None]:
# Show the parent of `item`, i.e. of the last item created by the item loop
item.get_parent()

In [None]:
# Visualize the STAC architecture. Be careful when using this on large
# catalogs, as it will walk the entire tree of the STAC.
catalog.describe()

In [None]:
# Log the last item added, with all of its properties, as indented JSON
last_item_json = json.dumps(item.to_dict(), indent=4)
info(last_item_json)

In [None]:
# Print the self href of the catalog and of the last item created
for href_label, stac_object in (("Catalog HREF: ", catalog), ("Item HREF: ", item)):
    print(href_label, stac_object.get_self_href())

In [None]:
#save catalog as relative published so it can be accessed online?
# catalog.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED)

In [24]:
# Write the catalog to a local directory as RELATIVE_PUBLISHED.
# NOTE(review): the choice between self-contained and relative-published is
# still open.

ouput_loc=r"/home/cfolkers/STAC_LiDAR/BC_Webmap_Lidar_STAC/src/STAC/STAC_DEM/Catalogs"
catalog.save(
    dest_href=ouput_loc,
    catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED,
)

In [73]:
# #make all hrefs relative and save as self contained?
# catalog.make_all_asset_hrefs_relative()
# catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED, dest_href=ouput_loc)

In [None]:
# NOTE(review): uploads from this cell were reported as unusually slow; that
# has not been diagnosed here.

def uploadDirectory(path,bucketname, s3_root, catalog_marker="DEM_Test"):
    """
    Upload every file found below a local directory to S3 under s3_root.

    Files whose name contains catalog_marker are stored directly under
    s3_root; every other file is stored in a sub-prefix named after the part
    of its file name before the first '.'.

    Arguments:
        path: Local directory that is walked recursively
        bucketname: Name of the destination bucket
        s3_root: Key prefix under which the files are stored
        catalog_marker: Substring identifying the catalog file(s) that stay at
            the top of s3_root; defaults to "DEM_Test"

    Returns:
        Nothing
    """
    for root, _dirs, files in os.walk(path):
        for file in files:
            local_file = os.path.join(root, file)
            print(local_file)
            print(bucketname)
            # S3 keys always use '/', so build them with posixpath
            if catalog_marker not in file:
                s3_out = posixpath.join(s3_root, file.split('.')[0], file)
            else:
                s3_out = posixpath.join(s3_root, file)
            print(s3_out)
            # upload under the computed key, not under the bare file name
            s3_client.upload_file(local_file, bucketname, s3_out)
                
# Upload the directory the catalog was saved to (ouput_loc) under stac_loc,
# the same prefix the catalog and item hrefs were built from.
uploadDirectory(ouput_loc, bucket, stac_loc)

In [None]:
in_dir=r"/home/cfolkers/STAC_LiDAR/BC_Webmap_Lidar_STAC/src/STAC/STAC_DEM/Catalogs"
#set up transfer? seemed to work
transfer=boto3.s3.transfer.S3Transfer(s3_client)

# NOTE(review): the only S3 call still active in this cell is delete_object;
# the upload further down is commented out. Only the first file of each
# directory is considered.
for root,dirs,files in os.walk(in_dir):
    # os.walk also yields directories that contain no files; files[0] would
    # raise IndexError for those, so skip them.
    if not files:
        continue
    in_file=os.path.join(root,files[0])
    #check for file existence
    if os.path.isfile(in_file):
        info(f"in path: {in_file}")
        if catalog_name in files[0]:
            out_file=os.path.join(stac_loc,files[0])
        else:
            # sub-prefix named after the part of the file name before the first '.'
            ex_dir=files[0].split('.')[0]
            out_file=os.path.join(stac_loc,ex_dir,files[0])
            # s3_client.put_object(Bucket=bucket, Key=os.path.join(stac_loc,ex_dir))
            s3_client.delete_object(Bucket=bucket, Key=os.path.join(stac_loc,ex_dir))


    #     info(f"out path: {out_file}")
    #     transfer.upload_file(in_file, bucket, out_file)
    #     info(f"{files[0]} put into {out_file} ")

    # else:
    #     info("file does not exist!!!!")
        
    



In [None]:
catalog_list=[]

# A single list_objects_v2 call returns at most 1000 keys, so page through the
# listing instead of reading one response.
catalog_pages = s3_client.get_paginator("list_objects_v2").paginate(
    Bucket=bucket, Prefix="STAC_DEM/Catalog"
)

for page in catalog_pages:
    # pages without matches carry no 'Contents' entry
    for obj in page.get('Contents', []):
        catalog_list.append(obj['Key'])
        info(obj['Key'])
        info(f"Object Size {obj['Size']}")

if not catalog_list:
    error("No objects found in the bucket.")

In [None]:
# Make every listed catalog object publicly readable and print its URL
for catalog_key in catalog_list:
    set_permissions(bucket, catalog_key)  # default is public-read
    url = create_url(bucket, catalog_key)
    print(F"THIS IS THE URL {url}   !!!!!!!!!!!!!!!!!!!!!!!!")