In [1]:
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "aiobotocore",
#     "boto3",
#     "stactools-icesat2-boreal",
#     "obstore",
#     "pandas",
#     "pystac",
#     "pystac-client",
#     "stac-pydantic",
#     "tqdm",
#     "geopandas",
# ]
#
# [tool.uv.sources]
# stactools-icesat2-boreal = { git = "https://github.com/MAAP-Project/icesat2-boreal-stac", rev = "feat/v3.0" }
# ///

# Publish icesat2-boreal-v3.0-agb/ht collections

**Author:** Henry Rodman

**Date:** 2025-07-09

This notebook uses the `icesat2-boreal-v3.0` collections to demonstrate how to publish new collections to the MAAP STAC using the SNS-based STAC metadata generation and ingestion infrastructure.

MAAP uses an asynchronous, event-driven workflow for loading metadata into the pgstac database. The components of this system are as follows:
- `StacLoader` (appears as `stacitemloader` in the deployed SNS topic name): SNS topic + SQS queue + Lambda function for loading STAC metadata into pgstac
  - the SNS topic accepts either STAC items/collections or S3 event notifications for STAC json files
  - collections and items are loaded into pgstac in batches from the SQS queue by the Lambda function
  - for more details, see the [`StacLoader` documentation in eoapi-cdk](https://developmentseed.org/eoapi-cdk/#stacloader-)
- `StactoolsItemGenerator`: SNS topic + SQS queue + Lambda function for using stactools packages to generate STAC items
  - the SNS topic expects a message with the stactools package source (e.g. pypi package name, git repo, etc) and the arguments for the `create-item` CLI command
  - the Lambda function uses `uvx` to install the required packages and run the create-item command on-the-fly
  - resulting STAC items are posted to the `StacLoader` SNS topic
  - for more details, see the [`StactoolsItemGenerator` documentation in eoapi-cdk](https://developmentseed.org/eoapi-cdk/#stactoolsitemgenerator-)

The operational workflow for using this infrastructure to publish a new collection to the MAAP STAC is as follows:

1. Copy the assets to the `s3://nasa-maap-data-store` S3 bucket
2. Generate the collection JSON and post it to the `StacLoader` SNS topic
3. Generate STAC items using the `StactoolsItemGenerator` infrastructure
    - create list of stactools-uvx messages, post them to the SNS topic
    - the Lambda function will generate the STAC items and post them to the `StacLoader` SNS topic

This notebook is the first end-to-end test of this process for MAAP!

In [3]:
import asyncio
import io
import json
import time
from pathlib import Path
from typing import Any, Dict, List, Tuple

import boto3
import geopandas as gpd
import obstore as obs
import pandas as pd
from aiobotocore.session import get_session
from obstore.store import S3Store
from stac_pydantic import Collection
from botocore.exceptions import ClientError
from tqdm.asyncio import tqdm

from stactools.icesat2_boreal.stac import create_collection, create_item
from stactools.icesat2_boreal.constants import Variable, TILE_GPKG_BUCKET, TILE_GPKG_KEY

# SNS topics for the two ingestion entry points described above: the loader
# accepts STAC collections/items, the generator accepts stactools messages.
STAC_LOADER_SNS_TOPIC_ARN = "arn:aws:sns:us-west-2:916098889494:MAAP-STAC-test-pgSTAC-stacitemloaderTopicD9D06088-LutBraKgk6sT"
STACTOOLS_ITEM_GENERATOR_SNS_TOPIC_ARN = "arn:aws:sns:us-west-2:916098889494:MAAP-STAC-test-pgSTAC-stactoolsitemgeneratorTopic79301C28-PHC9wUFiJXvn"

# Assets are copied from MAAP_OPS_BUCKET (where the inventories point) to
# ASSET_DEST_BUCKET (the bucket referenced by the STAC item asset hrefs).
MAAP_OPS_BUCKET = "maap-ops-workspace"
ASSET_DEST_BUCKET = "nasa-maap-data-store"

# Destination key layout: <prefix>/<variable>/<zero-padded 7-digit tile>/<file name>
ICESAT2_BOREAL_PREFIX = "file-staging/nasa-map/icesat2-boreal-v3.0"
DEST_KEY_FMT = ICESAT2_BOREAL_PREFIX + "/{variable}/{tile:07d}/{name}"

AWS_REGION = "us-west-2"

# Keys (within MAAP_OPS_BUCKET) of the per-variable inventory CSVs and of the
# full tile-geometry geopackage.
agb_inventory_key = "shared/montesano/DPS_tile_lists/BOREAL_MAP/v3.0.0/AGB_H30_2020/full_run/AGB_tindex_master.csv"
ht_inventory_key = "shared/montesano/DPS_tile_lists/BOREAL_MAP/v3.0.0/Ht_H30_2020/full_run/HT_tindex_master.csv"
full_tile_gpkg_key = "shared/montesano/databank/boreal_tiles_v004.gpkg"

# Both topic ARNs above live in us-west-2, so pin the SNS client to AWS_REGION
# instead of relying on whatever default region the environment provides.
sns_client = boto3.client("sns", region_name=AWS_REGION)
s3_client = boto3.client("s3")
full_tile_gdf = gpd.read_file(f"s3://{MAAP_OPS_BUCKET}/{full_tile_gpkg_key}")

Since we have to process two collections, I am defining a few functions to reduce duplicated code.

In [4]:
async def copy_s3_keys(
    source_bucket_name: str,
    destination_bucket_name: str,
    key_mapping_tuples: List[Tuple[str, str]],
    aws_region: str = AWS_REGION,
    max_concurrent_copies: int = 50,
):
    """
    Copy a list of S3 objects from one bucket to another, concurrently.

    Each object is copied with a `copy_object` request (nothing is downloaded
    locally) and may be given a different key in the destination bucket.
    Progress is shown with a tqdm bar, per-object errors are written next to
    the bar, and a one-line summary is printed when all copies have finished.
    Failures are reported but never raised.

    Args:
        source_bucket_name (str): Bucket to copy from.
        destination_bucket_name (str): Bucket to copy to.
        key_mapping_tuples (list): `(source_key, destination_key)` tuples, one
                                   per object to copy.
        aws_region (str): Region used to create the S3 client.
        max_concurrent_copies (int): Upper bound on the number of copy requests
                                     that may be in flight at the same time.
    """
    async with get_session().create_client("s3", region_name=aws_region) as client:
        # Every copy task must acquire this before issuing its request, which
        # caps the number of simultaneous copy_object calls.
        limiter = asyncio.Semaphore(max_concurrent_copies)

        async def _copy_one(src_key, dest_key):
            """Copy one object; return True on success, False on any error."""
            async with limiter:
                try:
                    await client.copy_object(
                        CopySource={"Bucket": source_bucket_name, "Key": src_key},
                        Bucket=destination_bucket_name,
                        Key=dest_key,
                    )
                except ClientError as e:
                    # A missing source object gets its own, clearer message.
                    if e.response["Error"]["Code"] == "NoSuchKey":
                        tqdm.write(
                            f"Error: Source key '{src_key}' not found in bucket '{source_bucket_name}'."
                        )
                    else:
                        tqdm.write(f'Error copying "{src_key}" to "{dest_key}": {e}')
                    return False
                except Exception as e:
                    tqdm.write(
                        f"An unexpected error occurred while copying '{src_key}' to '{dest_key}': {e}"
                    )
                    return False
                return True

        outcomes = await tqdm.gather(
            *[_copy_one(src_key, dest_key) for src_key, dest_key in key_mapping_tuples],
            desc="Copying S3 Objects",
            unit="file",
        )

    n_succeeded = outcomes.count(True)
    n_failed = outcomes.count(False)
    if n_failed == 0:
        print(
            f"\nCompleted S3 copy operation. All {n_succeeded} files copied successfully."
        )
    else:
        print(
            f"\nCompleted S3 copy operation. {n_succeeded} files copied successfully, {n_failed} failed."
        )


def read_inventory(inventory_key: str) -> pd.DataFrame:
    """Load an inventory CSV from the MAAP ops bucket into a DataFrame.

    The inventory lists the S3 paths of the COG files that should be cataloged.
    It was produced by Paul Montesano after all of the DPS jobs were completed.

    Args:
        inventory_key: Key of the inventory CSV within `MAAP_OPS_BUCKET`.

    Returns: a pandas DataFrame with the inventory information
    """
    store = S3Store(MAAP_OPS_BUCKET, region=AWS_REGION)
    # Fetch the whole object into memory and let pandas parse it from a buffer.
    csv_bytes = obs.get(store, inventory_key).bytes()
    return pd.read_csv(io.BytesIO(csv_bytes))


def generate_key_mappings(
    inventory: pd.DataFrame,
    variable: str,
) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
    """Build (source_key, destination_key) pairs for the COG and CSV assets.

    The pairs are used to copy files from the maap-ops-workspace bucket to the
    canonical bucket for MAAP products. For every inventory row, two mappings
    are produced: one for the COG named by `s3_path`, and one for its training
    data CSV, whose name is the COG name with the trailing `.tif` replaced by
    `_train_data.csv`.

    Args:
        inventory: DataFrame with an `s3_path` column (full `s3://` URI of the
            COG in `MAAP_OPS_BUCKET`) and an integer `tile_num` column.
        variable: Value substituted for `{variable}` in `DEST_KEY_FMT`
            (the notebook passes "agb" or "ht").

    Returns:
        `(cog_key_mappings, csv_key_mappings)`: two lists of the same length
        and in inventory row order.
    """
    source_prefix = f"s3://{MAAP_OPS_BUCKET}/"
    cog_suffix = ".tif"
    csv_suffix = "_train_data.csv"

    cog_key_mappings: List[Tuple[str, str]] = []
    csv_key_mappings: List[Tuple[str, str]] = []

    # A single pass over the two columns that are needed.
    for s3_path, tile_num in zip(inventory["s3_path"], inventory["tile_num"]):
        # Strip the bucket prefix only at the start and swap the extension only
        # at the end, so a ".tif" or "s3://..." substring elsewhere in the path
        # is left untouched.
        cog_src_key = s3_path.removeprefix(source_prefix)
        csv_src_key = cog_src_key.removesuffix(cog_suffix) + csv_suffix

        cog_name = Path(s3_path).name
        csv_name = cog_name.removesuffix(cog_suffix) + csv_suffix

        cog_key_mappings.append(
            (
                cog_src_key,
                DEST_KEY_FMT.format(variable=variable, tile=tile_num, name=cog_name),
            )
        )
        csv_key_mappings.append(
            (
                csv_src_key,
                DEST_KEY_FMT.format(variable=variable, tile=tile_num, name=csv_name),
            )
        )

    return cog_key_mappings, csv_key_mappings


def generate_asset_keys(
    cog_key_mappings: List[Tuple[str, str]],
    csv_key_mappings: List[Tuple[str, str]],
    bucket: str,
) -> List[Tuple[str, str]]:
    """Pair up COG and CSV destination keys as full `s3://` asset hrefs.

    Only the destination half of each (source, destination) mapping is used.
    The two lists are walked in parallel, so entry *i* of the result combines
    entry *i* of each input list.

    Returns: a list of (cog_href, csv_href) tuples for use in the STAC items
    """
    asset_keys = []
    for cog_mapping, csv_mapping in zip(cog_key_mappings, csv_key_mappings):
        _, cog_dest_key = cog_mapping
        _, csv_dest_key = csv_mapping
        asset_keys.append(
            (f"s3://{bucket}/{cog_dest_key}", f"s3://{bucket}/{csv_dest_key}")
        )
    return asset_keys


def generate_stactools_messages(
    asset_keys: List[Tuple[str, str]],
    collection_id: str,
    *,
    package_name: str = "git+https://github.com/MAAP-Project/icesat2-boreal-stac@feat/v3.0",
    group_name: str = "icesat2boreal",
) -> List[Dict[str, Any]]:
    """Generates the StactoolsItemGenerator messages

    Each message carries the stactools package source, the CLI group name, the
    positional arguments for the create-item command (cog_source, csv_source),
    and the id of the collection the item belongs to.

    Args:
        asset_keys: (cog_href, csv_href) tuples, one per item to generate.
        collection_id: Collection id written into every message.
        package_name: Package source to install. Defaults to the `feat/v3.0`
            branch of the icesat2-boreal-stac GitHub repo; override it to
            target a different ref or release without editing this function.
        group_name: stactools CLI group name. Defaults to `icesat2boreal`.

    Returns:
        One message dict per entry of `asset_keys`, in the same order.
    """
    return [
        {
            "package_name": package_name,
            "group_name": group_name,
            "create_item_args": [
                cog_key,
                csv_key,
            ],
            "collection_id": collection_id,
        }
        for cog_key, csv_key in asset_keys
    ]


def publish_stactools_messages(messages: List[Dict[str, Any]]) -> None:
    """Publish StactoolsItemGenerator messages to SNS in batches of 10.

    Each message is JSON-encoded and sent with `publish_batch` to
    `STACTOOLS_ITEM_GENERATOR_SNS_TOPIC_ARN`. Progress and per-entry failures
    are printed. Publishing is best effort: an exception raised for one batch
    is printed and the loop continues with the next batch.

    Args:
        messages: JSON-serializable message dicts, e.g. the output of
            `generate_stactools_messages`.
    """
    batch_size = 10

    for start in range(0, len(messages), batch_size):
        batch = messages[start : start + batch_size]

        # Entry ids are derived from the message's position in `messages`, so
        # they are unique within each batch.
        batch_entries = [
            {
                "Id": f"msg-{start + offset:04d}",
                "Message": json.dumps(message),
            }
            for offset, message in enumerate(batch)
        ]

        # Use the real batch length so a final, partial batch reports the index
        # of its actual last message rather than a full batch's worth.
        print(
            f"\n--- Processing batch {start // batch_size + 1} (messages {start} to {start + len(batch) - 1}) ---"
        )

        try:
            # Publish the batch to SNS
            response = sns_client.publish_batch(
                TopicArn=STACTOOLS_ITEM_GENERATOR_SNS_TOPIC_ARN,
                PublishBatchRequestEntries=batch_entries,
            )

            # Check the response for successful and failed messages
            successful = response.get("Successful")
            if successful:
                print(
                    f"  Successfully published {len(successful)} messages in this batch."
                )
            failed = response.get("Failed")
            if failed:
                print(f"  Failed to publish {len(failed)} messages in this batch:")
                for failure in failed:
                    print(
                        f"    - ID: {failure['Id']}, Code: {failure.get('Code', 'N/A')}, Message: {failure.get('Message', 'N/A')}"
                    )

        except Exception as e:
            # Best effort: report the failed batch and keep going.
            print(f"  An error occurred while publishing this batch: {e}")

        # Small delay to avoid hitting API rate limits
        time.sleep(0.1)

    print("\n--- All batches processed ---")

## Tile geopackage

We need to upload a copy of the tile geometry geopackage that contains only the tiles present in the AGB and height inventories, so that it matches the tiles covered by these collections.

In [5]:
# Read both inventories so the published tile set covers the AGB and the
# height collections.
agb_inventory = read_inventory(agb_inventory_key)
ht_inventory = read_inventory(ht_inventory_key)

# Union of the tile numbers that appear in either inventory.
tile_nums = set(agb_inventory["tile_num"].tolist() + ht_inventory["tile_num"].tolist())

# Keep only the matching tiles, and only the tile number and geometry columns.
public_tile_gdf = full_tile_gdf.loc[
    full_tile_gdf["tile_num"].isin(tile_nums), ["tile_num", "geometry"]
]

# Write the subset to a local file first; upload_file takes a local path.
gpkg = "/tmp/boreal_tiles_v004.gpkg"
public_tile_gdf.to_file(gpkg)

# The destination bucket and key come from stactools.icesat2_boreal.constants.
s3_client.upload_file(
    gpkg,
    TILE_GPKG_BUCKET,
    TILE_GPKG_KEY,
    ExtraArgs={"ACL": "bucket-owner-full-control"},
)

## AGB

### Collection
Start by creating the collection and validating it using `pystac` and `stac-pydantic`

In [6]:
# Build the AGB collection with the stactools package.
agb_collection = create_collection(Variable.AGB)

# validate
# Both results are discarded (assigned to `_`); presumably a problem is
# signalled by an exception from either call — confirm.
_ = agb_collection.validate()  # pystac
_ = Collection(**agb_collection.to_dict())  # stac-pydantic

# Show the collection JSON for a visual check.
print(json.dumps(agb_collection.to_dict(), indent=2))

{
  "type": "Collection",
  "id": "icesat2-boreal-v3.0-agb",
  "stac_version": "1.1.0",
  "description": "This dataset provides predictions of woody aboveground biomass density (AGBD) and vegetation height for high northern latitude forests at 30 m spatial resolution for the year 2020, accounting for >30% of global forest area.\n\nMaps of woody AGBD and height are essential for understanding patterns of forest structure, including the mass of forest vegetation, its carbon content, and its vertical and horizontal arrangement across managed and unmanaged landscapes. These maps are optimized to visualize these patterns, monitor forest conditions, and manage forest carbon stocks and their changes. The information contained in these maps provides insights into the current conditions and shifts in a global biome that is shaped by natural processes that play out across decades to millennia, as well as human decisions, and whose status and functioning affects wildlife, the climate, economies, 

Post the valid collection JSON to the `StacLoader` SNS topic

In [7]:
# post collection to StacLoader
response = sns_client.publish(
    TopicArn=STAC_LOADER_SNS_TOPIC_ARN, Message=json.dumps(agb_collection.to_dict())
)
print(response)

{'MessageId': 'b962be3d-e0d5-57a4-bd3d-ded692ba6976', 'ResponseMetadata': {'RequestId': 'ed81d63f-559a-53bc-8259-b6d427f45d6e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ed81d63f-559a-53bc-8259-b6d427f45d6e', 'date': 'Fri, 11 Jul 2025 20:12:31 GMT', 'content-type': 'text/xml', 'content-length': '294', 'connection': 'keep-alive'}, 'RetryAttempts': 0}}


### Items

Read the inventory CSV that Paul Montesano produced for the AGB predictions.

In [12]:
# One row per AGB COG; the `s3_path` and `tile_num` columns are used below.
agb_inventory = read_inventory(agb_inventory_key)
# Optional: uncomment to restrict the run to the first 100 rows (presumably for a trial run).
# agb_inventory = agb_inventory.iloc[:100]

print(agb_inventory.shape)
agb_inventory.head()

(4950, 5)


Unnamed: 0,index,s3_path,local_path,file,tile_num
0,0,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_agb_2020_202506061749236725_0003543.tif,3543
1,1,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_agb_2020_202506061749236739_0003637.tif,3637
2,2,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_agb_2020_202506061749236743_0003732.tif,3732
3,3,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_agb_2020_202506061749236786_0003731.tif,3731
4,4,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_agb_2020_202506061749236758_0003638.tif,3638


Prepare a list of source/destination S3 key paths so we can copy the data out of `maap-ops-workspace` into the 'public' bucket (`nasa-maap-data-store`).

In [6]:
# "agb" fills the {variable} segment of DEST_KEY_FMT; every mapping is a
# (source_key, destination_key) tuple.
agb_cog_key_mappings, agb_csv_key_mappings = generate_key_mappings(agb_inventory, "agb")
# Print the first mapping of each list as a spot check.
print(agb_cog_key_mappings[0])
print(agb_csv_key_mappings[0])

('aliz237/dps_output/run_boreal_biomass_map/v3.0.0/AGB_H30_2020/full_run/2025/06/06/12/29/57/749959/boreal_agb_2020_202506061749236725_0003543.tif', 'file-staging/nasa-map/icesat2-boreal-v3.0/agb/0003543/boreal_agb_2020_202506061749236725_0003543.tif')
('aliz237/dps_output/run_boreal_biomass_map/v3.0.0/AGB_H30_2020/full_run/2025/06/06/12/29/57/749959/boreal_agb_2020_202506061749236725_0003543_train_data.csv', 'file-staging/nasa-map/icesat2-boreal-v3.0/agb/0003543/boreal_agb_2020_202506061749236725_0003543_train_data.csv')


Copy the files over to the destination bucket

In [14]:
# The COG and CSV mappings are concatenated so both asset types are copied in one call.
await copy_s3_keys(
    source_bucket_name=MAAP_OPS_BUCKET,
    destination_bucket_name=ASSET_DEST_BUCKET,
    key_mapping_tuples=agb_cog_key_mappings + agb_csv_key_mappings,
    aws_region=AWS_REGION,
)

Copying S3 Objects: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:32<00:00,  6.23file/s]


Completed S3 copy operation. All 200 files copied successfully.





Now that the assets have been copied to the public bucket, generate the hrefs for the assets with the full `s3://{bucket}` prefix.

In [7]:
# Only the destination keys of the mappings are used; each entry is a
# (cog_href, csv_href) tuple.
agb_asset_keys = generate_asset_keys(
    agb_cog_key_mappings, agb_csv_key_mappings, bucket=ASSET_DEST_BUCKET
)
agb_asset_keys[0]

('s3://hrodmn-scratch/file-staging/nasa-map/icesat2-boreal-v3.0/agb/0003543/boreal_agb_2020_202506061749236725_0003543.tif',
 's3://hrodmn-scratch/file-staging/nasa-map/icesat2-boreal-v3.0/agb/0003543/boreal_agb_2020_202506061749236725_0003543_train_data.csv')

Try creating a STAC item with those asset hrefs:

In [8]:
# Build one item locally from the first (cog, csv) href pair as a spot check
# before dispatching messages for the whole collection.
test_item = create_item(*agb_asset_keys[0])
print(json.dumps(test_item.to_dict(), indent=2))

{
  "type": "Feature",
  "stac_version": "1.1.0",
  "stac_extensions": [
    "https://stac-extensions.github.io/projection/v1.1.0/schema.json"
  ],
  "id": "boreal_agb_2020_202506061749236725_0003543",
  "geometry": {
    "type": "Polygon",
    "coordinates": [
      [
        [
          -167.7268555007834,
          65.958744412074
        ],
        [
          -165.7687890462486,
          65.80241344492612
        ],
        [
          -165.33767128760388,
          66.58197078937214
        ],
        [
          -167.35203068347897,
          66.74321074546818
        ],
        [
          -167.7268555007834,
          65.958744412074
        ]
      ]
    ]
  },
  "bbox": [
    -167.7268555007834,
    65.80241344492612,
    -165.33767128760388,
    66.74321074546818
  ],
  "properties": {
    "start_datetime": "2020-01-01T00:00:00+00:00",
    "end_datetime": "2020-12-31T23:59:59+00:00",
    "created_datetime": "2025-06-06T00:00:00+00:00",
    "proj:epsg": null,
    "proj:geom

If that looks good, generate the messages to dispatch to the `StactoolsItemGenerator`

In [9]:
# One message per (cog, csv) href pair, each tagged with the AGB collection id.
agb_stactools_messages = generate_stactools_messages(agb_asset_keys, agb_collection.id)

# Print the first message as a sample.
print(json.dumps(agb_stactools_messages[0], indent=2))

{
  "package_name": "git+https://github.com/MAAP-Project/icesat2-boreal-stac@feat/v3.0",
  "group_name": "icesat2boreal",
  "create_item_args": [
    "s3://hrodmn-scratch/file-staging/nasa-map/icesat2-boreal-v3.0/agb/0003543/boreal_agb_2020_202506061749236725_0003543.tif",
    "s3://hrodmn-scratch/file-staging/nasa-map/icesat2-boreal-v3.0/agb/0003543/boreal_agb_2020_202506061749236725_0003543_train_data.csv"
  ],
  "collection_id": "icesat2-boreal-v3.0-agb"
}


Publish the messages to the `StactoolsItemGenerator` SNS topic!

In [10]:
# Messages are sent in batches of 10; progress is printed per batch.
publish_stactools_messages(agb_stactools_messages)


--- Processing batch 1 (messages 0 to 9) ---
  Successfully published 10 messages in this batch.

--- Processing batch 2 (messages 10 to 19) ---
  Successfully published 10 messages in this batch.

--- Processing batch 3 (messages 20 to 29) ---
  Successfully published 10 messages in this batch.

--- Processing batch 4 (messages 30 to 39) ---
  Successfully published 10 messages in this batch.

--- Processing batch 5 (messages 40 to 49) ---
  Successfully published 10 messages in this batch.

--- Processing batch 6 (messages 50 to 59) ---
  Successfully published 10 messages in this batch.

--- Processing batch 7 (messages 60 to 69) ---
  Successfully published 10 messages in this batch.

--- Processing batch 8 (messages 70 to 79) ---
  Successfully published 10 messages in this batch.

--- Processing batch 9 (messages 80 to 89) ---
  Successfully published 10 messages in this batch.

--- Processing batch 10 (messages 90 to 99) ---
  Successfully published 10 messages in this batch.



## Height

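Repeat the same steps (create and post the collection, copy the assets, and dispatch the item-generation messages) for the height collection (`icesat2-boreal-v3.0-ht`).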

In [8]:
# Build the height collection with the stactools package.
ht_collection = create_collection(Variable.HT)

# validate
# Both results are discarded (assigned to `_`); presumably a problem is
# signalled by an exception from either call — confirm.
_ = ht_collection.validate()  # pystac
_ = Collection(**ht_collection.to_dict())  # stac-pydantic

# Show the collection JSON for a visual check.
print(json.dumps(ht_collection.to_dict(), indent=2))

{
  "type": "Collection",
  "id": "icesat2-boreal-v3.0-ht",
  "stac_version": "1.1.0",
  "description": "This dataset provides predictions of woody aboveground biomass density (AGBD) and vegetation height for high northern latitude forests at 30 m spatial resolution for the year 2020, accounting for >30% of global forest area.\n\nMaps of woody AGBD and height are essential for understanding patterns of forest structure, including the mass of forest vegetation, its carbon content, and its vertical and horizontal arrangement across managed and unmanaged landscapes. These maps are optimized to visualize these patterns, monitor forest conditions, and manage forest carbon stocks and their changes. The information contained in these maps provides insights into the current conditions and shifts in a global biome that is shaped by natural processes that play out across decades to millennia, as well as human decisions, and whose status and functioning affects wildlife, the climate, economies, a

In [9]:
# post collection to StacLoader
response = sns_client.publish(
    TopicArn=STAC_LOADER_SNS_TOPIC_ARN, Message=json.dumps(ht_collection.to_dict())
)
print(response)

{'MessageId': '09c7d4d6-c380-5b19-8a25-b43db1f77fa8', 'ResponseMetadata': {'RequestId': 'd6c4d7d4-806e-571a-913c-b73ff21923ab', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'd6c4d7d4-806e-571a-913c-b73ff21923ab', 'date': 'Fri, 11 Jul 2025 20:12:39 GMT', 'content-type': 'text/xml', 'content-length': '294', 'connection': 'keep-alive'}, 'RetryAttempts': 0}}


In [10]:
# One row per height COG; the `s3_path` and `tile_num` columns are used below.
ht_inventory = read_inventory(ht_inventory_key)
# Optional: uncomment to restrict the run to the first 100 rows (presumably for a trial run).
# ht_inventory = ht_inventory.iloc[:100]

print(ht_inventory.shape)
ht_inventory.head()

(4950, 5)


Unnamed: 0,index,s3_path,local_path,file,tile_num
0,0,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202506061749238093_0003543.tif,3543
1,1,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202506061749238118_0035805.tif,35805
2,2,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202506061749238121_0035804.tif,35804
3,3,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202506061749238167_0001575.tif,1575
4,4,s3://maap-ops-workspace/aliz237/dps_output/run...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202506061749238134_0035803.tif,35803


In [14]:
# "ht" fills the {variable} segment of DEST_KEY_FMT; every mapping is a
# (source_key, destination_key) tuple.
ht_cog_key_mappings, ht_csv_key_mappings = generate_key_mappings(ht_inventory, "ht")
# Print the first mapping of each list as a spot check.
print(ht_cog_key_mappings[0])
print(ht_csv_key_mappings[0])

('aliz237/dps_output/run_boreal_biomass_map/v3.0.0/Ht_H30_2020/full_run/2025/06/06/12/58/00/580429/boreal_ht_2020_202506061749238093_0003543.tif', 'file-staging/nasa-map/icesat2-boreal-v3.0/ht/0003543/boreal_ht_2020_202506061749238093_0003543.tif')
('aliz237/dps_output/run_boreal_biomass_map/v3.0.0/Ht_H30_2020/full_run/2025/06/06/12/58/00/580429/boreal_ht_2020_202506061749238093_0003543_train_data.csv', 'file-staging/nasa-map/icesat2-boreal-v3.0/ht/0003543/boreal_ht_2020_202506061749238093_0003543_train_data.csv')


In [31]:
# Copy the height COGs and their training-data CSVs to the destination bucket in one call.
await copy_s3_keys(
    source_bucket_name=MAAP_OPS_BUCKET,
    destination_bucket_name=ASSET_DEST_BUCKET,
    key_mapping_tuples=ht_cog_key_mappings + ht_csv_key_mappings,
    aws_region=AWS_REGION,
)

Copying S3 Objects: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:26<00:00,  7.53file/s]


Completed S3 copy operation. All 200 files copied successfully.





In [15]:
# Build the full s3:// asset hrefs from the destination keys; each entry is a
# (cog_href, csv_href) tuple.
ht_asset_keys = generate_asset_keys(
    ht_cog_key_mappings, ht_csv_key_mappings, bucket=ASSET_DEST_BUCKET
)
ht_asset_keys[0]

('s3://hrodmn-scratch/file-staging/nasa-map/icesat2-boreal-v3.0/ht/0003543/boreal_ht_2020_202506061749238093_0003543.tif',
 's3://hrodmn-scratch/file-staging/nasa-map/icesat2-boreal-v3.0/ht/0003543/boreal_ht_2020_202506061749238093_0003543_train_data.csv')

In [16]:
# Build one item locally from the first (cog, csv) href pair as a spot check
# before dispatching messages for the whole collection.
test_item = create_item(*ht_asset_keys[0])
print(json.dumps(test_item.to_dict(), indent=2))

{
  "type": "Feature",
  "stac_version": "1.1.0",
  "stac_extensions": [
    "https://stac-extensions.github.io/projection/v1.1.0/schema.json"
  ],
  "id": "boreal_ht_2020_202506061749238093_0003543",
  "geometry": {
    "type": "Polygon",
    "coordinates": [
      [
        [
          -167.7268555007834,
          65.958744412074
        ],
        [
          -165.7687890462486,
          65.80241344492612
        ],
        [
          -165.33767128760388,
          66.58197078937214
        ],
        [
          -167.35203068347897,
          66.74321074546818
        ],
        [
          -167.7268555007834,
          65.958744412074
        ]
      ]
    ]
  },
  "bbox": [
    -167.7268555007834,
    65.80241344492612,
    -165.33767128760388,
    66.74321074546818
  ],
  "properties": {
    "start_datetime": "2020-01-01T00:00:00+00:00",
    "end_datetime": "2020-12-31T23:59:59+00:00",
    "created_datetime": "2025-06-06T00:00:00+00:00",
    "proj:epsg": null,
    "proj:geome

In [17]:
# One message per (cog, csv) href pair, each tagged with the height collection id.
ht_stactools_messages = generate_stactools_messages(ht_asset_keys, ht_collection.id)

# Print the first message as a sample.
print(json.dumps(ht_stactools_messages[0], indent=2))

{
  "package_name": "git+https://github.com/MAAP-Project/icesat2-boreal-stac@feat/v3.0",
  "group_name": "icesat2boreal",
  "create_item_args": [
    "s3://hrodmn-scratch/file-staging/nasa-map/icesat2-boreal-v3.0/ht/0003543/boreal_ht_2020_202506061749238093_0003543.tif",
    "s3://hrodmn-scratch/file-staging/nasa-map/icesat2-boreal-v3.0/ht/0003543/boreal_ht_2020_202506061749238093_0003543_train_data.csv"
  ],
  "collection_id": "icesat2-boreal-v3.0-ht"
}


In [18]:
# Dispatch the height messages to the StactoolsItemGenerator topic in batches of 10.
publish_stactools_messages(ht_stactools_messages)


--- Processing batch 1 (messages 0 to 9) ---
  Successfully published 10 messages in this batch.

--- Processing batch 2 (messages 10 to 19) ---
  Successfully published 10 messages in this batch.

--- Processing batch 3 (messages 20 to 29) ---
  Successfully published 10 messages in this batch.

--- Processing batch 4 (messages 30 to 39) ---
  Successfully published 10 messages in this batch.

--- Processing batch 5 (messages 40 to 49) ---
  Successfully published 10 messages in this batch.

--- Processing batch 6 (messages 50 to 59) ---
  Successfully published 10 messages in this batch.

--- Processing batch 7 (messages 60 to 69) ---
  Successfully published 10 messages in this batch.

--- Processing batch 8 (messages 70 to 79) ---
  Successfully published 10 messages in this batch.

--- Processing batch 9 (messages 80 to 89) ---
  Successfully published 10 messages in this batch.

--- Processing batch 10 (messages 90 to 99) ---
  Successfully published 10 messages in this batch.

