In [None]:
# Inline script metadata (PEP 723). Every package the notebook imports directly
# (boto3, httpx, pystac) is declared here rather than relied on transitively.
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "boto3",
#     "httpx",
#     "icesat2-boreal-stac",
#     "pystac",
#     "smart-open",
#     "tqdm",
# ]
#
# [tool.uv.sources]
# icesat2-boreal-stac = { git = "https://github.com/MAAP-project/icesat2-boreal-stac.git", rev = "0.2.3" }
# ///

# Update version metadata in the original icesat2-boreal collection

The goal is to add a `deprecated` tag to the collection metadata and provide a link to the latest version (icesat2-boreal-v2.1-agb).


In [None]:
import asyncio
import json
from typing import Any, Dict, List

import boto3
import httpx
import pystac
import smart_open
import tqdm

from icesat2_boreal_stac.stac import create_collection, Variable

pystac.set_stac_version("1.1.0")

# STAC ingestor URL
INGESTOR_URL = "https://stac-ingestor.maap-project.org"

## Step 1: Get a token for the STAC ingestor API
Run this step with credentials for the SMCE MAAP AWS account set in the environment.


In [None]:
# No credentials are passed explicitly: boto3 is expected to pick up the
# MAAP SMCE AWS credentials from the environment. Do not paste secrets into
# this cell.
AWS_REGION = "us-west-2"

# ARN of the Secrets Manager entry holding the MAAP STAC auth settings.
STAC_AUTH_SECRET_ARN = "arn:aws:secretsmanager:us-west-2:916098889494:secret:MAAP-STAC-auth-dev/MAAP-workflows-EsykqB"

session = boto3.Session(region_name=AWS_REGION)
client = session.client("secretsmanager", region_name=AWS_REGION)

# The secret payload is a JSON document; decode it into a plain dict.
response = client.get_secret_value(SecretId=STAC_AUTH_SECRET_ARN)
settings = json.loads(response["SecretString"])

# function to get token for STAC ingestor
def get_token(
    client_id: str,
    client_secret: str,
    domain: str,
    scope: str
) -> str:
    """Request an OAuth2 access token using the client-credentials grant.

    Args:
        client_id: Client id, sent as the HTTP basic-auth username.
        client_secret: Client secret, sent as the HTTP basic-auth password.
        domain: Base URL of the auth server; ``/oauth2/token`` is appended.
        scope: Scope string sent in the form body.

    Returns:
        The value of ``access_token`` in the JSON response body.

    Raises:
        httpx.HTTPStatusError: If the token endpoint answers with a 4xx/5xx
            status.
        KeyError: If the response body has no ``access_token`` field.
    """
    token_response = httpx.post(
        f"{domain}/oauth2/token",
        headers={
            "Content-Type": "application/x-www-form-urlencoded",
        },
        auth=(client_id, client_secret),
        data={
            "grant_type": "client_credentials",
            "scope": scope,
        },
    )
    # Surface auth failures directly; the previous try/except only re-raised.
    token_response.raise_for_status()

    return token_response.json()["access_token"]


# Exchange the client credentials stored in the secret for a bearer token.
token = get_token(
    client_id=settings["client_id"],
    client_secret=settings["client_secret"],
    domain=settings["cognito_domain"],
    scope=settings["scope"],
)

## Step 2: Create the collection objects


In [None]:
# Build one STAC collection per variable (AGB and HT).
agb_collection = create_collection(Variable.AGB)
ht_collection = create_collection(Variable.HT)

# Pretty-print the AGB collection as indented JSON for a visual check.
agb_collection_json = json.dumps(agb_collection.to_dict(), indent=2)
print(agb_collection_json)

## Step 3: Post collections to the STAC ingestor API

In [None]:
# Submit the AGB collection to the ingestor.
# timeout=None disables httpx's request timeout for this call.
post_agb = httpx.post(
    f"{INGESTOR_URL}/collections",
    json=agb_collection.to_dict(),
    headers={
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    },
    timeout=None,
)
print(post_agb.json())
# Fail the cell on a 4xx/5xx response (after printing the body above) so a
# Run All does not continue to item ingestion against a missing collection.
post_agb.raise_for_status()

In [None]:
# Submit the HT collection to the ingestor.
# timeout=None disables httpx's request timeout for this call.
post_ht = httpx.post(
    f"{INGESTOR_URL}/collections",
    json=ht_collection.to_dict(),
    headers={
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    },
    timeout=None,
)
print(post_ht.json())
# Fail the cell on a 4xx/5xx response (after printing the body above) so a
# Run All does not continue to item ingestion against a missing collection.
post_ht.raise_for_status()

## Step 4: Load items to the ingestor API

The items were generated in a [separate process](./item-gen.py) and written to an NDJSON file.

In [None]:
ndjson_key = "s3://maap-ops-workspace/henrydevseed/icesat2-boreal-v2.1/items.ndjson"

# Read the NDJSON file and decode one item per line. Blank lines (for example
# a stray empty line at the end of the file) are skipped rather than handed
# to json.loads, which would raise on them.
with smart_open.open(ndjson_key) as src:
    items = [json.loads(line) for line in src if line.strip()]

In [None]:
async def post_item(client: httpx.AsyncClient, item: Dict[str, Any], token: str) -> None:
    """Send one item to the ingestor's ``/ingestions`` endpoint.

    The item is sent as the JSON body with ``token`` as a bearer token.
    Any exception (including a non-success HTTP status) is printed and
    then re-raised to the caller.
    """
    try:
        auth_headers = {
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json',
        }
        result = await client.post(
            f"{INGESTOR_URL}/ingestions",
            json=item,
            headers=auth_headers,
        )
        result.raise_for_status()
    except Exception as exc:
        # Report the failure, then propagate it unchanged.
        print(f"Error posting item: {exc}")
        raise

async def post_all_items(items: List, token: str, max_concurrent: int = 20) -> None:
    """Post all items concurrently with a limit on concurrent requests.

    Args:
        items: Items to post; each one is passed to ``post_item``.
        token: Bearer token forwarded to ``post_item``.
        max_concurrent: Maximum number of requests in flight at once.

    Raises:
        Exception: The first error raised by any ``post_item`` call. Posts
            that are still pending at that point are cancelled before the
            error propagates.
    """
    async with httpx.AsyncClient(timeout=60) as client:
        semaphore = asyncio.Semaphore(max_concurrent)

        async def bounded_post(item):
            # The semaphore caps how many posts run at the same time.
            async with semaphore:
                return await post_item(client, item, token)

        # Wrap each coroutine in a Task up front so there are handles that can
        # be cancelled if one of the posts fails.
        tasks = [asyncio.ensure_future(bounded_post(item)) for item in items]

        try:
            for finished in tqdm.tqdm(asyncio.as_completed(tasks), total=len(tasks)):
                await finished
        finally:
            # After a failure, stop the remaining posts and wait for them to
            # unwind while the client is still open; otherwise they would keep
            # running against a closed client and emit unretrieved-exception
            # warnings. On success every task is done and this is a no-op.
            for pending in tasks:
                pending.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)


In [None]:
# Post every loaded item using the token obtained in Step 1.
# NOTE(review): top-level `await` relies on the notebook kernel's autoawait
# support; in a plain script this call would need asyncio.run() — confirm.
await post_all_items(items, token)