In [None]:
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "boto3",
#     "httpx",
#     "obstore",
#     "pystac",
#     "pypgstac[psycopg]==0.8.5",
#     "stactools-glad-global-forest-change==0.1.1",
#     "tqdm",
# ]
#
# ///

In [None]:
import asyncio
import json
import os
from collections import defaultdict
from typing import List

import boto3
import httpx
import obstore
import obstore.store
import pystac
import tqdm
from pypgstac.db import PgstacDB
from pypgstac.load import Loader, Methods
from pystac.extensions.version import VersionRelType

from stactools.glad_global_forest_change.stac import (
    create_item,
    create_collection,
    parse_filename, 
)


# Override the STAC version pystac uses so objects are serialized as 1.0.0.
# NOTE(review): presumably the version the ingestor expects — confirm.
pystac.set_stac_version("1.0.0")


# Base URL of the STAC ingestor API that the collection and items are POSTed to.
INGESTOR_URL = "https://stac-ingestor.maap-project.org"

# S3 bucket and key prefix that are listed to discover the source files.
BUCKET = "nasa-maap-data-store"
PREFIX = "file-staging/nasa-map/glad-global-forest-change-v1.11"


def create_inventory() -> List[List[str]]:
    """List the ``.tif`` objects under ``PREFIX`` in ``BUCKET``, grouped by item ID.

    Every listed path ending in ``.tif`` has its basename passed to
    ``parse_filename``; the ``"id"`` entry of the result decides which group
    the file's ``s3://`` href is appended to.

    Returns:
        One list of ``s3://`` hrefs per item ID, in the order the IDs were
        first encountered in the listing.

    Raises:
        ValueError: If ``parse_filename`` returns a falsy result for a
            ``.tif`` object, i.e. the file cannot be assigned to an item.
    """
    store = obstore.store.S3Store(
        bucket=BUCKET,
        prefix=PREFIX,
        region="us-west-2",
    )

    hrefs_by_item_id = defaultdict(list)
    for batch in obstore.list(store):
        for meta in batch:
            key = meta["path"]

            # Match the extension itself, not any name that merely ends in "tif"
            if not key.endswith(".tif"):
                continue

            parsed = parse_filename(os.path.basename(key))
            if not parsed:
                # An explicit error (rather than `assert`) survives `python -O`
                # and reports which object broke the naming convention.
                raise ValueError(f"Could not parse an item ID from {key!r}")

            # The store prefix is prepended to each listed path to form the href
            href = f"s3://{BUCKET}/{store.prefix}/{key}"
            hrefs_by_item_id[parsed["id"]].append(href)

    return list(hrefs_by_item_id.values())

async def post_item(client: httpx.AsyncClient, item, token: str) -> None:
    """POST a single item to the STAC ingestor's ``/ingestions`` endpoint.

    Args:
        client: Async HTTP client used to send the request.
        item: Object exposing ``to_dict()``; the result is sent as the JSON body.
        token: Bearer token placed in the ``Authorization`` header.

    Raises:
        Exception: Whatever was raised while serializing or sending the item,
            including ``httpx.HTTPStatusError`` for an error status code. The
            failure is printed first and then re-raised unchanged.
    """
    # Resolved up front so a failure can be traced back to a specific item
    # even when many posts are in flight at once.
    item_id = getattr(item, "id", "<unknown>")
    try:
        response = await client.post(
            f"{INGESTOR_URL}/ingestions",
            json=item.to_dict(),
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
        )
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        # The status line alone rarely explains a rejection; include the body,
        # which may carry the server's reason.
        print(f"Error posting item {item_id}: {e}\n{e.response.text}")
        raise
    except Exception as e:
        print(f"Error posting item {item_id}: {e}")
        raise

async def post_all_items(items: List, token: str, max_concurrent: int = 20) -> None:
    """Post every item through ``post_item`` with bounded concurrency.

    Args:
        items: Items to post; each is handed to ``post_item`` unchanged.
        token: Bearer token forwarded to ``post_item``.
        max_concurrent: Maximum number of ``post_item`` calls in flight at once.

    Raises:
        Exception: The first failure raised by any ``post_item`` call. Posts
            that have not finished at that point are cancelled.
    """
    async with httpx.AsyncClient(timeout=60) as client:
        semaphore = asyncio.Semaphore(max_concurrent)

        async def bounded_post(item):
            async with semaphore:
                return await post_item(client, item, token)

        # Real Task objects (not bare coroutines) so they can be cancelled below
        tasks = [asyncio.create_task(bounded_post(item)) for item in items]

        try:
            for next_done in tqdm.tqdm(asyncio.as_completed(tasks), total=len(tasks)):
                await next_done
        finally:
            # If a post failed, the remaining tasks would otherwise keep running
            # against a client that is about to be closed. Cancel them and wait
            # for them to settle before leaving the `async with`. On success
            # every task is already done, so this is a no-op.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)

In [None]:
# MAAP SMCE AWS credentials are required here. None are passed explicitly, so
# boto3 falls back to its default credential lookup (environment variables,
# shared config/profile); prefer that over pasting keys into the notebook.
session = boto3.Session(
    region_name="us-west-2",
)
client = session.client("secretsmanager", region_name="us-west-2")

# Fetch the MAAP STAC auth secret from Secrets Manager.
response = client.get_secret_value(
    SecretId="arn:aws:secretsmanager:us-west-2:916098889494:secret:MAAP-STAC-auth-dev/MAAP-workflows-EsykqB"
)

# The secret string is JSON; the token request below reads client_id,
# client_secret, cognito_domain and scope from it.
settings = json.loads(response["SecretString"])

# function to get token for STAC ingestor
def get_token(
    client_id: str,
    client_secret: str,
    domain: str,
    scope: str,
) -> str:
    """Request an access token with the OAuth2 client-credentials grant.

    Args:
        client_id: Client ID, sent as the HTTP basic-auth username.
        client_secret: Client secret, sent as the HTTP basic-auth password.
        domain: Base URL of the auth server; ``/oauth2/token`` is appended.
            A trailing slash is tolerated.
        scope: Scope requested for the token.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        httpx.HTTPStatusError: If the token endpoint answers with an error status.
    """
    response = httpx.post(
        # Strip a trailing slash so the path never becomes "//oauth2/token"
        f"{domain.rstrip('/')}/oauth2/token",
        headers={
            "Content-Type": "application/x-www-form-urlencoded",
        },
        auth=(client_id, client_secret),
        data={
            "grant_type": "client_credentials",
            "scope": scope,
        },
    )
    # Errors propagate unchanged; no wrapper is needed just to re-raise
    response.raise_for_status()

    return response.json()["access_token"]


# Exchange the client credentials stored in the secret for a bearer token.
token = get_token(
    client_id=settings["client_id"],
    client_secret=settings["client_secret"],
    domain=settings["cognito_domain"],
    scope=settings["scope"],
)

In [None]:
# List the bucket and group the file hrefs per item ID; the displayed length
# is the number of groups, i.e. how many items will be built from them.
inventory = create_inventory()
len(inventory)

In [None]:
# Build the collection and register it with the ingestor before any items
# are posted to it.
collection = create_collection(cogs=True)

post_collection = httpx.post(
    f"{INGESTOR_URL}/collections",
    json=collection.to_dict(),
    headers={
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    },
)
if not post_collection.is_success:
    # Show the raw body: an error response is not guaranteed to be JSON
    print(post_collection.text)
# Stop here on failure so later cells do not post items to a collection that
# was never created.
post_collection.raise_for_status()
print(post_collection.json())

In [None]:
# Build one STAC item per inventory entry (the hrefs grouped under one item ID).
items = []
for hrefs in inventory:
    item = create_item(asset_hrefs=hrefs, cogs=True)
    # Record the parent collection's ID on the item before it is posted.
    item.collection_id = collection.id
    items.append(item)

In [None]:
# Top-level `await` is valid here because this runs as a notebook cell.
await post_all_items(items, token)

In [None]:
# Display the first item as a spot check of what was built and posted.
items[0]