In [1]:
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "httpx",
#     "pandas",
#     "pystac",
#     "pystac-client",
#     "tabulate",
# ]
# ///

# Set MAAP as the 'host' Provider for STAC collections

Many of the STAC collections do not have MAAP set as the 'host' in the list of Providers. This notebook assigns MAAP as the host for collections where the assets are stored in a MAAP bucket.

In [1]:
import json

import pandas as pd
import httpx
from pystac import Collection, Provider, ProviderRole
from pystac_client import Client

client = Client.open("https://stac.maap-project.org")

# fetch all collections
collections = list(client.get_all_collections())

Create a table that shows the existing providers for each collection

In [41]:
data_for_df = []

for collection in collections:
    providers = collection.providers or []
    if not providers:
        data_for_df.append({
            "Collection ID": collection.id,
            "Provider Name": "N/A",
            "Roles": "N/A",
        })
    else:
        for provider in providers:
            roles_str = ", ".join(role for role in provider.roles)
            data_for_df.append(
                {
                    "Collection ID": collection.id,
                    "Provider Name": provider.name,
                    "Roles": roles_str
                }
            )


df = (
    pd.DataFrame(data_for_df)
    .sort_values(by=["Collection ID", "Provider Name"])
    .set_index("Collection ID")
)
print(df.to_markdown())


| Collection ID                        | Provider Name                                                             | Roles                         |
|:-------------------------------------|:--------------------------------------------------------------------------|:------------------------------|
| ABoVE_UAVSAR_PALSAR                  | N/A                                                                       | N/A                           |
| AFRISAR_DLR                          | N/A                                                                       | N/A                           |
| AFRISAR_DLR2                         | N/A                                                                       | N/A                           |
| AfriSAR_UAVSAR_Coreg_SLC             | N/A                                                                       | N/A                           |
| AfriSAR_UAVSAR_Geocoded_Covariance   | N/A                                                              

Identify collections where the assets do not appear to be hosted by MAAP based on the asset hrefs.

In [42]:
non_maap_assets = {}
for collection in collections:
    try:
        item = next(collection.get_all_items())
        assets = {key: asset.href for key, asset in item.assets.items() if not asset.href.startswith("s3://nasa-maap-data-store")}
        if assets:
            non_maap_assets[collection.id] = assets
    except StopIteration:
        continue

print(json.dumps(non_maap_assets, indent=2))

{
  "ABoVE_UAVSAR_PALSAR": {
    "https://datapool.asf.alaska.edu/BROWSE/UA/AKLAVI_25702_17070_003_170622_L090_CX_01.gif": "https://datapool.asf.alaska.edu/BROWSE/UA/AKLAVI_25702_17070_003_170622_L090_CX_01.gif"
  },
  "AFRISAR_DLR": {
    "dbf": "https://bmap-catalogue-data.oss.eu-west-0.prod-cloud-ocb.orange-business.com/Campaign_data/afrisar_dlr/afrisar_dlr_roi_RAB100q.dbf",
    "prj": "https://bmap-catalogue-data.oss.eu-west-0.prod-cloud-ocb.orange-business.com/Campaign_data/afrisar_dlr/afrisar_dlr_roi_RAB100q.prj",
    "shp": "https://bmap-catalogue-data.oss.eu-west-0.prod-cloud-ocb.orange-business.com/Campaign_data/afrisar_dlr/afrisar_dlr_roi_RAB100q.shp",
    "shx": "https://bmap-catalogue-data.oss.eu-west-0.prod-cloud-ocb.orange-business.com/Campaign_data/afrisar_dlr/afrisar_dlr_roi_RAB100q.shx"
  },
  "BIOSAR1": {
    "dbf": "https://bmap-catalogue-data.oss.eu-west-0.prod-cloud-ocb.orange-business.com/Campaign_data/biosar1/biosar1_roi_lidar58.dbf",
    "prj": "https://bmap-cat

Steps:
1. Remove 'host' role from any existing providers
2. Make sure there is a MAAP provider and assign the role 'host' to it (unless it is in the list of collections with an alternate host)

In [43]:
updated_collections = []

alternate_host_providers = {
    "ABoVE_UAVSAR_PALSAR": Provider(
        name="ASF",
        description="The Alaska Satellite Facility is part of the Geophysical Institute, located on the University of Alaska Fairbanks campus.",
        url="https://asf.alaska.edu/",
        roles=[ProviderRole.HOST],
    ),
    "AFRISAR_DLR": None,
    "BIOSAR1": None,
    "nisar-sim": Provider(
        name="ASF",
        description="The Alaska Satellite Facility is part of the Geophysical Institute, located on the University of Alaska Fairbanks campus.",
        url="https://asf.alaska.edu/",
        roles=[ProviderRole.HOST],
    )
}

for collection in collections:
    maap_provider = Provider(
        name="MAAP",
        description="The MAAP platform is designed to combine data, algorithms, and "
        "computational abilities for the processing and sharing of data related to "
        "NASA’s GEDI, ESA’s BIOMASS, and NASA/ISRO’s NISAR missions",
        url="https://maap-project.org",
        roles=[ProviderRole.HOST],
    )
    
    has_maap_provider = False
    
    new_collection = Collection.from_dict(collection.to_dict())
    for provider in new_collection.providers or []:
        if provider.name != "MAAP":
            # remove host designation if provider is not MAAP
            provider.roles = [role for role in provider.roles if role != ProviderRole.HOST]
        else:
            has_maap_provider = True
            roles = provider.roles
            provider = maap_provider
            provider.roles = list(set(provider.roles + roles))
            
    new_collection.providers = [provider for provider in new_collection.providers or [] if provider.roles]

    if alternate_host_provider := alternate_host_providers.get(collection.id):
        new_collection.providers.append(alternate_host_provider)
    elif not has_maap_provider:
        new_collection.providers.append(maap_provider)

    updated_collections.append(new_collection)



In [46]:
for collection in updated_collections:
    if collection.providers:
        for provider in collection.providers or []:
            print("collection:", collection.id, "provider:", provider.name, "roles:", provider.roles)

collection: ABoVE_UAVSAR_PALSAR provider: ASF roles: ['host']
collection: AFRISAR_DLR provider: MAAP roles: ['host']
collection: AFRISAR_DLR2 provider: MAAP roles: ['host']
collection: AfriSAR_UAVSAR_Coreg_SLC provider: MAAP roles: ['host']
collection: AfriSAR_UAVSAR_Geocoded_Covariance provider: MAAP roles: ['host']
collection: AfriSAR_UAVSAR_Geocoded_SLC provider: MAAP roles: ['host']
collection: AfriSAR_UAVSAR_KZ provider: MAAP roles: ['host']
collection: AfriSAR_UAVSAR_Normalization_Area provider: MAAP roles: ['host']
collection: AfriSAR_UAVSAR_Ungeocoded_Covariance provider: MAAP roles: ['host']
collection: BIOSAR1 provider: MAAP roles: ['host']
collection: ESACCI_Biomass_L4_AGB_V4_100m provider: MAAP roles: ['host']
collection: GEDI_CalVal_Field_Data provider: MAAP roles: ['host']
collection: GEDI_CalVal_Lidar_COPC provider: MAAP roles: ['host']
collection: GEDI_CalVal_Lidar_Data provider: MAAP roles: ['host']
collection: GEDI_CalVal_Lidar_Data_Compressed provider: MAAP roles: ['