# STAC Item Search and Submission to Data Pipeline

This notebook allows operators to:

1. Define an area of interest (AOI) and time range
2. Search for STAC items from the EOPF STAC catalog
3. Submit selected items to the data pipeline for processing via HTTP webhook

## Setup and Imports


In [7]:
import json

import pandas as pd
import requests
from pystac_client import Client

# Try to load .env file if available
# try:
#     from dotenv import load_dotenv

#     dotenv_path = Path(".env")
#     if dotenv_path.exists():
#         load_dotenv(dotenv_path)
#         print("‚úÖ Loaded credentials from .env file")
#     else:
#         print("‚ÑπÔ∏è  No .env file found, will prompt for credentials")
# except ImportError:
#     print("‚ÑπÔ∏è  python-dotenv not installed, will prompt for credentials")
#     print("   Install with: pip install python-dotenv")

## Configuration


In [8]:
# STAC API Configuration
STAC_API_URL = "https://stac.core.eopf.eodc.eu/"

# Webhook Configuration
WEBHOOK_URL = "http://localhost:12000/samples"

print("‚úÖ Configuration loaded")

‚úÖ Configuration loaded


## Define Area and Time of Interest


In [9]:
# Area of Interest (AOI) - Bounding box: [min_lon, min_lat, max_lon, max_lat]
# Example: Rome area
# aoi_bbox = [12.4, 41.8, 12.6, 42.0]
# Example 2: Majorca area (2.1697998046875004%2C39.21097520599528%2C3.8177490234375004)
# aoi_bbox = [2.16, 39.21, 3.82, 39.78]
# Example 3: France Full
aoi_bbox = [-5.14, 41.33, 9.56, 51.09]

# Time range
start_date = "2025-07-01T00:00:00Z"
end_date = "2025-07-31T23:59:59Z"

print(f"Area of Interest: {aoi_bbox}")
print(f"Time Range: {start_date} to {end_date}")

Area of Interest: [-5.14, 41.33, 9.56, 51.09]
Time Range: 2025-07-01T00:00:00Z to 2025-07-31T23:59:59Z


## Browse Available Collections


In [10]:
# Connect to STAC API
catalog = Client.open(STAC_API_URL)

# List available collections
collections = list(catalog.get_collections())

print(f"\nüìö Available Collections ({len(collections)} total):\n")
for col in collections:
    print(f"  - {col.id}")
    if col.description:
        print(
            f"    {col.description[:100]}..."
            if len(col.description) > 100
            else f"    {col.description}"
        )
    print()


üìö Available Collections (12 total):

  - sentinel-2-l2a
    The Sentinel-2 Level-2A Collection 1 product provides orthorectified Surface Reflectance (Bottom-Of-...

  - sentinel-3-olci-l2-lfr
    The Sentinel-3 OLCI L2 LFR product provides land and atmospheric geophysical parameters computed for...

  - sentinel-1-l1-grd
    The Sentinel-1 Level-1 Ground Range Detected (GRD) products consist of focused SAR data that has bee...

  - sentinel-1-l2-ocn
    The Sentinel-1 Level-2 Ocean (OCN) products for wind, wave and currents applications may contain the...

  - sentinel-3-olci-l1-err
    The Sentinel-3 OLCI L1 ERR product provides TOA radiances at reduced resolution for each pixel in th...

  - sentinel-3-slstr-l2-frp
    The Sentinel-3 SLSTR Level-2 FRP product provides global (over land and water) fire radiative power.

  - sentinel-3-olci-l2-lrr
    The Sentinel-3 OLCI L2 LRR product provides land and atmospheric geophysical parameters computed for...

  - sentinel-2-l1c
    The 

## Select Collection and Search for Items


In [11]:
# Choose the source collection to search
source_collection = "sentinel-2-l2a"  # Change this to your desired collection

# Choose the target collection for processing
target_collection = "sentinel-2-l2a"  # Change this to your target collection

print(f"üîç Searching collection: {source_collection}")
print(f"üéØ Target collection for processing: {target_collection}")

üîç Searching collection: sentinel-2-l2a
üéØ Target collection for processing: sentinel-2-l2a


In [None]:
# Search for items
search = catalog.search(
    collections=[source_collection],
    bbox=aoi_bbox,
    datetime=f"{start_date}/{end_date}",  # Adjust as needed
    limit=100,  # Adjust limit as needed
)

# Collect items paginated results and clean them (workaround for issue #26)
# Use pages_as_dicts() to get raw JSON before PySTAC parsing
items = []
from pystac import Item

for page_dict in search.pages_as_dicts():
    for feature in page_dict.get("features", []):
        # Clean assets with missing href before parsing
        if "assets" in feature:
            original_count = len(feature["assets"])
            feature["assets"] = {
                key: asset for key, asset in feature["assets"].items() if "href" in asset
            }
            removed_count = original_count - len(feature["assets"])
            if removed_count > 0:
                item_id = feature.get("id", "unknown")
                # print(f"‚ö†Ô∏è  Item {item_id}: Removed {removed_count} asset(s) with missing href")

        # Now parse the cleaned item
        try:
            item = Item.from_dict(feature)
            items.append(item)
        except Exception as e:
            item_id = feature.get("id", "unknown")
            print(f"‚ö†Ô∏è  Skipping item {item_id}: {e}")
            continue

print(f"\n‚úÖ Found {len(items)} items (after filtering).\n")

‚ö†Ô∏è  Item S2B_MSIL2A_20250730T113319_N0511_R080_T30UUU_20250730T135754: Removed 1 asset(s) with missing href
‚ö†Ô∏è  Item S2B_MSIL2A_20250730T113319_N0511_R080_T29UQR_20250730T135754: Removed 1 asset(s) with missing href
‚ö†Ô∏è  Item S2B_MSIL2A_20250730T113319_N0511_R080_T29UQP_20250730T135754: Removed 1 asset(s) with missing href
‚ö†Ô∏è  Item S2C_MSIL2A_20250730T104041_N0511_R008_T32TLS_20250730T160714: Removed 1 asset(s) with missing href
‚ö†Ô∏è  Item S2C_MSIL2A_20250730T104041_N0511_R008_T31UGR_20250730T160714: Removed 1 asset(s) with missing href
‚ö†Ô∏è  Item S2C_MSIL2A_20250730T104041_N0511_R008_T31UGQ_20250730T160714: Removed 1 asset(s) with missing href
‚ö†Ô∏è  Item S2C_MSIL2A_20250730T104041_N0511_R008_T31UGP_20250730T160714: Removed 1 asset(s) with missing href
‚ö†Ô∏è  Item S2C_MSIL2A_20250730T104041_N0511_R008_T31UFS_20250730T160714: Removed 1 asset(s) with missing href
‚ö†Ô∏è  Item S2C_MSIL2A_20250730T104041_N0511_R008_T31UES_20250730T160714: Removed 1 asset(s) with missi

KeyboardInterrupt: 

## Submit Items to Pipeline


In [None]:
def submit_item_to_pipeline(item_url: str, target_collection: str) -> bool:
    """
    Submit a single STAC item to the data pipeline via HTTP webhook.

    Args:
        item_url: The self-link URL of the STAC item
        target_collection: The target collection for processing

    Returns:
        True if successful, False otherwise
    """
    try:
        # Create payload
        payload = {
            "source_url": item_url,
            "collection": target_collection,
            "action": "convert-v1-s2",  # specify the action to use the V1 S2 trigger
        }

        # Submit via HTTP webhook endpoint
        message = json.dumps(payload)
        response = requests.post(
            WEBHOOK_URL,
            data=message,
            headers={"Content-Type": "application/json"},
        )

        response.raise_for_status()
        return True

    except Exception as e:
        print(f"‚ùå Error submitting item: {e}")
        return False

In [None]:
# Submit all found items to the pipeline
if items:
    print(f"\nüì§ Submitting {len(items)} items to pipeline...\n")

    success_count = 0
    fail_count = 0

    for item in items:
        # Get the self link (canonical URL for the item)
        item_url = next((link.href for link in item.links if link.rel == "self"), None)

        if not item_url:
            print(f"‚ö†Ô∏è  Skipping {item.id}: No self link found")
            fail_count += 1
            continue

        # Submit to pipeline
        if submit_item_to_pipeline(item_url, target_collection):
            print(f"‚úÖ Submitted: {item.id}")
            success_count += 1
        else:
            print(f"‚ùå Failed: {item.id}")
            fail_count += 1

    print("\nüìä Summary:")
    print(f"  - Successfully submitted: {success_count}")
    print(f"  - Failed: {fail_count}")
    print(f"  - Total: {len(items)}")
else:
    print("No items to submit.")

## Submit Specific Items (Optional)

If you want to submit only specific items instead of all found items, you can manually select them:


In [None]:
# Example: Submit only specific items by index
# Uncomment and modify as needed

# selected_indices = [0, 1, 2]  # Select first 3 items
#
# for idx in selected_indices:
#     if idx < len(items):
#         item = items[idx]
#         item_url = next((link.href for link in item.links if link.rel == "self"), None)
#
#         if item_url:
#             if submit_item_to_pipeline(item_url, target_collection):
#                 print(f"‚úÖ Submitted: {item.id}")
#             else:
#                 print(f"‚ùå Failed: {item.id}")
#     else:
#         print(f"‚ö†Ô∏è  Index {idx} out of range")