# improved catalog check for Sentinel-1 and Sentinel-2

#### old way

In [1]:
from __future__ import annotations
import requests
import json
from eo_processing.utils.geoprocessing import reproj_bbox_to_ll

# determine S1 or S2
sentinel = 'S2'
# temporal extent and area of interest (EPSG:3035 bounding box) used by the checks
start = '2020-01-01T00:00:00.00Z'
end = '2024-12-31T23:59:59.00Z'
aoi = {'east': 4840000, 'south': 2800000, 'west': 4820000, 'north': 2820000, 'crs': 3035}
latlon_box = reproj_bbox_to_ll(aoi)


#-------
# translate the short sensor key into the collection name used in the OData filter
if sentinel == 'S1':
    satelite = "SENTINEL-1"
elif sentinel == 'S2':
    satelite = "SENTINEL-2"
else:
    raise ValueError(f"{sentinel} is not satellite for which this has been implemented")

# OData query: products of the collection intersecting the AOI whose start date
# lies within the temporal extent; `$top=100` limits the response to 100 products
url = (f"https://datahub.creodias.eu/odata/v1/Products?$filter=Collection/Name eq '{satelite}' and "
       f"OData.CSC.Intersects(area=geography'SRID=4326;{latlon_box}') and ContentDate/Start "
       f"gt {start} and ContentDate/Start lt {end}&$top=100")
# a timeout keeps the cell from hanging forever when the service does not answer
results = requests.get(url, timeout=60)
# fail with the HTTP error itself instead of a confusing JSON/KeyError further down
results.raise_for_status()
json_data = results.json()
print(len(json_data["value"]))

100


#### improved version based on pystac-client

In [6]:
from typing import TYPE_CHECKING
from eo_processing.utils.geoprocessing import reproj_bbox_to_ll
import pystac_client
import pandas as pd
from eo_processing.config.data_formats import openEO_bbox_format


def catalog_check_CDSE_S2(start: str, end: str, bbox: openEO_bbox_format) -> None:
    """Check that the CDSE STAC catalogue holds enough Sentinel-2 L2A observations.

    Searches the ``sentinel-2-l2a`` collection for items intersecting ``bbox``
    within ``start``/``end``, counts the unique acquisition dates and compares
    that count against a minimum derived from the length of the period.

    Args:
        start: Start of the temporal extent. A value without ``'Z'`` is treated
            as a plain date and gets ``T00:00:00.00Z`` appended.
        end: End of the temporal extent, handled like ``start``.
        bbox: Bounding box in openEO format; it is reprojected to lat/lon with
            ``reproj_bbox_to_ll`` before the search.

    Raises:
        ValueError: If fewer unique observation dates were found than
            ``MIN_VALUE_S2 * percentage * number_of_days``.
    """
    # quickfix for inputs that are given as plain dates
    if 'Z' not in start:
        start = start + "T00:00:00.00Z"
    if 'Z' not in end:
        end = end + "T00:00:00.00Z"

    start_ts = pd.to_datetime(start)
    end_ts = pd.to_datetime(end)

    # set the minimum number of S2 images with two satellites (5 daily observation)
    MIN_VALUE_S2 = 1./5.
    # in 2017 S2B only started in June/July, so only the S2A satellite is available.
    # `.year` is an int, so it has to be compared with 2017 and not with '2017'.
    if start_ts.year == 2017:
        MIN_VALUE_S2 = 1./10.

    # the percentage of observations we want to have at least
    percentage = 0.8
    # convert the openEO bbox format to a shapely Polygon
    latlon_box = reproj_bbox_to_ll(bbox)
    # number of days in the temporal extent
    temp_extent_days = (end_ts - start_ts).days

    # run the PySTAC-client search
    # Connect to the Copernicus Data Space Ecosystem STAC API
    catalog_url = "https://stac.dataspace.copernicus.eu/v1/"
    client = pystac_client.Client.open(catalog_url)

    search = client.search(
        collections=['sentinel-2-l2a'],
        bbox=list(latlon_box.bounds),
        datetime=f"{start}/{end}",
        # only the acquisition time is needed, so request just these fields
        fields=["id", "properties.datetime"],
    )

    # get the acquisition timestamps of all found matches
    results = [item['properties']['datetime'] for item in search.items_as_dicts()]

    # count the number of unique dates on which we have observations (resolved tile overlap)
    df = pd.DataFrame(results, columns=['date'])
    df['date'] = pd.to_datetime(df['date'])
    df['date'] = df['date'].apply(lambda x: x.date())
    nbr_files = df['date'].nunique()

    # NOTE: the reported number is the count of unique observation dates
    print(f'Found {nbr_files} images.')

    # run the test
    if nbr_files < MIN_VALUE_S2*percentage*temp_extent_days:
        raise ValueError(f'not enough S2 images. Found {nbr_files} images.')

In [8]:
# run the Sentinel-2 check for the `start`/`end` period and the `aoi` bounding box;
# prints the number found and raises ValueError when there are too few observation dates
catalog_check_CDSE_S2(start, end, aoi)

Found 724 images.


#### now Sentinel-1

In [None]:
def _count_S1_observation_dates(client: pystac_client.Client, bounds: list[float],
                                datetime_range: str, query: dict | None = None) -> int:
    """Return the number of unique acquisition dates of matching Sentinel-1 GRD items."""
    search = client.search(
        collections=['sentinel-1-grd'],
        bbox=bounds,
        datetime=datetime_range,
        query=query,
    )
    # a set resolves tile overlap: several items acquired on the same day count once
    return len({item.datetime.date() for item in search.items()})


def catalogue_check_CDSE_S1(orbit_direction: str | None, start: str, end: str, bbox: openEO_bbox_format) -> str | None:
    """Check that the CDSE STAC catalogue holds enough Sentinel-1 GRD observations.

    If ``orbit_direction`` is given, the check is first run for that orbit only.
    When that orbit does not provide enough observation dates (or no orbit was
    requested), the check is repeated without any orbit filter.

    Args:
        orbit_direction: ``'ASCENDING'``, ``'DESCENDING'`` or ``None`` to skip
            the single-orbit check.
        start: Start of the temporal extent. A value without ``'Z'`` is treated
            as a plain date and gets ``T00:00:00.00Z`` appended.
        end: End of the temporal extent, handled like ``start``.
        bbox: Bounding box in openEO format; it is reprojected to lat/lon with
            ``reproj_bbox_to_ll`` before the search.

    Returns:
        ``orbit_direction`` when that orbit alone has enough observation dates,
        ``None`` when only the search without orbit filter has enough.

    Raises:
        ValueError: If ``orbit_direction`` is neither ``None`` nor one of the
            two accepted values, or if even the search without orbit filter
            does not find enough observation dates.
    """
    # validate the requested orbit before doing any network call
    if orbit_direction is not None and orbit_direction not in ['ASCENDING', 'DESCENDING']:
        raise ValueError(
            f'`orbit_direction` value `{orbit_direction}` not recognized.')

    # quickfix for inputs that are given as plain dates
    if 'Z' not in start:
        start = start + "T00:00:00.00Z"
    if 'Z' not in end:
        end = end + "T00:00:00.00Z"

    # set the minimum number of S1 images with two satellites
    MIN_VALUE_S1 = 1./12.
    # the percentage of observations we want to have at least
    percentage = 0.8
    # convert the openEO bbox format to a shapely Polygon
    latlon_box = reproj_bbox_to_ll(bbox)
    # number of days in the temporal extent
    temp_extent_days = (pd.to_datetime(end)-pd.to_datetime(start)).days
    # minimum number of unique observation dates required to pass the check
    min_nbr_dates = MIN_VALUE_S1*percentage*temp_extent_days

    # run the PySTAC-client search
    # Connect to the Copernicus Data Space Ecosystem STAC API
    catalog_url = "https://stac.dataspace.copernicus.eu/v1/"
    client = pystac_client.Client.open(catalog_url)

    bounds = list(latlon_box.bounds)
    datetime_range = f"{start}/{end}"

    # if we have an orbit_direction given we have to test that first
    if orbit_direction is not None:
        nbr_files = _count_S1_observation_dates(
            client, bounds, datetime_range,
            query={"sat:orbit_state": {"eq": f"{orbit_direction.lower()}"}},
        )
        if nbr_files >= min_nbr_dates:
            return orbit_direction
        print(f'Not enough S1 images with orbit {orbit_direction}. \n'
              f'Found {nbr_files} images.')

    # use both orbits -> check without an orbit filter
    nbr_files = _count_S1_observation_dates(client, bounds, datetime_range)
    if nbr_files < min_nbr_dates:
        raise ValueError(f'not enough S1 without orbit direction selection. \n'
                         f'Found {nbr_files} images.')

    return None

In [None]:
# check Sentinel-1 availability up to the end of 2020, trying the DESCENDING orbit first;
# `orbit` is 'DESCENDING' if that orbit alone suffices, None if both orbits are needed
orbit = catalogue_check_CDSE_S1('DESCENDING', start, '2020-12-31T23:59:59.00Z', aoi)