### Download data from the Copernicus Sentinel-2 Long term archive (LTA) using the Sentinel Hub API

Copernicus Open Access Hub no longer stores all products for immediate retrieval. \
So we need to trigger a request from the Long Term Archive (LTA) 

Docs: https://sentinelsat.readthedocs.io/en/stable/api_overview.html#lta-products

1. Search for the product we want to download
2. Check if it is online or offline (means LTA)
3. trigger a request for the product from the LTA
4. add handling for the request status
5. ensure that only every 30 min a request is triggered

In [2]:
import numpy as np
import os
import geopandas as gpd
from shapely.geometry import Polygon
from geopandas import GeoSeries, GeoDataFrame
from pyproj import Proj, transform
from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt, make_path_filter
from datetime import date
from zipfile import ZipFile
import shutil
from dotenv import load_dotenv

1. Search for the product we want to download

In [3]:
# open file with polygons of germany
trn_polygons_germany_tiles_path = r'C:\Users\Fabian\OneDrive - Universität Würzburg\Uni Würzburg\Master\Masterthesis\Code\Einarbeitung\trn_polygons_germany_tiles.shp'
trn_polygons_germany_tiles = gpd.read_file(trn_polygons_germany_tiles_path)

In [4]:
# load credentials from .env file
load_dotenv()
api_user = os.getenv("API_USER")
api_secret = os.getenv("API_SECRET")
api_url = os.getenv("API_URL")

api = SentinelAPI(api_user, api_secret, api_url)

In [35]:
# use centroid of tile for footprint
footprint1 = list(set(trn_polygons_germany_tiles.centroid_o))[1]

In [25]:
# query for image which lies within the footprint of the selected tile
products = api.query(footprint,
                    date=('2018601', date(2018, 8, 1)),
                    platformname='Sentinel-2',
                    producttype="S2MSI2A",
                    cloudcoverpercentage=(0, 20),
                    ) #area_relation='IsWithin',#processinglevel="Level-2A"

In [12]:
products

OrderedDict([('442ffded-03e7-488c-bbd0-ccef97cea031',
              {'title': 'S2A_MSIL2A_20180731T102021_N0208_R065_T32UPA_20180731T132903',
               'link': "https://apihub.copernicus.eu/apihub/odata/v1/Products('442ffded-03e7-488c-bbd0-ccef97cea031')/$value",
               'link_alternative': "https://apihub.copernicus.eu/apihub/odata/v1/Products('442ffded-03e7-488c-bbd0-ccef97cea031')/",
               'link_icon': "https://apihub.copernicus.eu/apihub/odata/v1/Products('442ffded-03e7-488c-bbd0-ccef97cea031')/Products('Quicklook')/$value",
               'summary': 'Date: 2018-07-31T10:20:21.024Z, Instrument: MSI, Satellite: Sentinel-2, Size: 1.08 GB',
               'ondemand': 'false',
               'ingestiondate': datetime.datetime(2018, 7, 31, 16, 23, 19, 750000),
               'beginposition': datetime.datetime(2018, 7, 31, 10, 20, 21, 24000),
               'endposition': datetime.datetime(2018, 7, 31, 10, 20, 21, 24000),
               'orbitnumber': 16221,
        

In [26]:
# convert OrderedDict to GeoDataFrame
products_gdf = api.to_geodataframe(products)

In [27]:
# sort products by cloud cover (lowest first)
products_gdf_sorted = products_gdf.sort_values(['cloudcoverpercentage'], ascending=[True])

In [28]:
# get uuid from product (as geoDataframe) to download it
uuid_product = products_gdf_sorted.uuid[0]
title_product = products_gdf_sorted.title[0] 

In [29]:
# check if product is online
is_online = api.is_online(uuid_product)

In [30]:
if is_online:
    print(f'Product is online. Starting download.')
    api.download(uuid_product, directory_path=r'raw_data')
else:
    print(f'Product is not online.')
    # download from aws it this fails, trigger offline retrieval
    api.trigger_offline_retrieval(uuid_product)

Product is not online.


In [36]:
from sentinelsat.exceptions import LTATriggered
try:
    api.download(uuid_product,directory_path=r'raw_data')
except LTATriggered:
    print('LTA triggered')

LTA triggered


In [38]:
import asyncio
import time
from pathlib import Path

In [40]:
async def download_from_lta(
    api: SentinelAPI, product_uuid: str, download_root: Path, timeout_hours: int = 24
) -> bool:
    """
    Downloads a product from the Long-Term Archive (LTA).

    :param api: SentinelAPI instance
    :param product_id: ID of the product to download
    :param timeout_hours: Maximal number of hours to keep retrying
    """
    timeout = timeout_hours * 3600  # Convert hours to seconds
    start_time = time.monotonic()
    while time.monotonic() - start_time < timeout:
        try:
            api.download(product_uuid, directory_path=download_root)
            print(f"Product {product_uuid} downloaded successfully.")
            return True
        except LTATriggered:
            print(f"Waiting since {time.monotonic() - start_time} seconds.")
            await asyncio.sleep(300)  # Wait for 5 minutes before trying again
    print(f"Download timed out after {timeout_hours} hours.")
    return False

In [46]:
result = asyncio.run(download_from_lta(api, uuid_product, Path(r'raw_data')))
if result:
    print("Download successful.")
else:
    print("Download failed.")

RuntimeError: asyncio.run() cannot be called from a running event loop

In [33]:
uuid_product

'7c767d3a-18a6-4d04-9a32-7e56e62591b3'

In [31]:
api.trigger_offline_retrieval(uuid_product)

True

In [34]:
api.download(uuid_product,directory_path=r'raw_data')

LTATriggered: Product 7c767d3a-18a6-4d04-9a32-7e56e62591b3 is not online. Triggered retrieval from the Long Term Archive.

In [8]:
title = "S2B_MSIL1C_20180610T100029_N0206_R122_T33UVS_20180610T120934"

In [None]:
regex = "^(?<mission>S2[A-B])_(?<product_level>MSIL[1-2][A-C])_(?<sensing_time>\d{8}T\d{6})_(?<processing_baseline>N\d{4})_(?<relative_orbit>R\d{3})_T(?<utm_code>\d{2})(?<latitude_band>\w)(?<square>\w{2})_(?<year>\d{4})(?<month>\d{2})(?<day>\d{2})T(?<product_time>\d{6})"

In [6]:
# get uuid from product (as OrderedDict) to download it
uuid = list(products.keys())[0]

In [1]:
# convert OrderedDict to geoDataFrame
products_gdf = api.to_geodataframe(products)

NameError: name 'api' is not defined

In [43]:
product_info = api.get_product_odata(uuid)
is_online = product_info['Online']
# or
is_online = api.is_online(uuid)

if is_online:
    print(f'Product is online. Starting download.')
    api.download(uuid)
else:
    print(f'Product is not online.')
    api.trigger_offline_retrieval(uuid)

Product is not online.


In [7]:
api.download(uuid)

Downloading S2B_MSIL1C_20180610T100029_N0206_R122_T33UVS_20180610T120934.zip:   0%|          | 0.00/429M [00:0…

MD5 checksumming:   0%|          | 0.00/429M [00:00<?, ?B/s]

{'id': '22e26b31-038f-4758-9c07-d35473207964',
 'title': 'S2B_MSIL1C_20180610T100029_N0206_R122_T33UVS_20180610T120934',
 'size': 428635512,
 'md5': 'b4efc2abf9c7c70fb689dcc097e99018',
 'date': datetime.datetime(2018, 6, 10, 10, 0, 29, 24000),
 'footprint': 'POLYGON((14.15049975433055 50.45832402638246,14.20341180898468 50.58960217174912,14.2623959076342 50.73556219939568,14.321475292006062 50.88154735907927,14.381132916978231 51.02741507413424,14.440027822954823 51.17348242872262,14.499624609043536 51.319345316462666,14.552385876309906 51.44784011302054,15.140457957691325 51.451098078258774,15.137512494758056 50.463717167775926,14.15049975433055 50.45832402638246))',
 'url': "https://apihub.copernicus.eu/apihub/odata/v1/Products('22e26b31-038f-4758-9c07-d35473207964')/$value",
 'Online': True,
 'Creation Date': datetime.datetime(2018, 6, 10, 17, 17, 58, 288000),
 'Ingestion Date': datetime.datetime(2018, 6, 10, 17, 6, 25, 429000),
 'manifest_name': 'manifest.safe',
 'product_root_dir'

In [42]:
api.concurrent_lta_trigger_limit

TypeError: 'int' object is not callable