In [None]:
import requests
import os
from datetime import datetime, timedelta
from dotenv import load_dotenv

In [None]:
# Credentials: populate the process environment via python-dotenv, then read
# the OAuth2 client id/secret from it. Either value is None when the
# corresponding variable is not defined.
load_dotenv()
CLIENT_ID = os.environ.get('CLIENT_ID')
CLIENT_SECRET = os.environ.get('CLIENT_SECRET')

In [None]:
# Images to download, grouped by area of interest (AOI).
#   key   -> tile id (e.g. '21WXS'); main() uses it for the "_T<tile>_" name
#            filter and as the output directory name.
#   value -> list of "<satellite>_<YYYYMMDD>" specs; main() splits each spec on
#            '_' into the satellite prefix and the acquisition date.
download_list = {
    # AOI 1
    '21WWV': ['S2A_20160620', 'S2A_20160822', 'S2A_20170725', 'S2A_20170804', 'S2B_20170723'],
    '21XWA': ['S2A_20160802', 'S2A_20160822', 'S2A_20160911', 'S2A_20170728', 'S2B_20170723'],
    '21XWB': ['S2A_20160802', 'S2A_20160825', 'S2A_20170724', 'S2A_20170728', 'S2B_20170825'],
    # AOI 2
    '21WXS': ['S2A_20160518', 'S2A_20160707', 'S2A_20160727', 'S2A_20160813', 'S2A_20160830'],
    '21WXQ': ['S2A_20160601', 'S2A_20160614', 'S2A_20160711', 'S2A_20160813', 'S2A_20160830'],
    '22WDA': ['S2A_20160601', 'S2A_20160614', 'S2A_20160724', 'S2A_20160813', 'S2A_20160830'],
    '22WDB': ['S2A_20160601', 'S2A_20160621', 'S2A_20160721', 'S2A_20160813', 'S2A_20160830'],
    # NOTE(review): 'S2A_20160618' is listed twice for 22WEA; the second entry
    # only hits the "file already exists" skip. Possibly a typo for another
    # date - confirm against the intended acquisition list.
    '22WEA': ['S2A_20160618', 'S2A_20160618', 'S2A_20160708', 'S2A_20160731', 'S2A_20160830'],
    '22WEB': ['S2A_20160601', 'S2A_20160628', 'S2A_20160724', 'S2A_20160813', 'S2A_20160830'],
    # AOI 3
    '22VFN': ['S2A_20160719', 'S2A_20160805', 'S2A_20160910', 'S2A_20160920', 'S2A_20160927'],
    '22VFP': ['S2A_20160719', 'S2A_20160729', 'S2A_20160831', 'S2A_20160910', 'S2A_20160927'],
    '23VLH': ['S2A_20160527', 'S2A_20160719', 'S2A_20160805', 'S2A_20160924', 'S2A_20160927'],
    '22VEQ': ['S2A_20160615', 'S2A_20160725', 'S2A_20160831', 'S2A_20160910', 'S2A_20160920'],
    '22VEP': ['S2A_20160719', 'S2A_20160910', 'S2A_20160920', 'S2A_20160927'],
    # AOI 4
    '23VMG': ['S2A_20160527', 'S2A_20160703', 'S2A_20160726', 'S2A_20160911', 'S2A_20160924'],
    '23VMH': ['S2A_20160527', 'S2A_20160726', 'S2A_20160805', 'S2A_20160828', 'S2A_20160924'],
    '23VNG': ['S2A_20160527', 'S2A_20160613', 'S2A_20160703', 'S2A_20160901', 'S2A_20160911', 'S2A_20160921'],
    '23VNH': ['S2A_20160726', 'S2A_20160822', 'S2A_20160901', 'S2A_20160921']
}

# Product type to request. search_and_download() maps 'S2MSI2A' to the
# 'MSIL2A' name fragment; any other value falls back to 'MSIL1C'.
product_level = 'S2MSI2A'



In [None]:
# --- API Endpoints ---
# OAuth2 token endpoint that get_access_token() posts the client credentials to.
TOKEN_URL = (
    "https://identity.dataspace.copernicus.eu"
    "/auth/realms/CDSE/protocol/openid-connect/token"
)
# Base URL of the OData catalogue queried by search_and_download().
CATALOGUE_URL = "https://catalogue.dataspace.copernicus.eu/odata/v1"


def get_access_token(client_id, client_secret, timeout=30):
    """
    Fetch an OAuth2 access token using the Client Credentials grant.

    Args:
        client_id: OAuth2 client id.
        client_secret: OAuth2 client secret.
        timeout: Seconds passed to ``requests.post`` so that a stalled
            identity server cannot hang the notebook cell indefinitely.

    Returns:
        The access token string, or None when the credentials are missing,
        the request fails, or the response carries no 'access_token'.
        The reason is printed in every failure case; nothing is raised.
    """
    # os.getenv() yields None for undefined variables; fail with a clear
    # message instead of sending an unusable request to the server.
    if not client_id or not client_secret:
        print("[ERROR] Client id/secret missing. "
              "Set CLIENT_ID and CLIENT_SECRET in the environment or .env file.")
        return None

    print("Attempting to get access token using Client Credentials...")
    token_data = {
        'client_id': client_id,
        'client_secret': client_secret,
        'grant_type': 'client_credentials',
    }

    try:
        response = requests.post(TOKEN_URL, data=token_data, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad responses
        token = response.json().get('access_token')

        if not token:
            print("[ERROR] 'access_token' not found in response.")
            return None

        print("Successfully obtained new access token.")
        return token

    except requests.exceptions.HTTPError as e:
        print(
            f"[ERROR] Failed to get token: {e.response.status_code} {e.response.text}")
        return None
    except Exception as e:
        # Covers connection errors, timeouts and malformed JSON bodies.
        print(f"[ERROR] An error occurred during token request: {e}")
        return None


def _stream_to_file(response, destination, chunk_size=8192):
    """Stream a response body to `destination` through a temporary '.part' file."""
    partial_path = destination + ".part"
    try:
        with open(partial_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    file.write(chunk)
        # Rename only after the whole body was written, so `destination`
        # never exists in a truncated state.
        os.replace(partial_path, destination)
    except BaseException:
        # BaseException so that a kernel interrupt (KeyboardInterrupt) also
        # removes the incomplete file before propagating.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise


def search_and_download(session, tile, satellite, date_str, product_level,
                        timeout=60):
    """
    Search the catalogue for one product and download it as a zip file.

    The product is matched on satellite name prefix, product type, tile id
    and acquisition day; the first search result is saved as
    "<tile>/<satellite>_<tile>_<date_str>.zip". The body is streamed into a
    ".part" file and renamed on completion, so an interrupted transfer is
    never mistaken for a finished download by the "already exists" check on
    a later run.

    Args:
        session: requests.Session-like object that already carries the
            Authorization header.
        tile: Tile id, e.g. "21WXS".
        satellite: Product name prefix, e.g. "S2A".
        date_str: Acquisition day formatted as YYYYMMDD.
        product_level: 'S2MSI2A' selects 'MSIL2A' products; any other value
            selects 'MSIL1C'.
        timeout: Seconds passed to each ``session.get`` call.

    Returns:
        'SUCCESS'          file downloaded, or it already existed
        'REFRESH_TOKEN'    a 401 was received (caller should refresh the token)
        'DOWNLOAD_FAILED'  no product found, or the download server returned
                           a non-200 status
        'EXCEPTION'        any other error (details are printed)
    """
    try:
        # --- Resolve the output path first ---
        # Use the SHORT name to avoid "Path too long" error,
        # e.g. "21WXS/S2A_21WXS_20160518.zip"
        tile_directory = tile
        short_filename = f"{satellite}_{tile}_{date_str}.zip"
        output_filename = os.path.join(tile_directory, short_filename)

        # The path depends only on the arguments, so a finished download can
        # be skipped without spending a catalogue request on it.
        if os.path.exists(output_filename):
            print(f"  > File already exists, skipping: {output_filename}")
            return 'SUCCESS'

        # --- Build the OData Filter Query ---
        start_date_obj = datetime.strptime(date_str, '%Y%m%d')
        end_date_obj = start_date_obj + timedelta(days=1)
        start_date_str_iso = start_date_obj.strftime('%Y-%m-%dT%H:%M:%S.000Z')
        end_date_str_iso = end_date_obj.strftime('%Y-%m-%dT%H:%M:%S.000Z')
        product_abbreviation = 'MSIL2A' if product_level == 'S2MSI2A' else 'MSIL1C'

        filter_query = (
            f"startswith(Name, '{satellite}_') and "
            f"contains(Name, '_{product_abbreviation}_') and "
            f"contains(Name, '_T{tile}_') and "
            f"ContentDate/Start ge {start_date_str_iso} and "
            f"ContentDate/Start lt {end_date_str_iso}"
        )
        search_url = f"{CATALOGUE_URL}/Products?$filter={filter_query}"

        # --- Make the Search Request ---
        print(f"Searching for: Tile={tile}, Sat={satellite}, Date={date_str}")
        response = session.get(search_url, timeout=timeout)

        if response.status_code == 401:
            return 'REFRESH_TOKEN'  # Signal to main
        response.raise_for_status()  # Raise for other errors (404, 500)

        results = response.json().get('value')
        if not results:
            print("  > No product found.")
            return 'DOWNLOAD_FAILED'

        # Only the first match is downloaded.
        product = results[0]
        product_id = product['Id']
        product_name = product['Name']
        print(f"  > Found product: {product_name} (Id: {product_id})")

        # --- Prepare for Download ---
        # Create the tile directory if it doesn't exist (safe to run every time)
        os.makedirs(tile_directory, exist_ok=True)

        # Use the correct OData syntax (no quotes around UUID)
        download_url = f"https://download.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value"
        print(f"  > Downloading to {output_filename} ...")

        # --- Stream the Download ---
        with session.get(download_url, stream=True, timeout=timeout) as response:
            if response.status_code == 401:
                return 'REFRESH_TOKEN'  # Signal to main

            if response.status_code == 200:
                _stream_to_file(response, output_filename)
                print("  > Download complete.")
                return 'SUCCESS'

            # Error from the *download* server
            print(
                f"Failed to download file. Status code: {response.status_code}")
            print(response.text)
            return 'DOWNLOAD_FAILED'

    except Exception as e:
        print(
            f"[ERROR] An error occurred while processing {satellite}_{date_str} for tile {tile}:")
        print(f"  > {e}")
        return 'EXCEPTION'


def main():
    """
    Download every image listed in `download_list`, tile by tile.

    Obtains an access token, then calls search_and_download() for each
    "<satellite>_<date>" spec. When a call reports 'REFRESH_TOKEN', the token
    is refreshed once and the same spec is retried; if the refresh fails or
    the retry still reports 'REFRESH_TOKEN', the remaining specs of that tile
    are skipped. A list of everything that did not end in 'SUCCESS' is
    printed at the end. Returns None.
    """
    # --- Get initial token ---
    token = get_access_token(CLIENT_ID, CLIENT_SECRET)
    if not token:
        print("Script cannot continue without an access token. Exiting.")
        return

    # "<tile>/<spec>" entries for every item that did not finish successfully.
    failed_items = []

    # --- Create the session ---
    # The context manager closes the session's connections when the run ends,
    # including when it is interrupted.
    with requests.Session() as session:
        session.headers.update({'Authorization': f'Bearer {token}'})

        print("\nStarting targeted search and download...")

        # --- Main Loop ---
        for tile, image_specs in download_list.items():
            print(f"\n--- Processing Tile: {tile} ---")

            # Use a for loop, as 'while' can cause infinite loops on errors
            for spec in image_specs:
                # Only the parsing is guarded here, so the "Failed to parse"
                # message cannot be triggered by unrelated errors.
                try:
                    spec_clean = spec.strip()
                    parts = spec_clean.split('_')
                    satellite = parts[0].strip()
                    date_str = parts[1].strip()
                except Exception as e:
                    print(f"[ERROR] Failed to parse spec '{spec}': {e}")
                    failed_items.append(f"{tile}/{spec}")
                    continue

                # --- First Download Attempt ---
                status = search_and_download(session, tile, satellite,
                                             date_str, product_level)

                # --- Handle Token Refresh ---
                if status == 'REFRESH_TOKEN':
                    print("  > Token expired or invalid. Refreshing...")
                    token = get_access_token(CLIENT_ID, CLIENT_SECRET)

                    if not token:
                        print(
                            "  > FATAL: Failed to refresh token. Skipping rest of tile.")
                        failed_items.append(
                            f"{tile}/{spec_clean} (rest of tile skipped)")
                        break  # Break inner loop (stops processing this tile)

                    session.headers.update(
                        {'Authorization': f'Bearer {token}'})

                    # --- Retry the download ---
                    print(f"  > Retrying download for {spec_clean}...")
                    status = search_and_download(session, tile, satellite,
                                                 date_str, product_level)

                    if status == 'REFRESH_TOKEN':
                        print(
                            "  > FATAL: Refresh failed. Token is still invalid. Skipping rest of tile.")
                        failed_items.append(
                            f"{tile}/{spec_clean} (rest of tile skipped)")
                        break  # Break inner loop
                    elif status != 'SUCCESS':
                        print(f"  > Retry failed for {spec_clean}.")

                # Record first-attempt and retry failures alike.
                if status != 'SUCCESS':
                    failed_items.append(f"{tile}/{spec_clean}")

    if failed_items:
        print(f"\n{len(failed_items)} item(s) did not complete:")
        for item in failed_items:
            print(f"  - {item}")

    print("\n--- Download script finished. ---")


# Entry point. In a Jupyter kernel `__name__` is "__main__", so executing this
# cell starts the full download run; the guard keeps main() from running if
# this code is ever imported as a module instead.
if __name__ == "__main__":
    main()