**Sentinel-2 Search, Quicklook, and Download**

Tutorial by Rasmus Meyer (rpm@ign.ku.dk) :)


Prerequisites:
 - A free account at https://dataspace.copernicus.eu/
 - A shapefile/geojson polygon to define the area of interest
 - A Python environment with the necessary libraries

 **Description:**
 This script is made for downloading a large number of Sentinel-2 scenes through the Copernicus Data Space Ecosystem (CDSE).

- Step 1: Define parameters like area of interest, dataset, datetime interval, and cloud cover.

- Step 2: Download Quicklook of S2 scenes as .jpg to folder (You almost always want to screen your S2 scenes to save time and space before actually downloading large S2 files)

- Step 3: Manually delete quicklooks of S2 scenes in folder that you do not need and write the remaining S2 scene ID and name to a .csv file. 

- Step 4: Generate a CDSE access token (allows you to access and download), read the CSV file containing the S2 scene identifiers, and download the scenes to a folder.

In [4]:
# HTTP requests
import requests
# JSON parser
import json
# XML parser
import xml.etree.ElementTree as ET
# system modules
import os
import re
import sys
import random
# data manipulation
import pandas as pd
import geopandas as gpd
import numpy as np
# image manipulation
import rasterio
import matplotlib.pyplot as plt
import matplotlib.image
from rasterio.windows import Window
# file manipulation
from pathlib import Path

**STEP 1 & 2: Define parameters and download Quicklooks**

In [9]:
# ---------------------------------------------------------------
# User settings - edit these before running the cell
# ---------------------------------------------------------------
# Polygon file (shapefile/geojson) describing the area of interest
aoi_path = r"C:\Users\gjm501\Documents\Projects\2502_UiO\Sermilik_AOI.shp"
# Folder that will receive the Quicklook .jpg files
quicklook_dir = fr"C:\Users\gjm501\Documents\Projects\2502_UiO\quicklooks_download"

# Acquisition window used in the ContentDate/Start filter of the query
start_date = "2022-07-15T00:00:00.000Z"
end_date = "2022-09-15T23:59:59.999Z"
# Upper bound for the 'cloudCover' attribute in the query
cloud_cover_max = 10
# Product type: Sentinel-2 TOA (top-of-atmosphere, level 1)
dataset = "S2MSI1C"
# Max number of S2 scenes to retrieve (test first with a small number)
scenes_max = 2

# ---------------------------------------------------------------
# AOI preparation
# ---------------------------------------------------------------
gdf = gpd.read_file(aoi_path)

# The AOI is sent to the catalogue as SRID 4326, so reproject when needed
needs_reprojection = gdf.crs != "EPSG:4326"
if needs_reprojection:
    gdf = gdf.to_crs(epsg=4326)
    print("✅ Converted AOI CRS to EPSG:4326")
else:
    print("✅ AOI is already in EPSG:4326")

# Only the first geometry of the file is used as the search polygon
selected_aoi = gdf.geometry.iloc[0]
wkt_aoi = "SRID=4326;" + selected_aoi.wkt

print("Selected AOI in WKT format:", wkt_aoi)


# Construct the CDSE OData query with AOI, date, cloud cover and product type filters.
# The trailing backslashes join the pieces into one single-line URL.
# - The cloud cover threshold is formatted as a float so non-integer limits
#   (e.g. 7.5) still produce a valid number.
# - The product type is an OData string literal and therefore needs single quotes.
cdse_api_url = f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=\
OData.CSC.Intersects(area=geography'{wkt_aoi}') and \
(Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover' and att/OData.CSC.DoubleAttribute/Value le {float(cloud_cover_max):.2f})) and \
(Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType' and att/OData.CSC.StringAttribute/Value eq '{dataset}')) and \
ContentDate/Start gt {start_date} and \
ContentDate/Start lt {end_date}&$expand=Assets&$top={scenes_max}"

# Send request (the timeout keeps the cell from hanging on a stalled connection)
response = requests.get(cdse_api_url, timeout=60)

# Stop here on failure: the following cells need `df`, and continuing would
# either raise a NameError or silently reuse a DataFrame from an earlier run.
if response.status_code != 200:
    print(f"❌ Query failed! Status code: {response.status_code}")
    print(response.text)
    raise RuntimeError(f"CDSE catalogue query failed with status {response.status_code}")

json_data = response.json()
print('json_value', json_data["value"])
df = pd.DataFrame.from_dict(json_data["value"])

# Print relevant columns (an empty result has no columns to select from)
columns_to_print = ["Id", "Name", "S3Path", "GeoFootprint", "Assets"]
if df.empty:
    print("⚠️ Query returned no scenes - widen the date range or raise cloud_cover_max")
else:
    print(df[columns_to_print].head(3))  # Print first 3 results

# Save full API response to inspect Quicklook links.
# Explicit UTF-8 avoids encoding errors under a non-UTF-8 platform default.
with open("cdse_response.json", "w", encoding="utf-8") as f:
    f.write(response.text)

print("✅ Query successful! Data saved in `cdse_response.json`")

# Build a mapping {scene Id -> Quicklook download URL} from each scene's `Assets`
quicklook_urls = {}

for row_label, scene in df.iterrows():
    scene_assets = scene.get("Assets", [])  # falls back to an empty list when absent
    if not isinstance(scene_assets, list):
        # Anything that is not a list of assets is skipped
        continue
    for entry in scene_assets:
        if entry.get("Type") != "QUICKLOOK":
            continue
        asset_id = entry.get("Id")  # Id of the Quicklook asset itself
        quicklook_urls[scene["Id"]] = (
            f"https://catalogue.dataspace.copernicus.eu/odata/v1/Assets({asset_id})/$value"
        )

# Uncomment to inspect the extracted URLs
#print("Extracted Quicklook URLs:", quicklook_urls)

import os
import requests

# Make sure the Quicklook folder exists (no error if it is already there)
os.makedirs(quicklook_dir, exist_ok=True)

# Function to download Quicklooks
def download_quicklook(uuid, url, out_dir=None, timeout=60):
    """Download one Quicklook image and save it as ``<uuid>.jpg``.

    Parameters
    ----------
    uuid : str
        Scene identifier; used as the file name of the saved image.
    url : str
        URL the Quicklook is fetched from.
    out_dir : str, optional
        Target folder. Defaults to the module-level ``quicklook_dir``.
    timeout : float or tuple, optional
        Timeout in seconds passed to ``requests.get`` so a stalled
        connection cannot block the loop indefinitely.

    Returns
    -------
    None
        Success or failure is reported with a printed message.
    """
    target_dir = quicklook_dir if out_dir is None else out_dir
    file_path = os.path.join(target_dir, f"{uuid}.jpg")

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # A single network error should not abort the remaining downloads
        print(f"❌ Failed to download Quicklook for {uuid} ({exc})")
        return

    if response.status_code != 200:
        print(f"❌ Failed to download Quicklook for {uuid} (Status: {response.status_code})")
        return

    with open(file_path, "wb") as f:
        f.write(response.content)
    print(f"✅ Downloaded Quicklook: {file_path}")

# Fetch the Quicklook image of every scene collected in `quicklook_urls`
for scene_uuid, quicklook_url in quicklook_urls.items():
    download_quicklook(scene_uuid, quicklook_url)

✅ AOI is already in EPSG:4326
Selected AOI in WKT format: SRID=4326;POLYGON ((-37.980470864857 65.93130091030518, -37.17216729481364 65.88496249481673, -37.288812875871116 65.53287147950994, -38.09168633577521 65.57718663528689, -37.980470864857 65.93130091030518))
✅ Downloaded Quicklook: C:\Users\gjm501\Documents\Projects\2502_UiO\quicklooks_download\c797a1c6-fff8-5bb1-bc23-e6b02e8a2ed0.jpg
✅ Downloaded Quicklook: C:\Users\gjm501\Documents\Projects\2502_UiO\quicklooks_download\88706b3b-ec80-5bae-b8a0-5d58e751fd2e.jpg


**Step 3: Manually delete unwanted scenes in the quicklook folder, then run the script below to retrieve the remaining S2 scene IDs**

In [10]:
# List all remaining Quicklooks.
# splitext() removes only the trailing ".jpg", so a file name that contains
# additional dots is not truncated at the first one.
remaining_quicklooks = [
    os.path.splitext(f)[0] for f in os.listdir(quicklook_dir) if f.endswith(".jpg")
]
# Print remaining Quicklook UUIDs
print("Remaining Quicklook UUIDs:", remaining_quicklooks)

# Filter the DataFrame to keep only rows where Id is in remaining Quicklooks
filtered_df = df[df["Id"].isin(remaining_quicklooks)]
# Extract scene identifiers and names
scene_names = filtered_df[["Id", "Name"]]
# Print remaining scene names
print("Remaining Scenes:", scene_names)

if scene_names.empty:
    print("⚠️ No remaining Quicklook matches a scene in `df` - the CSV will contain no scenes")

# Single definition of the output path so the message below always names
# the file that was actually written.
scene_csv_path = fr"C:\Users\gjm501\Documents\Projects\2502_UiO\Quicklooks_download.csv"
scene_names.to_csv(scene_csv_path, index=False)
print(f"✅ Saved remaining scene names to `{scene_csv_path}`")

Remaining Quicklook UUIDs: ['c797a1c6-fff8-5bb1-bc23-e6b02e8a2ed0']
Remaining Scenes:                                      Id  \
0  c797a1c6-fff8-5bb1-bc23-e6b02e8a2ed0   

                                                Name  
0  S2A_MSIL1C_20220722T142031_N0400_R096_T24WWU_2...  
✅ Saved remaining scene names to `remaining_scenes.csv`


**Step 4: Download S2 scenes**

Step 4.1: Set credentials and receive an access token for download

In [13]:
import getpass
import os

import requests

# CDSE API credentials.
# Never store real credentials in the notebook: they end up in version control
# and in every shared copy. They are read from the environment variables
# CDSE_USERNAME / CDSE_PASSWORD, with an interactive prompt as fallback
# (getpass does not echo the password).
cdse_username = os.environ.get("CDSE_USERNAME") or input("CDSE username (e-mail): ")
cdse_password = os.environ.get("CDSE_PASSWORD") or getpass.getpass("CDSE password: ")

# URL for authentication
token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"

# Authentication payload
payload = {
    "grant_type": "password",
    "username": cdse_username,
    "password": cdse_password,
    "client_id": "cdse-public",
}

# Request access token (the timeout keeps the cell from hanging on a stalled connection)
response = requests.post(token_url, data=payload, timeout=60)

if response.status_code == 200:
    token_data = response.json()
    access_token = token_data["access_token"]  # Sent as Bearer token by the download step
    # NOTE(review): the refresh token is stored but not used by the code shown
    # here; access tokens are presumably short-lived - confirm in the CDSE docs.
    refresh_token = token_data["refresh_token"]
    print("✅ Access Token Received!")
else:
    print(f"❌ Authentication failed! Status code: {response.status_code}")
    print(response.text)
    # Stop here: the download step needs `access_token`, and continuing would
    # either raise a NameError or silently reuse a token from an earlier run.
    raise RuntimeError(f"CDSE authentication failed with status {response.status_code}")


✅ Access Token Received!


Step 4.2: Download S2 scenes

In [14]:
import requests
import pandas as pd
import os
import time

# CSV file with the Sentinel-2 product IDs written in Step 3
csv_path = fr"C:\Users\gjm501\Documents\Projects\2502_UiO\Quicklooks_download.csv"

# Folder that will receive the downloaded scene archives
download_folder = r"C:\Users\gjm501\Documents\Projects\2502_UiO\S2_download"

# Read the CSV file
df = pd.read_csv(csv_path)
# Alternative when Step 3 was run in the same session:
#df = scene_names

# The download loop iterates over df["Id"]; fail early with a clear message
# instead of a bare KeyError further down.
if "Id" not in df.columns:
    raise KeyError(f"Column 'Id' not found in {csv_path}; found columns: {list(df.columns)}")

# Blank rows would be requested as Products(nan) and duplicates would be
# downloaded twice, so drop both before downloading.
df = df.dropna(subset=["Id"]).drop_duplicates(subset="Id")

os.makedirs(download_folder, exist_ok=True)

# Download Each Sentinel-2 Scene Based on "Id" Column
def download_sentinel2_scene(product_id, token=None, out_dir=None, timeout=(10, 300)):
    """Download one Sentinel-2 product archive as ``<product_id>.zip``.

    The archive is streamed into ``<product_id>.zip.part`` and renamed only
    after the transfer finished, so an interrupted download never leaves a
    truncated file that looks like a complete archive.

    Parameters
    ----------
    product_id : str
        Product identifier inserted into the download URL and file name.
    token : str, optional
        Bearer token. Defaults to the module-level ``access_token``.
    out_dir : str, optional
        Target folder. Defaults to the module-level ``download_folder``.
    timeout : float or tuple, optional
        ``requests`` timeout in seconds, as (connect, read) or a single value.

    Returns
    -------
    None
        Success or failure is reported with a printed message.
    """
    bearer = access_token if token is None else token
    target_dir = download_folder if out_dir is None else out_dir

    download_url = f"https://download.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value"
    headers = {"Authorization": f"Bearer {bearer}"}
    file_path = os.path.join(target_dir, f"{product_id}.zip")
    partial_path = file_path + ".part"

    try:
        # The context manager releases the streamed connection on every exit path
        with requests.get(download_url, headers=headers, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"❌ Failed to download: {product_id} (Status: {response.status_code})")
                print(response.text)
                return
            with open(partial_path, "wb") as f:
                # 1 MiB chunks: far fewer write calls than 8 KiB for large archives
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
    except requests.RequestException as exc:
        # Network failure: remove the incomplete file and let the caller's
        # loop continue with the next product
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print(f"❌ Failed to download: {product_id} ({exc})")
        return

    os.replace(partial_path, file_path)
    print(f"✅ Downloaded: {file_path}")

# Download every scene listed in the "Id" column, one after another
for scene_id in df["Id"]:
    download_sentinel2_scene(scene_id)
    # Short pause between requests to avoid API rate limits
    time.sleep(2)

✅ Downloaded: C:\Users\gjm501\Documents\Projects\2502_UiO\S2_download\c797a1c6-fff8-5bb1-bc23-e6b02e8a2ed0.zip
