In [1]:
'''
Auto-downloading Sentinel data
Author: Songyan Zhu
Contact: szhu4@ed.ac.uk
---------Log of Changes-------------
Created: 2023-11-14
Updated: 2023-11-16
    |-> Make it operational
To do: only download L2A
'''

# Import credentials
import yaml
import requests
import argparse
import pandas as pd
import geopandas as gpd
from creds import *

def load_yaml_config(p):
    """Read a YAML file and return its parsed content.

    Args:
        p: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file content.

    Raises:
        yaml.YAMLError: If the file content cannot be parsed. The message
            names the file and the parser's own error is chained as the cause.
    """
    with open(p, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Surface the parse failure together with the offending path;
            # swallowing it here would only lead to an unrelated
            # UnboundLocalError when the result is returned.
            raise yaml.YAMLError(f"Could not parse YAML file {p!r}: {exc}") from exc

def get_access_token(username: str, password: str) -> str:
    """Request an access token from the CDSE identity service.

    Posts the credentials with the ``password`` grant type for the
    ``cdse-public`` client and returns the ``access_token`` field of the JSON
    reply.

    Args:
        username: Account user name sent to the token endpoint.
        password: Account password sent to the token endpoint.

    Returns:
        The value of ``access_token`` in the server's JSON response.

    Raises:
        Exception: If the request cannot be sent or the server answers with an
            HTTP error status. The underlying error is chained as the cause.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    # Stays None when the POST itself fails (e.g. no network), so the error
    # handler can tell "no response at all" from "error response".
    r = None
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
        )
        r.raise_for_status()
    except Exception as e:
        if r is None:
            detail = f"no response received ({e})"
        else:
            try:
                detail = r.json()
            except ValueError:
                # Error bodies are not guaranteed to be JSON; fall back to raw text.
                detail = r.text
        raise Exception(
            f"Access token creation failed. Reponse from the server was: {detail}"
        ) from e
    return r.json()["access_token"]

def load_config(p):
    """Load the download settings from a YAML configuration file.

    The region of interest comes from ``roi_file`` (first feature of the file,
    reprojected to EPSG:4326 when needed) or, failing that, from ``roi_str``.
    Only one of the two keys has to be present in the configuration.

    Args:
        p: Path of the YAML configuration, passed to ``load_yaml_config``.

    Returns:
        Tuple ``(username, password, start_date, end_date, data_collection, roi)``.
        ``roi`` is WKT text terminated by a single quote, which is the form the
        catalogue query built in ``retrieve`` interpolates.

    Raises:
        KeyError: If one of the mandatory keys (``username``, ``password``,
            ``start_date``, ``end_date``, ``data_collection``) is missing.
        ValueError: If ``roi_file`` has no features or no coordinate
            reference system.
        Exception: If neither ``roi_file`` nor ``roi_str`` is set.
    """
    cfg = load_yaml_config(p)
    username = cfg['username']
    password = cfg['password']
    start_date = cfg['start_date']
    end_date = cfg['end_date']
    data_collection = cfg['data_collection']
    # Either key may be left out of the file entirely; only one is required.
    roi_str = cfg.get('roi_str')
    roi_file = cfg.get('roi_file')

    # ----------------------------------------------------------------------------------
    if roi_file:
        shp = gpd.read_file(roi_file)
        if shp.empty:
            raise ValueError(f'roi_file {roi_file!r} contains no features!')
        if shp.crs is None:
            raise ValueError(f'roi_file {roi_file!r} has no CRS, cannot convert it to EPSG:4326!')
        if shp.crs.to_epsg() != 4326:
            shp = shp.to_crs(4326)
        # Positional access: only the first row is used, whatever its index label.
        roi = shp.geometry.to_wkt().iloc[0] + "'"
    elif roi_str:
        # The query template opens the quote (geography'SRID=4326;...) and relies
        # on roi to close it, so add the closing quote when the config omits it.
        roi = roi_str if roi_str.endswith("'") else roi_str + "'"
    else:
        raise Exception('Either roi_file or roi_str must exist in the configuration!')

    return username, password, start_date, end_date, data_collection, roi

def retrieve(p_config):
    """Download every L2A product matching the configured query.

    Reads the settings with ``load_config``, asks the catalogue for products of
    ``data_collection`` that intersect ``roi`` and whose ``ContentDate/Start``
    lies between ``start_date`` and ``end_date``, keeps the rows whose
    ``S3Path`` contains ``/L2A/`` and streams each of them into a zip file in
    the current working directory.

    Files are named ``<data_collection>-<row label, 4 digits>-<OriginDate as
    YYYYMMDD>.zip``, where the row label is the product's position in the
    unfiltered catalogue response.

    Args:
        p_config: Path of the YAML configuration file.

    Returns:
        None.

    Raises:
        requests.HTTPError: If the catalogue query or a download answers with
            an HTTP error status. Nothing is written for the failing product.
    """
    username, password, start_date, end_date, data_collection, roi = load_config(p_config)

    # NOTE(review): only the rows returned by this single request are processed;
    # if the catalogue paginates its results, later pages are not fetched —
    # confirm against the API documentation.
    query = requests.get(f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq '{data_collection}' and OData.CSC.Intersects(area=geography'SRID=4326;{roi}) and ContentDate/Start gt {start_date}T00:00:00.000Z and ContentDate/Start lt {end_date}T00:00:00.000Z")
    # Fail here on an HTTP error rather than with a KeyError on 'value' below.
    query.raise_for_status()
    products = pd.DataFrame.from_dict(query.json()['value'])
    if products.empty:
        # An empty frame has no 'S3Path' column to filter on.
        print(f'No {data_collection} products found for the requested area and dates.')
        return

    products = products[products['S3Path'].str.contains('/L2A/', na=False)] # Only L2A products
    n_files = len(products)
    print(f'Beginning to retrieve {n_files} {data_collection} files...')
    # n_done counts downloads; cnt is the row label, which has gaps after the
    # L2A filter and therefore cannot be used for progress reporting.
    for n_done, (cnt, product) in enumerate(products.iterrows(), start=1):
        image_id = product['Id']
        savename = data_collection + '-' + str(cnt).zfill(4) + '-' + product['OriginDate'].split('T')[0].replace('-', '') + '.zip'

        url = f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({image_id})/$value"
        # A fresh token is requested for every product.
        access_token = get_access_token(username, password)

        with requests.Session() as session:
            session.headers.update({"Authorization": f"Bearer {access_token}"})
            with session.get(url, stream=True) as response:
                # Check the status before opening the file so an error body is
                # never saved under a .zip name.
                response.raise_for_status()
                with open(savename, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            file.write(chunk)
        print(f'{n_done} done, {n_files - n_done} remaining...')
        print('-' * 100)
        
# if __name__ == '__main__':
#     # Example: 
#     parser = argparse.ArgumentParser()
#     parser.add_argument("-p", nargs = "?", default = 'auto_Sentinel_download.yaml', type = str)
#     args = parser.parse_args()
#     p_config = args.p
#     retrieve(p_config)

In [2]:
# Run the catalogue query on its own (no download) so the result can be inspected.
p_config = 'auto_Sentinel_download.yaml'
username, password, start_date, end_date, data_collection, roi = load_config(p_config)
catalogue_response = requests.get(f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq '{data_collection}' and OData.CSC.Intersects(area=geography'SRID=4326;{roi}) and ContentDate/Start gt {start_date}T00:00:00.000Z and ContentDate/Start lt {end_date}T00:00:00.000Z")
# Stop on an HTTP error status; otherwise the failure only shows up as a KeyError on 'value'.
catalogue_response.raise_for_status()
json = catalogue_response.json()
dfd = pd.DataFrame.from_dict(json['value'])
# An empty result has no 'S3Path' column, so only filter when there are rows.
if not dfd.empty:
    dfd = dfd[dfd['S3Path'].str.contains('/L2A/', na=False)]  # keep only L2A products

In [3]:
# Display the catalogue rows kept by the previous cell.
# NOTE(review): the table saved below still lists L1C rows although the previous
# cell keeps only paths containing '/L2A/' — the stored output looks stale; re-run
# the notebook from a fresh kernel to refresh it.
dfd

Unnamed: 0,@odata.mediaContentType,Id,Name,ContentType,ContentLength,OriginDate,PublicationDate,ModificationDate,Online,EvictionDate,S3Path,Checksum,ContentDate,Footprint,GeoFootprint
0,application/octet-stream,b22e6d58-dd3f-5582-ae81-827ba2a5a9cb,S2B_MSIL2A_20220603T112119_N0400_R037_T30UVB_2...,application/octet-stream,0,2022-06-07T12:37:01.333Z,2022-06-07T12:58:17.615Z,2022-06-07T12:58:17.615Z,True,,/eodata/Sentinel-2/MSI/L2A/2022/06/03/S2B_MSIL...,[],"{'Start': '2022-06-03T11:21:19.024Z', 'End': '...",geography'SRID=4326;POLYGON ((-4.439453 51.442...,"{'type': 'Polygon', 'coordinates': [[[-4.43945..."
1,application/octet-stream,09f8f58c-fda4-5353-8a41-8a0f104b465d,S2B_MSIL1C_20220603T112119_N0400_R037_T30UVB_2...,application/octet-stream,0,2022-06-03T14:26:10.959Z,2022-06-03T14:34:39.629Z,2022-06-03T14:34:39.629Z,True,,/eodata/Sentinel-2/MSI/L1C/2022/06/03/S2B_MSIL...,[],"{'Start': '2022-06-03T11:21:19.024Z', 'End': '...",geography'SRID=4326;POLYGON ((-4.439453 51.442...,"{'type': 'Polygon', 'coordinates': [[[-4.43945..."
2,application/octet-stream,85b3875b-5c71-560a-a52f-1414b4835bdb,S2A_MSIL1C_20220608T112121_N0400_R037_T30UVB_2...,application/octet-stream,0,2022-06-08T18:20:41.781Z,2022-06-08T18:26:55.830Z,2022-06-08T18:26:55.830Z,True,,/eodata/Sentinel-2/MSI/L1C/2022/06/08/S2A_MSIL...,[],"{'Start': '2022-06-08T11:21:21.024Z', 'End': '...",geography'SRID=4326;POLYGON ((-4.439453 51.442...,"{'type': 'Polygon', 'coordinates': [[[-4.43945..."
3,application/octet-stream,6908b38f-a697-5e25-8437-b06cb22e89da,S2A_MSIL2A_20220608T112121_N0400_R037_T30UVB_2...,application/octet-stream,0,2022-06-08T19:54:17.759Z,2022-06-10T05:03:11.567Z,2022-06-10T05:03:11.567Z,True,,/eodata/Sentinel-2/MSI/L2A/2022/06/08/S2A_MSIL...,[],"{'Start': '2022-06-08T11:21:21.024Z', 'End': '...",geography'SRID=4326;POLYGON ((-4.439453 51.442...,"{'type': 'Polygon', 'coordinates': [[[-4.43945..."
