### Download data from "Sentinel on AWS" to local machine

 Because retrieving a product from LTA is only allowed once every 30 min from ESA we can use "Sentinel on AWS" as an alternative source

Docs: https://registry.opendata.aws/sentinel-2/

Sentinel-2 Datastructure on AWS: https://roda.sentinel-hub.com/sentinel-s2-l2a/readme.html

AWS provides all Sentinel-2 data to download from a Requester Pays S3 bucket. \
Fortunately, AWS provides a free tier to download 100 GB of data per month. \
But you need to have an AWS account and a credit card to register.


1. connect to aws s3 bucket
2. determine the path to the product from title name received from sentinel hub search
3. download the product to local machine
4. error handling 

In [15]:
import boto3
from dotenv import load_dotenv
import os
import re

In [16]:
load_dotenv()
aws_access_key_id= os.getenv("aws_access_key_id")
aws_secret_access_key = os.getenv("aws_secret_access_key")

# Let's use Amazon S3
s3 = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)

In [17]:
title = "S2B_MSIL1C_20180610T100029_N0206_R122_T33UVS_20180610T120934"
title_regex = re.compile(r"""^(?P<mission>S2[A-B])_MSI
                        (?P<product_level>L[1-2][A-C])_
                        (?P<sensing_time>\d{8}T\d{6})_
                        (?P<processing_baseline>N\d{4})_
                        (?P<relative_orbit>R\d{3})_T
                        (?P<utm_code>\d{2})
                        (?P<latitude_band>\w{1})
                        (?P<square>\w{2})_
                        (?P<year>\d{4})
                        (?P<month>\d{2})
                        (?P<day>\d{2})T
                        (?P<product_time>\d{6})""",re.VERBOSE)

In [18]:
regex_match = re.search(title_regex, title)
if regex_match:
    mission = regex_match.group("mission")
    utm_code = regex_match.group("utm_code")
    product_level = regex_match.group("product_level").lower()
    latitude_band = regex_match.group("latitude_band")
    square = regex_match.group("square")
    year = regex_match.group("year")
    month = str(int(regex_match.group("month")))
    day = str(int(regex_match.group("day")))

In [8]:
print(f'mission {mission}')
print(f'product_level {product_level}')
print(f'utm_code {utm_code}')
print(f'latitude_band {latitude_band}')
print(f'square {square}')
print(f'year {year}')
print(f'month {month}')
print(f'day {day}')

mission S2B
product_level l1c
utm_code 33
latitude_band U
square VS
year 2018
month 6
day 10


In [19]:
# https://roda.sentinel-hub.com/sentinel-s2-l2a/readme.html
bucket = f'sentinel-s2-{product_level}'
# add variables to prefix to get different resolutions
prefix = f'tiles/{utm_code}/{latitude_band}/{square}/{year}/{month}/{day}/0/R10m/'

In [20]:
s3.list_objects_v2(Bucket=bucket, Prefix=prefix, RequestPayer='requester')

{'ResponseMetadata': {'RequestId': 'VJC1V97JPP710STB',
  'HostId': 'cG2izaQTTIc5BC8PIUbCR75D83INhMKwDsufsrlDEWPoQOh65dTa6bBOE/5OC5PofRxq/1aiGrc=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'cG2izaQTTIc5BC8PIUbCR75D83INhMKwDsufsrlDEWPoQOh65dTa6bBOE/5OC5PofRxq/1aiGrc=',
   'x-amz-request-id': 'VJC1V97JPP710STB',
   'date': 'Thu, 13 Apr 2023 16:56:10 GMT',
   'x-amz-request-charged': 'requester',
   'x-amz-bucket-region': 'eu-central-1',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 1},
 'IsTruncated': False,
 'Contents': [{'Key': 'tiles/33/U/VS/2018/6/10/0/B01.jp2',
   'LastModified': datetime.datetime(2018, 6, 10, 17, 30, 36, tzinfo=tzutc()),
   'ETag': '"b20c73401f54404ad702a067aac847d0"',
   'Size': 2116548,
   'StorageClass': 'INTELLIGENT_TIERING'},
  {'Key': 'tiles/33/U/VS/2018/6/10/0/B02.jp2',
   'LastModified': datetime.datetime(2018, 6, 10, 17, 30, 36, tzinfo=tzutc()),
   'ETag': '"ae3da17f3e3c

In [11]:
BAND_FILE_MAP = {
    "B02": None,  # blue
    "B03": None,  # green
    "B04": None,  # red
    "B08": None,  # NIR
    }
for band in BAND_FILE_MAP:
    print(f'band {band}')

band B02
band B03
band B04
band B08


In [25]:
from pathlib import Path, PurePath
import os
downloaded_size = 0
download_path = Path(r"C:\Users\Fabian\Documents\Masterarbeit_Daten\AWS_test4")
target_folder = download_path / title
target_folder.mkdir(parents=True, exist_ok=True)
band_file = f'{band}.jp2'
band_file_path = target_folder / band_file

In [28]:
f'{prefix}/{band_file}'

'tiles/33/U/VS/2018/6/10/0/B08.jp2'

In [None]:
response = s3.get_object(Bucket=bucket,
                            Key=prefix,
                            RequestPayer='requester')

In [29]:
response = s3.get_object(Bucket=bucket,
                            Key=f'{prefix}/{band_file}',
                            RequestPayer='requester')
response_content = response['Body'].read()
with open(band_file_path, 'wb') as file:
    file.write(response_content)
    downloaded_size += band_file_path.stat().st_size

In [30]:
downloaded_size

64485091

In [35]:
def convert_bytes(size):
    """ Convert bytes to KB, or MB or GB"""
    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if size < 1024.0:
            return "%3.1f %s" % (size, x)
        size /= 1024.0
x = convert_bytes(downloaded_size)
print('file size is', x)

file size is 61.5 MB


In [34]:
print (downloaded_size >> 10)

62973


In [40]:
# build-in
from dotenv import load_dotenv
import os
import re
from pathlib import Path
import pickle
from datetime import datetime

# third-party
import boto3

In [73]:
def write_downloaded_size(target_folder: Path) -> None:
    # Get all files in folder
    files = list(target_folder.iterdir())

    # Calculate total size of files
    total_size = sum(f.stat().st_size for f in files if f.is_file())
    
    root_folder = target_folder.parents[0]
    
    # Load existing pickle file or create empty dictionary
    if (root_folder / "downloaded_size_logs.pickle").exists():
        # ? wrb is used due to an error with mypy
        with open((root_folder / "downloaded_size_logs.pickle"), "r+b") as f:
            size_logs = pickle.load(f)
    else:
        size_logs = {}
     
    # Add or update the size for the current date
    today = datetime.now().date()
    if today not in size_logs:
        size_logs[today] = total_size
    else:
        size_logs[today] += total_size
    
    # Save the updated size logs to the pickle file
    with open((root_folder / "downloaded_size_logs.pickle"), "w+b") as f:
        pickle.dump(size_logs, f)

In [78]:

def check_aws_free_tier_available(root_folder: Path) -> bool:
    # Get the current year and month
    now = datetime.now()
    year = now.year
    month = now.month

    if (root_folder / "downloaded_size_logs.pickle").exists():
        print("File exists")
        with open((root_folder / "downloaded_size_logs.pickle"), "rb") as f:
            size_logs = pickle.load(f)
    else:
        size_logs = {}
    
    # Calculate the sum of sizes for all days in the current month
    current_month_sum = sum(
        size_logs[date]
        for date in size_logs
        if datetime.strptime(date, "%Y-%m-%d").year == year and
        datetime.strptime(date, "%Y-%m-%d").month == month
    )
    # ToDo: replace hard-coded value with a constant
    if current_month_sum < 90 * 1024 * 1024 * 1024:
        print(f"Current month sum is: {current_month_sum/1024} kB.")
        return True
    else:
        print("Current month sum is 90 GB or above.")
        return False

In [75]:
def download_from_aws(identifier: str, target_folder: Path) -> bool:
    
    if not check_aws_free_tier_available(target_folder.parents[0]):
        return False

    load_dotenv()
    aws_access_key_id= os.getenv("aws_access_key_id")
    aws_secret_access_key = os.getenv("aws_secret_access_key")

    # Let's use Amazon S3
    s3 = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)

    identifier_regex = re.compile(r"""^(?P<mission>S2[A-B])_MSI
                        (?P<product_level>L[1-2][A-C])_
                        (?P<sensing_time>\d{8}T\d{6})_
                        (?P<processing_baseline>N\d{4})_
                        (?P<relative_orbit>R\d{3})_T
                        (?P<utm_code>\d{2})
                        (?P<latitude_band>\w{1})
                        (?P<square>\w{2})_
                        (?P<year>\d{4})
                        (?P<month>\d{2})
                        (?P<day>\d{2})T
                        (?P<product_time>\d{6})""",re.VERBOSE)
    
    BAND_FILE_MAP = {
    "B02": None,  # blue
    "B03": None,  # green
    "B04": None,  # red
    "B08": None,  # NIR
    }
    
    regex_match = re.search(identifier_regex, identifier)
    
    if regex_match:
        mission = regex_match.group("mission")
        utm_code = regex_match.group("utm_code")
        product_level = regex_match.group("product_level").lower()
        latitude_band = regex_match.group("latitude_band")
        square = regex_match.group("square")
        year = regex_match.group("year")
        month = str(int(regex_match.group("month")))
        day = str(int(regex_match.group("day")))

    # https://roda.sentinel-hub.com/sentinel-s2-l2a/readme.html
    bucket = f'sentinel-s2-{product_level}'
    prefix = f'tiles/{utm_code}/{latitude_band}/{square}/{year}/{month}/{day}/0'

    for band in BAND_FILE_MAP:
        band_file = f'{band}.jp2'
        band_file_path = target_folder / band_file
        
        if band_file_path.exists():
            continue
        
        # https://stackoverflow.com/questions/63323425/download-sentinel-file-from-s3-using-python-boto3
        response = s3.get_object(Bucket=bucket,
                            Key=f'{prefix}/{band_file}',
                            RequestPayer='requester')
        response_content = response['Body'].read()
        with open(band_file_path, 'wb') as file:
            file.write(response_content)
   
    write_downloaded_size(target_folder)
    return True

In [79]:
download_from_aws(title, target_folder)

File exists
Current month sum is: 221042.0556640625 kB.


True

In [61]:
root_folder = Path(r"C:\Users\Fabian\Documents\Masterarbeit_Daten\AWS_test4")
if (root_folder / "downloaded_size_logs.pickle").exists():
    with open((root_folder / "downloaded_size_logs.pickle"), "rb") as f:
        size_logs = pickle.load(f)

In [62]:
size_logs

{'2023-04-13': 226347065}

In [70]:
today = datetime.now().date()

In [72]:
today.month

4

In [38]:
import pickle
from datetime import datetime
def check_aws_free_tier(root_folder: Path) -> bool:
    # Get the current year and month
    now = datetime.now()
    year = now.year
    month = now.month

    if Path("downloaded_size_logs.pickle").exists():
        with open("downloaded_size_logs.pickle", "rb") as f:
            size_logs = pickle.load(f)
    else:
        size_logs = {}
    
    # Calculate the sum of sizes for all days in the current month
    current_month_sum = sum(
        size_logs[date]
        for date in size_logs
        if datetime.strptime(date, "%Y-%m-%d").year == year and
        datetime.strptime(date, "%Y-%m-%d").month == month
    )
    
    if current_month_sum < 90 * 1024 * 1024 * 1024:
        print("Current month sum is below 90 GB.")
        return True
    else:
        print("Current month sum is 90 GB or above.")
        return False

In [39]:
check_aws_free_tier(download_path)

Current month sum is below 90 GB.


True

In [134]:
# Verbindung zum AWS-Konto herstellen
session = boto3.session.Session()
client = session.client('service-quotas',region_name='eu-west-1')

# Abfragen des Datenübertragungskontingents des kostenlosen Tiers für das Konto
response = client.get_service_quota(
    ServiceCode='s3',
    QuotaCode='L-TXRQWZ12LVCZ'
)

# Extrahieren des verbleibenden Betrags des kostenlosen Kontingents
free_tier_remaining = response['Quota']['Value'] - response['Usage']['MetricValue']
print(f"Free Tier Data Transfer Remaining: {free_tier_remaining} GB")

NoSuchResourceException: An error occurred (NoSuchResourceException) when calling the GetServiceQuota operation: 

In [52]:
# https://stackoverflow.com/questions/63323425/download-sentinel-file-from-s3-using-python-boto3

response = s3.get_object(Bucket='sentinel-s2-l1c',
                        Key='tiles/7/W/FR/2018/3/31/0/B8A.jp2', 
                        RequestPayer='requester')
response_content = response['Body'].read()

with open('./B8A.jp2', 'wb') as file:
     file.write(response_content)

In [7]:
import re

filename = "S2A_MSIL1C_20170105T013442_N0204_R031_T53NMJ_20170105T013443.SAFE"

regex = r"^(S2[A-B])_(MSIL[0-9][AC])_([0-9]{8}T[0-9]{6})_N([0-9]{4})_R([0-9]{3})_T([A-Z0-9]{6})_([0-9]{8}T[0-9]{6})\.SAFE$"
match = re.match(regex, filename)

if match:
    mission = match.group(1)
    product_level = match.group(2)
    sensing_time = match.group(3)
    processing_baseline = match.group(4)
    relative_orbit = match.group(5)
    tile_number = match.group(6)
    product_time = match.group(7)
    print("Mission:", mission)
    print("Product level:", product_level)
    print("Sensing time:", sensing_time)
    print("Processing baseline:", processing_baseline)
    print("Relative orbit:", relative_orbit)
    print("Tile number:", tile_number)
    print("Product time:", product_time)
else:
    print("No match found")

No match found


In [23]:
load_dotenv()
aws_access_key_id= os.getenv("aws_access_key_id")
aws_secret_access_key = os.getenv("aws_secret_access_key")

# Let's use Amazon S3
client = boto3.client('service-quotas', region_name='eu-central-1', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)

In [24]:
response = client.list_service_quotas(ServiceCode="s3")
response

{'Quotas': [],
 'ResponseMetadata': {'RequestId': '7149004a-e531-4017-b819-83783f528155',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Tue, 04 Apr 2023 21:33:05 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '13',
   'connection': 'keep-alive',
   'x-amzn-requestid': '7149004a-e531-4017-b819-83783f528155'},
  'RetryAttempts': 0}}

In [27]:
response = client.list_services(MaxResults=100)
response

{'NextToken': 'UFQCQUFBQUFBRUNBZ0I0Qy9rRkVxSzY0Y2I5aGZ1ek4vRGVCWGlmZWpIQjhMeXV3eHVTeno3K1dSQUJneklOOEgwYUVGREwrVkFGNjJwRDVnQUFBUm93Z2dFV0Jna3Foa2lHOXcwQkJ3YWdnZ0VITUlJQkF3SUJBRENCL1FZSktvWklodmNOQVFjQk1CNEdDV0NHU0FGbEF3UUJMakFSQkF5WUdvWnRTTytodVJaTk9WZ0NBUkNBZ2MrcWUzT1F4cUtETmlDODlCQ3kzKy9talM5QWVXbXNsUzZXSVFZTFRoNFJKcys0Z1c4aTZtaHBodmYrbGp4WVlHaG1sU3I2RkFZbEh6a1dzSEI1MmovKzM2ejVwWS8xVzJGMkpUMmFHenR1eEFKb2E2T0hVWkpLakpMbFlYMldTK0UzMk9wRnl4V3MyZlloTVZDd2wwYUZzdDJVdWQxcUhqUDVwVzE1dzlpZzNDK01NL283Y1UwK00ySnIzeWpvNjdzMm9jbVRla0dVYWIvZjYraGtOY21hOUplazI4eWhYY2pham40bit5d1JaS3FsR1dDZEdkMFh6TURxZk5YbmVpR3AxSFcwc0NPYjVtZEhmclAvRWdrPQ==',
 'Services': [{'ServiceCode': 'AWSCloudMap', 'ServiceName': 'AWS Cloud Map'},
  {'ServiceCode': 'access-analyzer', 'ServiceName': 'Access Analyzer'},
  {'ServiceCode': 'acm', 'ServiceName': 'AWS Certificate Manager (ACM)'},
  {'ServiceCode': 'acm-pca',
   'ServiceName': 'AWS Private Certificate Authority'},
  {'ServiceCode': 'airflow',
   'ServiceName': 'Amazo

In [13]:
response

{'NextToken': 'UFQCQUFBQUFBRUNBZ0I0Qy9rRkVxSzY0Y2I5aGZ1ek4vRGVCWGlmZWpIQjhMeXV3eHVTeno3K1dSQUI2QURXZE9kTHdlOTdtNURTV005UStBQUFBUm93Z2dFV0Jna3Foa2lHOXcwQkJ3YWdnZ0VITUlJQkF3SUJBRENCL1FZSktvWklodmNOQVFjQk1CNEdDV0NHU0FGbEF3UUJMakFSQkF4TWUrQXVyNGVqcHNlUjhhd0NBUkNBZ2MrVjc1ZFVkNE1mZmpHMk1oVElqY21oQk9NMVZRNm9EUTlxN2N6aUNQK2lTcTJlZDBGeS9FeXBJakM3SWlJQytYQ09EV3U2UStpdThvVllSODBVSFVhNHdhUmdIU0MybExTNFMvekplWVI3WjN6cHF4MkxqN3c1YkZlRDBmN2p4dmZqM2lhNkhCZjFwd1NUZWFlRTFWKzVpZ3oycjVMV05kVEdEb29EcG9PZHVNVlVDTVF0YWJ3eVJGdEQ5WDdBY0szSmNNNWpoL0xtMTdURElHeHVrVXZzRXloVVlqdnQ4STBtOTVxWjNrem9sMFM5TEhnTEM5VUZXTW5mT0xDd3FOeDJKUjBsTkV2Ti9BbFN0R2NJaE53PQ==',
 'Services': [{'ServiceCode': 'AWSCloudMap', 'ServiceName': 'AWS Cloud Map'},
  {'ServiceCode': 'access-analyzer', 'ServiceName': 'Access Analyzer'},
  {'ServiceCode': 'acm', 'ServiceName': 'AWS Certificate Manager (ACM)'},
  {'ServiceCode': 'acm-pca',
   'ServiceName': 'AWS Private Certificate Authority'},
  {'ServiceCode': 'airflow',
   'ServiceName': 'Amazo

In [10]:
test = "10"

In [11]:
int(test)

10

In [46]:
sentinel_bucket.Object('tiles/16/R/BV/2018/4/16/0/tileInfo.json').download_file('tileInfo.json')

ClientError: An error occurred (403) when calling the HeadObject operation: Forbidden

In [21]:
s3.dataLocation = 's3://sentinel-s2-l1c/tiles/32/T/KE/2019/4/3/0/'

In [35]:
s3.list_objects(Bucket='sentinel-s2-l2a', Prefix='tiles/16/R/BV/2018/4/16/0/tileInfo.json')

ClientError: An error occurred (AccessDenied) when calling the ListObjects operation: Access Denied

In [49]:
s3.get_object(Bucket='sentinel-s2-l2a', Key='tiles/16/R/BV/2018/4/16/0/tileInfo.json', RequestPayer='requester')

AttributeError: 'dict' object has no attribute 'download_file'

In [None]:
sentinel_bucket = s3.Bucket('sentinel-s2-l2a')

In [17]:
# Print out bucket names
for bucket in s3.buckets.all():
    print(bucket.name)

In [22]:
os.getcwd()

'c:\\Users\\Fabian\\Documents\\Github_Masterthesis\\Solarpark-detection\\src\\download_sentinel_data\\notebooks'