In [1]:
from cognito_client import CognitoClient
import boto3
from datetime import date
import rioxarray
import os
import rasterio as rio
from rasterio.session import AWSSession



In [3]:
# Build the Cognito client used for login and for fetching AWS credentials.
# NOTE(review): the three "xxx" identifiers look like redacted placeholders —
# substitute the real client / user-pool / identity-pool ids before running.
client = CognitoClient(client_id="xxx", user_pool_id="xxx", identity_pool_id="xxx")

Username: aimee


In [4]:
# Log in through the Cognito client. The masked output recorded below looks
# like an interactive password prompt — TODO confirm.
# The return value is bound to `_` instead of being left as the cell's last
# expression, so the notebook does not display it.
_ = client.login()

········


In [5]:
# Fetch AWS credentials through the Cognito client. The following cell reads
# the "AccessKeyId", "SecretKey" and "SessionToken" entries of the result.
creds = client.get_aws_credentials()

In [6]:
# Pull the three credential fields out of `creds`; the same values feed the
# boto3 S3 client, the boto3 session and (through that session) rasterio.
access_key_id, secret_access_key, session_token = (
    creds[field] for field in ("AccessKeyId", "SecretKey", "SessionToken")
)
aws_auth = {
    "aws_access_key_id": access_key_id,
    "aws_secret_access_key": secret_access_key,
    "aws_session_token": session_token,
}

# Low-level S3 client (used for listing) and a session handed to rasterio.
s3 = boto3.client("s3", **aws_auth)
session = boto3.Session(**aws_auth)

if __name__ == "__main__":
    # One cookie file serves as both the GDAL cookie source and cookie jar.
    cookie_file = os.path.expanduser('~/cookies.txt')
    rio_env = rio.Env(
        AWSSession(session),
        GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
        GDAL_HTTP_COOKIEFILE=cookie_file,
        GDAL_HTTP_COOKIEJAR=cookie_file,
    )
    # Entered by hand with no matching __exit__ in this cell, so the rasterio
    # environment is still active once the cell has finished running.
    rio_env.__enter__()

# Bucket that the listing and validation cells operate on.
bucket = "veda-data-store-staging"

In [7]:
# Key prefix that scopes the S3 listing further down (it is passed as
# `Prefix=` to the paginator and echoed in the object-count message).
prefix = 'EIS/COG/LIS_TWS_ANOMALY/'

In [8]:
# Number of days separating the two dates below (end date exclusive).
d0 = date(year=2002, month=9, day=1)
d1 = date(year=2021, month=12, day=1)
delta = d1 - d0
# One print call; `sep` puts the count on its own line under the label.
print('The number of days between the given range of dates is :', delta.days, sep='\n')

The number of days between the given range of dates is :
7031


In [9]:
# Smoke test: confirm the current credentials are allowed to list objects
# under the data prefix. Only success or failure is reported here; the
# response is kept in `objects` for inspection.
try:
    # Reuse the shared `prefix` so this check cannot drift away from the
    # prefix that the paginated listing uses.
    objects = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
except Exception as err:
    # Broad catch is deliberate: any failure (auth, network, missing bucket)
    # is reported with its reason instead of aborting the notebook.
    print(f"❌ Failed to list objects in bucket, {err}")
else:
    print("✅ Able to list objects in bucket")

✅ Able to list objects in bucket


In [10]:
# Collect the listing record of every object stored under `prefix`, walking
# all result pages returned by the list_objects_v2 paginator.
paginator = s3.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
all_objects = []
for page in pages:
    # S3 omits the 'Contents' key when a page holds no matching objects, so
    # fall back to an empty list instead of raising KeyError.
    all_objects.extend(page.get('Contents', []))

In [11]:
# Report how many object records the paginated listing collected under `prefix`.
print(f'There are {len(all_objects)} in the bucket under prefix {prefix}')

There are 7033 in the bucket under prefix EIS/COG/LIS_TWS_ANOMALY/


In [16]:
%%time
for idx, obj in enumerate(all_objects[1170:]):
    if idx % 10 == 0:
        print(f"checked {idx} items")
    try:
        # Try top open the dataset with rioxarray, if it fails it's probably a mis-processed file
        s3_link = f's3://{bucket}/{obj["Key"]}'
        da = rioxarray.open_rasterio(s3_link)
        #da = da.squeeze('band', drop=True)
        # check both min and max are not 0
#         if all(v == 0 for v in [da.min() and da.max()]):
#             print(f"Min and max of {s3_link} are both 0")
    except Exception as err:
        print(f"Failed to open {s3_link}")

checked 0 items
checked 10 items
checked 20 items
checked 30 items
checked 40 items
checked 50 items
checked 60 items
checked 70 items
checked 80 items
checked 90 items
checked 100 items
checked 110 items
checked 120 items
checked 130 items
checked 140 items
checked 150 items
checked 160 items
checked 170 items
checked 180 items
checked 190 items
checked 200 items
checked 210 items
checked 220 items
checked 230 items
checked 240 items
checked 250 items
checked 260 items
checked 270 items
checked 280 items
checked 290 items
checked 300 items
checked 310 items
checked 320 items
checked 330 items
checked 340 items
checked 350 items
checked 360 items
checked 370 items
checked 380 items
checked 390 items
checked 400 items
checked 410 items
checked 420 items
checked 430 items
checked 440 items
checked 450 items
checked 460 items
checked 470 items
checked 480 items
checked 490 items
checked 500 items
checked 510 items
checked 520 items
checked 530 items
checked 540 items
checked 550 items
che