In [5]:
import ee
import ee.batch
import ee.data
import logging
import json
from gee_toolbox.gee import assets
from datetime import datetime, date, timedelta
from dateutil.relativedelta import relativedelta
from datetime import UTC as datetime_UTC
from observatorio_ipa import merge, spatial_4, spatial_8, temporal
from observatorio_ipa.processes import binary, monthly_export
from observatorio_ipa.utils import get_collection_dates
import pathlib
import importlib
from time import sleep


logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

In [2]:
legacy_monthly_ic_path = 'users/observatorionieves/MODIS/Andes_MCDS4S5_Yearly_Monthly'
legacy_yearly_ic_path = 'users/observatorionieves/MODIS/Andes_MCDS4S5_Yearly'

Helpful Functions

In [3]:
# set a date simpleTime property to facilitate filtering
def set_date_property(image):
    date = ee.ee_date.Date(image.date().format("YYYY-MM-dd"))
    return image.set("simpleTime", date)

def remove_date_property(image):
    return (ee.image.Image() # Image without any bands or properties
    .addBands(image) # add bands 
    .copyProperties(source= image, exclude=['simpleTime']) # add properties excluding simpleTime  
    )

def filter_collection_by_dates(
    ee_collection: ee.imagecollection.ImageCollection, dates_list: list[str]
) -> ee.imagecollection.ImageCollection:
    """
    Filter an image collection by a list of dates

    Args:
    ee_collection: ee.ImageCollection to filter
    dates_list: list of dates in format "YYYY-MM-DD"

    Returns:
    ee.ImageCollection
    """

    # set a date simpleTime property to facilitate filtering
    ee_collection = ee_collection.map(set_date_property)

    # create ee_list of dates
    ee_dates_list = ee.ee_list.List([ee.ee_date.Date(i_date) for i_date in dates_list])
    ee_filtered_collection = ee_collection.filter(
        ee.filter.Filter.inList("simpleTime", ee_dates_list)
    )

    # remove simpleTime property
    # ee_filtered_collection = ee_filtered_collection.map(remove_date_property)

    return ee_filtered_collection


def get_buffer_dates(target_date, buffer_days=2):
    target_date_dt = datetime.strptime(target_date, "%Y-%m-%d").date()
    buffer_dates = [
        str(target_date_dt + timedelta(days=delta))
        for delta in range(-buffer_days, buffer_days + 1)
    ]
    buffer_dates.remove(target_date)
    return buffer_dates


def get_collection_dates(ee_collection) -> list[str]:
    # get "system:time_start" of all images in image collection
    image_dates = ee_collection.aggregate_array("system:time_start").getInfo()
    # convert milliseconds to date
    image_dates = [
        datetime.fromtimestamp(date / 1000, datetime_UTC).strftime("%Y-%m-%d")
        for date in image_dates
    ]
    return image_dates

def get_image_date(image):
    img_date_milliseconds = image.get("system:time_start").getInfo()
    return datetime.fromtimestamp(img_date_milliseconds / 1000, datetime_UTC).strftime("%Y-%m-%d")

In [2]:
config = {
    "service_credentials_file": "../secrets/ee-observatorionieves-288939dbc1cf.json"
}

In [3]:
# Connect to Earth Engine using Service Account

## ------ GEE CONNECTION ---------
# TODO: Need to verify connections get closed after script finishes
logger.debug("--- Connecting to GEE")
# Connect to GEE using service account for automation

try:
    with open(config['service_credentials_file'], "r") as f:
        service_account_data = json.load(f)
    service_user = service_account_data["client_email"]
    print(service_user)
    
    credentials = ee._helpers.ServiceAccountCredentials(
        email=service_user,
        key_data=json.dumps(service_account_data),
    )
    ee.Initialize(credentials)
    logger.debug("GEE connection successful")
except FileNotFoundError as e:
    logger.error(f"Service account file not found: {e}")
    
except Exception as e:
    logger.error(f"Initializing connection to GEE failed: {e}")

    


osn-imageautomation-dev@ee-observatorionieves.iam.gserviceaccount.com


In [4]:
# Get Area of Interest and DEM data

# load feature collection of Andes
ee_aoi_fc= ee.featurecollection.FeatureCollection("projects/ee-observatorionieves/assets/Modules/Andes")

ee_dem_img = ee.image.Image(
    "projects/ee-observatorionieves/assets/Modules/DEM_SRTM_reproj_MODIS_463_Andes"
)  # Asset was not found

In [9]:
asset = ee.data.getAsset("projects/ee-observatorionieves/assets/Modules/Andes")

In [10]:
print(asset)

{'type': 'TABLE', 'name': 'projects/ee-observatorionieves/assets/Modules/Andes', 'id': 'projects/ee-observatorionieves/assets/Modules/Andes', 'updateTime': '2024-12-04T19:50:00.856890Z', 'sizeBytes': '910486'}


In [21]:
importlib.reload(monthly_export)

<module 'observatorio_ipa.monthly_export' from 'C:\\Users\\erick\\Documents\\Projects\\teleamb\\observatorio_ipa\\src\\observatorio_ipa\\monthly_export.py'>

In [11]:
monthly_export_results = monthly_export.monthly_export_proc(
    monthly_collection_path=legacy_monthly_ic_path,
    name_prefix="Andes_MCDS4S5_Yearly_Monthly",
    aoi_path="projects/ee-observatorionieves/assets/Modules/Andes",
    dem_path="projects/ee-observatorionieves/assets/Modules/DEM_SRTM_reproj_MODIS_463_Andes",

)

In [12]:
print(monthly_export_results)

{'images_pending_export': ['2024-11', '2024-12'], 'images_excluded': [{'2024-12': 'Month incomplete'}], 'images_to_export': ['2024-11'], 'export_tasks': [{'task': 'mock_task', 'image': 'Andes_MCDS4S5_Yearly_Monthly_2024_11', 'target': 'GEE Asset', 'status': 'created'}]}


In [11]:
random_path = 'projects/ee-observatorionieves/assets/Modules/random_path'
random_path_results = assets.list_assets(random_path)


ERROR:root:Asset projects/ee-observatorionieves/assets/Modules/random_path does not exist.


ValueError: Asset projects/ee-observatorionieves/assets/Modules/random_path does not exist.

In [10]:
random_path_results 

NameError: name 'random_path_results' is not defined

In [12]:
# Get MODIS Terra and Aqua Image Collections and respective image dates
monthly_prefix = "MCD10A1_Andes_T48_"
export_ini = "2003-01-01"
export_end = today = datetime.today().strftime('%Y-%m-%d')

ee_terra_ic = ee.imagecollection.ImageCollection(
    "MODIS/061/MOD10A1"
).filterDate(export_ini, export_end)

ee_aqua_ic = ee.imagecollection.ImageCollection(
    "MODIS/061/MYD10A1"
).filterDate(export_ini, export_end)


terra_image_dates = get_collection_dates(ee_terra_ic)
print(f"First Terra image: {terra_image_dates[0]}")
print(f"Last Terra image: {terra_image_dates[-1]}")
print(f"Count of Terra images: {len(terra_image_dates)}\n")

aqua_image_dates = get_collection_dates(ee_aqua_ic)
print(f"First Aqua image: {aqua_image_dates[0]}")
print(f"Last Aqua image: {aqua_image_dates[-1]}")
print(f"Count of Aqua images: {len(aqua_image_dates)}")


First Terra image: 2003-01-01
Last Terra image: 2024-12-14
Count of Terra images: 7989

First Aqua image: 2003-01-01
Last Aqua image: 2024-12-14
Count of Aqua images: 8002


Generate names of yearly images 

In [13]:
monthly_prefix = "MCD10A1_Andes_T48_Y_"
years = list(range(2003, datetime.today().year+1))
print(years)
year_images=[monthly_prefix+str(year) for year in years]
print(year_images[0])
print(year_images[-1])
print(len(year_images))
# Remove current year 
current_year_img = monthly_prefix+str(datetime.today().year)
print(current_year_img)
year_images.remove(current_year_img)
print(year_images[-1])
print(len(year_images))

[2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
MCD10A1_Andes_T48_Y_2003
MCD10A1_Andes_T48_Y_2024
22
MCD10A1_Andes_T48_Y_2024
MCD10A1_Andes_T48_Y_2023
21


## Monthly Images

Generate names of monthly images

In [14]:
monthly_prefix = 'Andes_MCDS4S5_Yearly_Monthly'
months_suffix = [str(month).zfill(2) for month in range(1,13)] 
year_month_suffix = [str(year)+'-'+month for year in years for month in months_suffix]
# remove current month.
year_month_suffix.remove(str(datetime.today().year)+'-'+str(datetime.today().month).zfill(2))
print(year_month_suffix[0])

year_months_images=[monthly_prefix+'_'+month.replace("-","_") for month in year_month_suffix]
print(year_months_images[0])
print(year_months_images[-1])
print(len(year_months_images))


2003-01
Andes_MCDS4S5_Yearly_Monthly_2003_01
Andes_MCDS4S5_Yearly_Monthly_2024_11
263


Get Year-month from exported monthly images

In [15]:
# Get names of monthly images already exported to assets
exported_monthly_images = assets.list_assets(parent=legacy_monthly_ic_path, asset_types=["Image"])
exported_monthly_images = assets.get_asset_names(exported_monthly_images)

# Remove path from asset names
exported_monthly_images = [img.split('/')[-1] for img in exported_monthly_images]

# TODO: Remove images that don't start with required suffix

# Get last 6 characters of asset names (Year-month)
exported_monthly_images = [img[-7:] for img in exported_monthly_images]

# # replace underscores with dashes (Year-month)
exported_monthly_images = [img.replace('_', '-') for img in exported_monthly_images]
print(exported_monthly_images[0])

2000-02


In [16]:
# Identify Monthly images not yet exported 
monthly_images_to_export = list(set(year_month_suffix) - set(exported_monthly_images))
print(monthly_images_to_export)

['2024-11']


From the list of months pending to export, verify if the required information for the month is available otherwise skip export for that month. 

This mostly checks if images >= max leading date exist for the month before current calendar date. For older months this will always be true and for current month, this will always be false. 

In [17]:
target_months_to_keep = []
dates_to_filter_ic = []

for target_month in monthly_images_to_export:
    print(f"Target month: {target_month}")

    # Get first and last day in the month
    first_day_month = date.fromisoformat(target_month+'-01')
    print(f"First day in Month: {first_day_month}")

    last_day_month = first_day_month + relativedelta(months=1, days=-1)
    print(f"Last day in Month: {last_day_month}")

    # Get trailing and leading dates (+-2 days)
    trailing_buffer=2
    trailing_dates = [str(first_day_month - relativedelta(days=i)) for i in range(1,trailing_buffer+1)]
    trailing_dates.sort()
    print(f"Trailing dates: {trailing_dates}")


    leading_buffer=2
    leading_dates = [str(last_day_month + relativedelta(days=i)) for i in range(1,leading_buffer+1)]
    leading_dates.sort()
    print(f"Leading dates: {leading_dates}")
    
    max_leading_date = max(leading_dates)
    print(f"Max leading date: {max_leading_date}")
    min_trailing_date = min(trailing_dates) 
    print(f"Min trailing date: {min_trailing_date}")


    # Check if leading dates are available to determine if month is "complete"
    # No need to check trailing dates because it's implicit that past images are available
    # TODO: Simplify this and only check if images >= max leading date are available
    if (all([date in terra_image_dates for date in leading_dates])):
        print("Leading dates available")
        keep_target_month = True
        
    else:
        # check if there are other images after the leading dates
        print("Leading dates not available")
        bigger_dates = [date for date in terra_image_dates if date > max_leading_date]
        if bigger_dates:
            print("There are images after the leading dates")
            print(bigger_dates)
            keep_target_month = True
        else:
            print("There are no images after the leading dates")
            keep_target_month = False

    # If month is to be exported, get all dates to filter IC
    if keep_target_month:
        target_months_to_keep.append(target_month)
        # Get all terra dates between first day of month - buffer and last day of month + buffer
        available_terra_dates = [
            date
            for date in terra_image_dates
            if date >= min_trailing_date and date <= max_leading_date
        ]
        dates_to_filter_ic.extend(available_terra_dates)

print(f"\n")
print(f"target months to keep: {target_months_to_keep}")
print(f"Dates to filter IC: {len(dates_to_filter_ic)}")
print(f"first in filtered dates: {dates_to_filter_ic[0]}")
print(f"last in filtered dates: {dates_to_filter_ic[-1]}")

Target month: 2024-11
First day in Month: 2024-11-01
Last day in Month: 2024-11-30
Trailing dates: ['2024-10-30', '2024-10-31']
Leading dates: ['2024-12-01', '2024-12-02']
Max leading date: 2024-12-02
Min trailing date: 2024-10-30
Leading dates available


target months to keep: ['2024-11']
Dates to filter IC: 34
first in filtered dates: 2024-10-30
last in filtered dates: 2024-12-02


In [18]:
# Verify which required dates are available in Terra and Aqua
terra_dates_to_keep = [date for date in terra_image_dates if date in dates_to_filter_ic]
aqua_dates_to_keep = [date for date in aqua_image_dates if date in dates_to_filter_ic]  

# Check if Terra and Aqua have same amount of images
if len(terra_dates_to_keep) != len(aqua_dates_to_keep):
    print("Terra and Aqua have different amount of images")
    print(f"Terra: {len(terra_dates_to_keep)}")
    print(f"Aqua: {len(aqua_dates_to_keep)}")
    # Check if there are missing dates in Terra or Aqua
    missing_aqua_dates = list(set(terra_dates_to_keep) - set(aqua_dates_to_keep))
    print(f"Missing dates ({len(missing_aqua_dates)}): {missing_aqua_dates}")
    # Check if there are missing dates in Aqua or Terra
    missing_terra_dates = list(set(aqua_dates_to_keep) - set(terra_dates_to_keep))
    print(f"Missing dates ({len(missing_terra_dates)}): {missing_terra_dates}")
else:
    print("Terra and Aqua have the same amount of images")
    print(f"Terra: {len(terra_dates_to_keep)}")
    print(f"Aqua: {len(aqua_dates_to_keep)}")

Terra and Aqua have the same amount of images
Terra: 34
Aqua: 34


Reduce original MODIS Terra and Aqua collections to required images only

In [19]:
# Applying date filter to Terra
ee_filtered_terra_ic = filter_collection_by_dates(ee_terra_ic, dates_to_filter_ic)
print(f"Original Terra IC: {ee_terra_ic.size().getInfo()}")
print(f"filtered Terra IC: {ee_filtered_terra_ic.size().getInfo()}")

Original Terra IC: 7989
filtered Terra IC: 34


In [20]:
# Applying date filter to Aqua
ee_filtered_aqua_ic = filter_collection_by_dates(ee_aqua_ic, dates_to_filter_ic)
print(f"Original Aqua IC: {ee_aqua_ic.size().getInfo()}")
print(f"filtered Aqua IC: {ee_filtered_aqua_ic.size().getInfo()}")

Original Aqua IC: 8002
filtered Aqua IC: 34


Process Terra and Aqua images

In [21]:
# Step 0 : Reclassify Landcover for Terra and Aqua
MODstep00 = binary.ic_snow_landcover_reclass(
    ee_filtered_terra_ic, ee_aoi_fc, 40
)  


MYDstep00 = binary.ic_snow_landcover_reclass(
    ee_filtered_aqua_ic, ee_aoi_fc, 40
)  


In [147]:
importlib.reload(merge)

<module 'observatorio_ipa.merge' from 'C:\\Users\\erick\\Documents\\Projects\\teleamb\\observatorio_ipa\\src\\observatorio_ipa\\merge.py'>

In [22]:
# step 1: Merge TERRA and AQUA collections
step01 = merge.merge(MODstep00, MYDstep00)
print(f"step01: {step01.size().getInfo()}")
print(f"step01: {step01.first().bandNames().getInfo()}")

step01: 34
step01: ['TAC', 'QA_CR']


In [23]:
# Only keep dates that have trailing and leading buffer images
# This is done to prevent errors for images that don't have leading and trailing images. 
# get dates of images from step01
step01_dates = get_collection_dates(step01)
# keep only dates that have trailing and leading images
keep_dates = []
for target_date in step01_dates:
    step01_dates_buffer = get_buffer_dates(target_date, buffer_days=2)
    if all([date in step01_dates for date in step01_dates_buffer]):
        keep_dates.append(target_date)

print(f"Dates in step01: {len(step01_dates)}")
print(f"Dates with buffer: {len(keep_dates)}")


Dates in step01: 34
Dates with buffer: 30


In [24]:
# Convert list of dates to ee_list in milliseconds
# this is done in two steps. First convert ee_List of to ee_date to get dates in milliseconds
# then download the list and convert to a simple ee_list of numbers. 
ee_keep_dates_list = ee.ee_list.List([ee.ee_date.Date(_date) for _date in keep_dates])
keep_dates_list = ee_keep_dates_list.getInfo()
if keep_dates_list:
    keep_dates_list = [item['value'] for item in keep_dates_list]
else:
    keep_dates_list = []
ee_keep_dates_list = ee.ee_list.List(keep_dates_list)
ee_keep_dates_list.length().getInfo()

30

In [187]:
importlib.reload(temporal)

<module 'observatorio_ipa.temporal' from 'C:\\Users\\erick\\Documents\\Projects\\teleamb\\observatorio_ipa\\src\\observatorio_ipa\\temporal.py'>

In [None]:
# step 2: Impute TAC values from temporal time series
step02 = temporal.ic_impute_tac_temporal(step01)
print(f"step02: {step02.size().getInfo()}")
print(f"step02: {step02.first().bandNames().getInfo()}")

Imputed 11: 30
Imputed 11: ['TAC', 'QA_CR']
Imputed 21: 30
Imputed 21: ['TAC', 'QA_CR']
Imputed 12: 30
Imputed 12: ['TAC', 'QA_CR']


In [26]:
# Step 4: Impute TAC values from spatial neighbors
step04 = spatial_4.ic_impute_TAC_spatial4(step02)
print(f"step04: {step04.size().getInfo()}")
print(f"step04: {step04.first().bandNames().getInfo()}")

step04: 30
step04: ['TAC', 'QA_CR']


In [27]:
# Step 5: Impute TAC values using DEM and TAC data for spatial neighboring pixels
step05 = spatial_8.ic_impute_tac_spatial_dem(step04, ee_dem_img)
print(f"step05: {step05.size().getInfo()}")
print(f"step05: {step05.first().bandNames().getInfo()}")

step05: 30
step05: ['TAC', 'QA_CR']


In [28]:
def split_cloud_snow_bands(image):
    """
    add separate mask bands for cloud and snow that are mutually exclusive

    Each new band is a binary mask (0, 100) where 0 is the absence of the feature and 100 is the presence of the feature.
    If pixel value is a cloud, then Cloud_TAC = 100 and Snow_TAC = 0
    if pixel value is snow, then Cloud_TAC = 0 and Snow_TAC = 100

    """
    #! Why is it setting system:time_start again, if it's already starting with original image?
    #! Why add QA again if it's not used later and it's the same as QA_CR?
    Ctac = image.select("TAC").eq(0).multiply(100).rename("Cloud_TAC")
    Stac = image.select("TAC").eq(100).multiply(100).rename("Snow_TAC")

    return (
        image.addBands(Ctac)
        .addBands(Stac)
        .set("system:time_start", image.get("system:time_start"))
    )


cloudSnowCollection = step04.map(split_cloud_snow_bands).select("Cloud_TAC", "Snow_TAC", "QA_CR")
print(f"cloudSnowCollection: {cloudSnowCollection.size().getInfo()}")
print(f"cloudSnowCollection: {cloudSnowCollection.first().bandNames().getInfo()}")

Reduce collections to Monthly Images

In [None]:
# Consolidate to monthly images and Calculate monthly means of Snow_TAC and Cloud_TAC bands
def monthly_mean(ym):
    ym=ee.ee_string.String(ym)
    i_year = ee.ee_number.Number.parse(ym.slice(0, 4))
    i_month = ee.ee_number.Number.parse(ym.slice(5))

    selected = cloudSnowCollection.filter(
        ee.filter.Filter.calendarRange(i_year, i_year, "year")
    ).filter(ee.filter.Filter.calendarRange(i_month, i_month, "month"))

    Snow_mean = selected.select("Snow_TAC").mean()
    Cloud_mean = selected.select("Cloud_TAC").mean()
    imageReturn = ee.image.Image([Snow_mean, Cloud_mean]).clip(ee_aoi_fc)

    return (
        imageReturn.set("year", i_year)
        .set("month", i_month)
        .set("system:time_start", ee.ee_date.Date.fromYMD(i_year, i_month, 1).millis())
    )

ee_monthly_imgs_list = ee.ee_list.List(monthly_images_to_export)
TACbyYearMonth = ee.imagecollection.ImageCollection.fromImages(
    ee_monthly_imgs_list.map(monthly_mean)
)

print(f"TACbyYearMonth: {TACbyYearMonth.size().getInfo()}")
print(f"TACbyYearMonth: {TACbyYearMonth.first().bandNames().getInfo()}")

TACbyYearMonth: 1
TACbyYearMonth: ['Snow_TAC', 'Cloud_TAC']


In [30]:
# Check dates of images in TACbyYearMonth
TACbyYearMonth_dates = get_collection_dates(TACbyYearMonth)
TACbyYearMonth_dates.sort()
TACbyYearMonth_dates

['2024-11-01']

In [None]:
from observatorio_ipa.defaults import DEFAULT_SCALE, DEFAULT_CHI_PROJECTION

export_tasks = []
image_name = ""
for month in TACbyYearMonth_dates:
    try:
        ee_image = TACbyYearMonth.filterDate(month).first()
        image_ym = get_image_date(ee_image)
        image_name = monthly_prefix + image_ym[0:7].replace("-", "_")
        print(f"Image name: {image_name}")
    
    
        ee_task = ee.batch.Export.image.toAsset(
            image=ee_image,
            description = image_name,
            assetId= pathlib.Path(
                            legacy_monthly_ic_path, image_name
                        ).as_posix(),
            region=ee_aoi_fc,
            scale=DEFAULT_SCALE,
            crs=DEFAULT_CHI_PROJECTION,
            max_pixels=180000000
        )

        export_tasks.append(
                {"task": ee_task, "target": "GEE Asset", "image": image_name}
            )
    except:
        logger.error(f"Error creating export task for {image_name}")
    

Image name: MCD10A1_Andes_T48_2024_11


In [None]:
# Start tasks
for task in export_tasks:
    try:
        task["task"].start()
    except Exception as e:
        msg = f"Export Task failed for {task['target']} {task['image']}: {task['task']}"
        logger.error(e)

In [None]:
# Check exports until all are complete or fail
export_running = True
tasks_finished = []
final_task_status = []
finished_status = ["COMPLETED", "FAILED", "CANCELLED", "FAILED", "UNSUBMITTED"]
pending_status = ["SUBMITTED", "READY", "RUNNING"]
while export_running:
    # Assume all finished unless told otherwise
    export_running = False
    for i, task in enumerate(export_tasks):
        if i not in tasks_finished:
            status = task["task"].status()
            msg = f"{task['image']} to {task['target']}: {status['state']}"
            if status["state"] in pending_status:
                # keep loop running if there's at least one unfinished task
                export_running = True
            elif status["state"] in finished_status:
                final_task_status.append(msg)
                tasks_finished.append(i)
    if export_running:
        sleep(60)

In [200]:
assets.list_assets(
    parent="projects/ee-observatorionieves/assets/Modules"
)

[{'name': 'projects/ee-observatorionieves/assets/Modules/Andes',
  'type': 'TABLE'},
 {'name': 'projects/ee-observatorionieves/assets/Modules/DEM_SRTM_reproj_MODIS_463_Andes',
  'type': 'IMAGE'}]

Some daily images might be missing in MODIS Terra or Aqua IC. So we will generate the daily image names from MODIS directly


In [15]:
#! Terra and Aqua don't have the same amount of images in same interval
#! What to do when images are missing in one of the collections?
missing_in_terra = list(set(aqua_image_dates) - set(terra_image_dates))
missing_in_aqua = list(set(terra_image_dates) - set(aqua_image_dates))
missing_in_terra.sort()
missing_in_aqua.sort()  
print(f"Missing in Terra ({len(missing_in_terra)}): {missing_in_terra}")
print(f"Missing in Aqua ({len(missing_in_aqua)}): {missing_in_aqua}")

Missing in Terra (30): ['2003-12-17', '2003-12-18', '2003-12-19', '2003-12-20', '2003-12-21', '2003-12-22', '2003-12-23', '2008-12-21', '2008-12-22', '2016-02-19', '2016-02-20', '2016-02-21', '2016-02-22', '2016-02-23', '2016-02-24', '2016-02-25', '2016-02-26', '2016-02-27', '2022-10-11', '2022-10-12', '2022-10-13', '2022-10-14', '2022-10-15', '2022-10-16', '2022-10-17', '2022-10-18', '2022-10-19', '2022-10-20', '2022-10-21', '2022-10-22']
Missing in Aqua (17): ['2022-04-01', '2022-04-02', '2022-04-03', '2022-04-04', '2022-04-05', '2022-04-06', '2022-04-07', '2022-04-08', '2022-04-09', '2022-04-10', '2022-04-11', '2022-04-12', '2022-04-13', '2022-04-14', '2022-04-15', '2022-04-16', '2024-12-12']


In [13]:
# Mock images in in Assets excluding a few dates
exclude_dates = ["2024-08-01", "2024-12-01"]
mock_daily_image_dates = list(set(terra_image_dates) - set(exclude_dates))
mock_daily_images = [monthly_prefix + date.replace("-", "_") for date in mock_daily_image_dates]
print(mock_daily_images[0])
print(f"Mock daily images: {len(mock_daily_images)}")
print(f"Terra daily images: {len(terra_image_dates)}")

MCD10A1_Andes_T48_2004_03_19
Mock daily images: 7985
Terra daily images: 7987


In [14]:
# exported_daily_images=assets.list_assets(
#     parent=legacy_daily_ic_path,
#     asset_types="Images"
# )


exported_daily_images = mock_daily_images

# Extract date from asset names YYYY-MM-DD
exported_daily_img_dates = [
    date[-10:].replace("_", "-")
    for date in exported_daily_images
]

# Get missing dates
daily_imgs_to_export_dt = list(set(terra_image_dates) - set(exported_daily_img_dates))
print(f"Missing daily images: {len(daily_imgs_to_export_dt)}")

Missing daily images: 2


In [89]:
# Assess if required buffer images are available otherwise remove target image from list

# Get buffer images for missing dates +- 2 days

def get_buffer_dates(target_date, buffer_days=2):
    target_date_dt = datetime.strptime(target_date, "%Y-%m-%d").date()
    buffer_dates = [
        str(target_date_dt+timedelta(days=delta)) for delta in range(-buffer_days, buffer_days+1)
    ]
    buffer_dates.remove(target_date)
    return buffer_dates

buffer_days=2
missing_dates_with_buffer = []
target_ic_dates=[]
for target_date in daily_imgs_to_exoprt_dt:
    buffer_dates = get_buffer_dates(target_date, buffer_days)
    print(f"{target_date}: {buffer_dates}")

    # Check if buffer images are in Terra daily images
    if (all (date in terra_image_dates for date in buffer_dates)):
        print(f"{target_date}: All Buffer images found")
        missing_dates_with_buffer.append(target_date)
        target_ic_dates.append(target_date)
        target_ic_dates.extend(buffer_dates)
    else:
        print(f"{target_date}: Not all Buffer images found")
        print("Missing buffer images")
        missing_buffer_dates = list(set(buffer_dates) - set(terra_image_dates))
        print(missing_buffer_dates)

print(f"Missing daily images: {len(missing_dates_with_buffer)}")
print(f"Missing daily images with buffer: {len(missing_dates_with_buffer)}")

2024-12-01: ['2024-11-29', '2024-11-30', '2024-12-02', '2024-12-03']
2024-12-01: All Buffer images found
2024-08-01: ['2024-07-30', '2024-07-31', '2024-08-02', '2024-08-03']
2024-08-01: All Buffer images found
Missing daily images: 2
Missing daily images with buffer: 2


In [115]:
target_ic_dates.sort()
target_ic_dates

['2024-07-30',
 '2024-07-31',
 '2024-08-01',
 '2024-08-02',
 '2024-08-03',
 '2024-11-29',
 '2024-11-30',
 '2024-12-01',
 '2024-12-02',
 '2024-12-03']

# Filter Terra Image collection to dates of interest

In [126]:
# convert target_ic_dates to list of dates in google earth engine format
ee_dates_list = ee.ee_list.List([ee.ee_date.Date(date) for date in target_ic_dates])
# set a date simpleTime property to facilitate filtering
def set_date_property(image):
    date = ee.ee_date.Date(image.date().format('YYYY-MM-dd'))
    return image.set('simpleTime', date)

ee_Terra_ic = ee_terra_ic.map(set_date_property)

# Applying date filter
ee_filtered_terra_ic = ee_Terra_ic.filter(ee.filter.Filter.inList("simpleTime", ee_dates_list))
print(f"Original Terra IC: {ee_Terra_ic.size().getInfo()}")
print(f"filtered by simpleTime: {ee_filtered_terra_ic.size().getInfo()}")


Original Terra IC: 7986
filtered by simpleTime: 10


In [128]:
# Filter Acqua images
ee_MYD_ic = ee_aqua_ic.map(set_date_property)

# Applying date filter
ee_filtered_aqua_ic = ee_MYD_ic.filter(ee.filter.Filter.inList("simpleTime", ee_dates_list))
print(f"Original Aqua IC: {ee_MYD_ic.size().getInfo()}")
print(f"filtered by simpleTime: {ee_filtered_aqua_ic.size().getInfo()}")

Original Aqua IC: 7999
filtered by simpleTime: 10


In [None]:
# Step 0 : Reclassify Landcover
MODstep00 = binary.ic_snow_landcover_reclass(
    ee_terra_ic, AOI, 40
)  # .filter(ee.Filter.calendarRange(12,12, 'month'))
MYDstep00 = binary.ic_snow_landcover_reclass(
    ee_aqua_ic, AOI, 40
)  # .filter(ee.Filter.calendarRange(12,12, 'month'))