In [6]:
import ee

In [8]:
# -*- coding: utf-8 -*-
"""
Generic daily means → wide CSV (per region) + optional map preview + optional GeoTIFFs.

Replicates and generalizes the GEE JS script:
- Computes daily means using local-day windows (timezone offset).
- Reduces per region polygon(s) and pivots to WIDE: {date, SAT_region1, SAT_region2, ...}
- Exports per satellite × year as CSV to Google Drive.
- Optional: map preview (one month), annual/monthly GeoTIFF exports.

Notes:
- GLDAS: convert kg/m^2 (0–10 cm) to m^3/m^3 with .divide(100.0) and rename('SM')
  **BEFORE** daily means (exactly like the JS script). This prevents empty/nameless-band days.
"""

import ee

# If you haven't authenticated this machine before, run once:
# ee.Authenticate()

# Initialize with your GCP project that has Earth Engine API enabled
ee.Initialize(project="cuenca-soil-moisture")

# Optional map preview (requires geemap if set True)
USE_GEEMAP = False
if USE_GEEMAP:
    try:
        import geemap
    except Exception as exc:
        # Stay best-effort (any import-time failure only disables the preview),
        # but say why instead of silently flipping the flag.
        print(f"geemap could not be imported ({exc!r}); map preview disabled.")
        USE_GEEMAP = False

# ---------------------------------------------------------------------
# CONFIG (edit this block)
# ---------------------------------------------------------------------
# Year range to process; both ends are inclusive.
YEARS = {"start": 2020, "end": 2020}
# Local timezone offset from UTC, in hours (e.g., Ecuador ~ UTC-5).
TZ_OFFSET_H = -5

# Regions: one (asset path, label) tuple per watershed.
REGIONS = [
    (
        "projects/cuenca-soil-moisture/assets/watershed_azuay",
        "Azuay",
    ),
    (
        "projects/cuenca-soil-moisture/assets/watershed_quinuas",
        "Quinuas",
    ),
    (
        "projects/cuenca-soil-moisture/assets/watershed_zhurucay",
        "Zhurucay",
    ),
]
# Alternative: a ready-made ee.FeatureCollection with a 'site' property
# (one feature per site). When set, it is used instead of REGIONS.
REGIONS_FC = None

# Output/preview switches
DO_MAP_LAYERS = False
MAP_YEAR = YEARS["start"]
MAP_MONTH = 7  # calendar month, 1..12

EXPORT_ANNUAL_TIFS = False
EXPORT_ONE_MONTH_TIFS = False

# Buffer (m) added around all regions for the initial dataset filtering (performance)
FILTER_BUFFER_M = 20000

# Nominal scales (m) for reductions/exports, approximately native resolution
SCALE = dict(
    SMAP=11000,   # SMAP L4 ~9-12 km, use ~11 km
    ERA5=11000,   # ERA5-Land 0.1° ~9-11 km
    GLDAS=28000,  # GLDAS 0.25° ~25-30 km
)

# ---------------------------------------------------------------------
# HELPERS
# ---------------------------------------------------------------------
def load_regions_fc(regions, regions_fc=None):
    """
    Build the per-site FeatureCollection used by the reductions.

    A caller-supplied ``regions_fc`` is returned unchanged. Otherwise each
    ``(asset, label)`` pair in ``regions`` becomes one feature: its geometry is
    the geometry of the asset's FeatureCollection and its 'site' property is
    the label.
    """
    if regions_fc is None:
        # geometry() of each asset collection is used directly; no explicit union()
        site_features = [
            ee.Feature(ee.FeatureCollection(asset_id).geometry(), {"site": site_name})
            for asset_id, site_name in regions
        ]
        return ee.FeatureCollection(site_features)
    return regions_fc


def daily_means_local(ic, start_year, end_year, tz_offset_hours):
    """
    Server-side daily means over local-day windows.

    For every local day D from Jan 1 of ``start_year`` through Dec 31 of
    ``end_year``, the images falling in [D - offset, D + 1 day - offset) are
    averaged, where offset is ``tz_offset_hours``. Each result is stamped with
    D as 'system:time_start' and a 'has_data' property. Days without any image
    get a fully masked placeholder carrying the band name of the collection's
    first image; those days are then dropped by the final 'has_data' filter.

    NOTE(review): 'has_data' is the result of a numeric gt() comparison while
    the filter compares against Python True — confirm the two match as intended.
    """
    collection = ee.ImageCollection(ic)

    first_local_day = ee.Date.fromYMD(start_year, 1, 1)
    stop_local_day = ee.Date.fromYMD(end_year + 1, 1, 1)  # exclusive upper bound
    day_count = stop_local_day.difference(first_local_day, "day")
    day_offsets = ee.List.sequence(0, day_count.subtract(1))

    # Band name for the placeholder comes from the collection's first image
    band_name = ee.String(ee.Image(collection.first()).bandNames().get(0))

    def build_daily_image(offset):
        local_midnight = first_local_day.advance(ee.Number(offset), "day")
        next_midnight = local_midnight.advance(1, "day")
        # Shift the local-day boundaries by the negated timezone offset
        window_start = local_midnight.advance(-tz_offset_hours, "hour")
        window_end = next_midnight.advance(-tz_offset_hours, "hour")

        day_mean = collection.filterDate(window_start, window_end).mean()
        # The mean of an empty window has no bands
        has_bands = day_mean.bandNames().size().gt(0)

        # Fully masked stand-in that still carries the expected band name
        placeholder = ee.Image.constant(0).rename(band_name).updateMask(ee.Image(0))

        chosen = ee.Algorithms.If(has_bands, day_mean, placeholder)
        stamped = ee.Image(chosen).set('system:time_start', local_midnight.millis())
        return stamped.set('has_data', has_bands)

    all_days = ee.ImageCollection(day_offsets.map(build_daily_image))
    return all_days.filter(ee.Filter.eq('has_data', True))


def _site_pairs_list(regions_fc):
    """
    Turn a site FeatureCollection into a server-side ee.List of
    [site_name, geometry] pairs, one pair per feature, for use in
    server-side iteration.
    """
    def as_site_geom_pair(element):
        feature = ee.Feature(element)
        site_name = ee.String(feature.get("site"))
        return ee.List([site_name, feature.geometry()])

    feature_list = regions_fc.toList(regions_fc.size())
    # Result shape: ee.List([[site, geom], [site, geom], ...])
    return feature_list.map(as_site_geom_pair)


def wide_table_from_daily(ic, regions_fc, scale, sat_prefix):
    """
    Reduce every image of ``ic`` over every site and pivot to a wide table.

    Each image becomes one geometry-less Feature whose properties are
    'date' (YYYY-MM-dd, from 'system:time_start') plus one
    '<sat_prefix>_<site>' entry per site holding the mean of the image's
    first band over that site's geometry at ``scale``.

    NOTE(review): when a site has no unmasked pixels, reduceRegion yields no
    value for the band — confirm how Dictionary.set behaves in that case.
    """
    site_pairs = ee.List(_site_pairs_list(regions_fc))  # [[site, geom], ...]
    mean_reducer = ee.Reducer.mean()
    column_prefix = ee.String(sat_prefix).cat("_")

    def image_to_row(image):
        band = ee.Image(image).select(0)  # single band (named 'SM')
        band_name = band.bandNames().get(0)
        row_date = ee.Date(band.get("system:time_start")).format("YYYY-MM-dd")

        def accumulate_site(row, site_pair):
            # Fold step: add this site's mean to the row dictionary
            site_pair = ee.List(site_pair)
            site_name = ee.String(site_pair.get(0))
            site_geom = ee.Geometry(site_pair.get(1))
            region_stats = band.reduceRegion(
                reducer=mean_reducer,
                geometry=site_geom,
                scale=scale,
                bestEffort=True,
                maxPixels=1e13,
            )
            column = column_prefix.cat(site_name)
            return ee.Dictionary(row).set(column, region_stats.get(band_name))

        first_row = ee.Dictionary({"date": row_date})
        full_row = site_pairs.iterate(accumulate_site, first_row)
        return ee.Feature(None, ee.Dictionary(full_row))

    return ee.FeatureCollection(ic.map(image_to_row))


def export_table_to_drive(fc, description, selectors=None):
    """Export a FeatureCollection (wide table) to Drive as CSV.

    Args:
        fc: the table to export.
        description: task name; also used in the progress message.
        selectors: optional list of property names that fixes which columns
            are written and in what order. ``None`` (default) leaves the
            column selection to Earth Engine, as before.

    Returns:
        The started ``ee.batch.Task`` so callers can poll or cancel it.
        Callers that ignore the return value behave exactly as before.
    """
    export_kwargs = {
        "collection": fc,
        "description": description,
        "fileFormat": "CSV",
    }
    # Only forward selectors when given, so the default request is unchanged
    if selectors is not None:
        export_kwargs["selectors"] = selectors

    task = ee.batch.Export.table.toDrive(**export_kwargs)
    task.start()
    print(f"Started export: {description}")
    return task


def month_mean(ic, year, month):
    """Average the images of ``ic`` that fall in calendar month ``month`` (1..12) of ``year``."""
    month_start = ee.Date.fromYMD(year, month, 1)
    # filterDate treats the end as exclusive, so the first day of the next month closes the window
    in_month = ic.filterDate(month_start, month_start.advance(1, "month"))
    return in_month.mean()


def export_image_to_drive(img, description, scale, region_fc):
    """Export an image (cast to float) to Drive as GeoTIFF over ``region_fc``.

    Args:
        img: image to export; it is cast with ``toFloat()`` before export.
        description: task name; spaces are replaced by underscores for the
            output file name prefix.
        scale: export resolution passed to the export task.
        region_fc: FeatureCollection whose merged geometry is used as the
            export region. The image itself is not clipped or masked here.

    Returns:
        The started ``ee.batch.Task`` so callers can poll or cancel it.
    """
    # Use the geometry of the whole collection, not only its first feature,
    # so multi-feature regions are exported in full.
    geom = region_fc.geometry()
    task = ee.batch.Export.image.toDrive(
        image=img.toFloat(),
        description=description,
        fileNamePrefix=description.replace(" ", "_"),
        region=geom,
        scale=scale,
        maxPixels=1e13,
        fileFormat="GeoTIFF"
    )
    task.start()
    print(f"Started image export: {description}")
    return task


# ---------------------------------------------------------------------
# BUILD REGIONS + BOUNDS
# ---------------------------------------------------------------------
WATERSHEDS_FC = load_regions_fc(REGIONS, REGIONS_FC)
bounds = WATERSHEDS_FC.geometry().buffer(FILTER_BUFFER_M)

# ---------------------------------------------------------------------
# LOAD + PREPARE DATASETS → DAILY LOCAL MEANS
# (Match JS semantics: select/transform/rename to 'SM' BEFORE daily means)
# ---------------------------------------------------------------------
# Time span of the raw data, shifted by the timezone offset exactly like the
# daily windows in daily_means_local. Filtering on the unshifted calendar year
# would cut off part of the first or last local day (e.g. the final 5 hours of
# Dec 31 for UTC-5), biasing that day's mean.
RAW_START_UTC = ee.Date.fromYMD(YEARS["start"], 1, 1).advance(-TZ_OFFSET_H, "hour")
RAW_END_UTC = ee.Date.fromYMD(YEARS["end"] + 1, 1, 1).advance(-TZ_OFFSET_H, "hour")


def _raw_collection(asset_id):
    """Load ``asset_id`` limited to the buffered study area and the shifted time span."""
    return (ee.ImageCollection(asset_id)
            .filterBounds(bounds)
            .filterDate(RAW_START_UTC, RAW_END_UTC))


def _gldas_to_sm(img):
    """Convert one GLDAS 0-10 cm soil moisture image to the 'SM' band (value / 100)."""
    img = ee.Image(img)
    scaled = img.divide(100.0).rename("SM")
    # daily_means_local selects images with filterDate, which relies on
    # 'system:time_start'. The result of divide() is not guaranteed to keep the
    # source image's metadata, so re-attach the timestamp explicitly.
    return ee.Image(scaled.copyProperties(img, ["system:time_start"]))


# SMAP L4
smap_raw = _raw_collection("NASA/SMAP/SPL4SMGP/008")
smap_sm = smap_raw.select("sm_surface").select([0], ["SM"])
SMAP_daily = daily_means_local(smap_sm, YEARS["start"], YEARS["end"], TZ_OFFSET_H)

# ERA5-Land
era5_raw = _raw_collection("ECMWF/ERA5_LAND/HOURLY")
era5_sm = era5_raw.select("volumetric_soil_water_layer_1").select([0], ["SM"])
ERA5_daily = daily_means_local(era5_sm, YEARS["start"], YEARS["end"], TZ_OFFSET_H)

# GLDAS Noah v2.1 — IMPORTANT: divide(100) + rename('SM') BEFORE daily means
gldas_raw = _raw_collection("NASA/GLDAS/V021/NOAH/G025/T3H")
gldas_sm = gldas_raw.select("SoilMoi0_10cm_inst").map(_gldas_to_sm)
GLDAS_daily = daily_means_local(gldas_sm, YEARS["start"], YEARS["end"], TZ_OFFSET_H)

# Bundle each daily collection with the scale used for its reductions/exports
daily_ic = {
    "SMAP":  (SMAP_daily,  SCALE["SMAP"]),
    "ERA5":  (ERA5_daily,  SCALE["ERA5"]),
    "GLDAS": (GLDAS_daily, SCALE["GLDAS"]),
}

print("Prepared daily local-time means for:", list(daily_ic.keys()))

# ---------------------------------------------------------------------
# PER-YEAR WIDE CSV EXPORTS
# ---------------------------------------------------------------------
# One job per (year, dataset), ordered year-major; each job carries the daily
# collection restricted to that calendar year.
csv_jobs = [
    (y, sat_key, ic.filterDate(f"{y}-01-01", f"{y+1}-01-01"), scale)
    for y in range(YEARS["start"], YEARS["end"] + 1)
    for sat_key, (ic, scale) in daily_ic.items()
]

for y, sat_key, yearly_ic, scale in csv_jobs:
    table = wide_table_from_daily(yearly_ic, WATERSHEDS_FC, scale, sat_key)
    export_table_to_drive(table, f"{sat_key}_daily_wide_{y}")

# ---------------------------------------------------------------------
# OPTIONAL: MAP PREVIEW (one month per sat × region)
# ---------------------------------------------------------------------
if DO_MAP_LAYERS and USE_GEEMAP:
    Map = geemap.Map()
    Map.center_object(WATERSHEDS_FC, 9)

    VIS = {
        "SMAP":  {"min": 0.05, "max": 0.5, "palette": ['#f7fbff','#c6dbef','#6baed6','#2171b5','#08306b']},
        "ERA5":  {"min": 0.05, "max": 0.5, "palette": ['#fff7ec','#fdd49e','#fc8d59','#d7301f','#7f0000']},
        "GLDAS": {"min": 0.05, "max": 0.5, "palette": ['#f7f4f9','#d4b9da','#c994c7','#df65b0','#980043']},
    }

    # The site names and their geometries do not depend on the dataset, so the
    # blocking getInfo() round-trip and the per-site lookups are done once
    # instead of once per dataset.
    sites = WATERSHEDS_FC.aggregate_array("site").getInfo()
    site_geoms = [
        (s, ee.Feature(WATERSHEDS_FC.filter(ee.Filter.eq("site", s)).first()).geometry())
        for s in sites
    ]

    for sat_key, (ic, _scale) in daily_ic.items():
        # Mean 'SM' image of the preview month for this dataset
        mm = month_mean(ic, MAP_YEAR, MAP_MONTH).select("SM")
        for s, geom in site_geoms:
            # Layers are added hidden (last argument False)
            Map.addLayer(mm.clip(geom), VIS.get(sat_key, {}), f"{sat_key} {s} M{MAP_MONTH} {MAP_YEAR}", False)
    # NOTE(review): a bare expression inside an `if` block is presumably not
    # auto-rendered by the notebook — display the map explicitly if it does not appear.
    Map

# ---------------------------------------------------------------------
# OPTIONAL: GeoTIFF EXPORTS (annual mean and/or one month)
# ---------------------------------------------------------------------
if EXPORT_ANNUAL_TIFS or EXPORT_ONE_MONTH_TIFS:
    # Only the site names are fetched client-side. The per-site collections
    # stay server-side filters: downloading whole FeatureCollections with
    # getInfo() and rebuilding them from plain dicts would ship every geometry
    # to the client and back, and depends on the client library accepting
    # such dicts.
    site_names = WATERSHEDS_FC.aggregate_array("site").getInfo()
    site_list = [
        {"name": name, "fc": WATERSHEDS_FC.filter(ee.Filter.eq("site", name))}
        for name in site_names
    ]

    for yy in range(YEARS["start"], YEARS["end"] + 1):
        startY = ee.Date.fromYMD(yy, 1, 1)
        endY   = ee.Date.fromYMD(yy + 1, 1, 1)

        # Annual means per dataset (only built when they will be exported)
        if EXPORT_ANNUAL_TIFS:
            annual = {k: ic.filterDate(startY, endY).mean().select("SM")
                      for k, (ic, _s) in daily_ic.items()}

        # Optional month means for MAP_MONTH
        if EXPORT_ONE_MONTH_TIFS:
            startM = ee.Date.fromYMD(yy, MAP_MONTH, 1)
            endM   = startM.advance(1, "month")
            monthly = {k: ic.filterDate(startM, endM).mean().select("SM")
                       for k, (ic, _s) in daily_ic.items()}

        # Exports per site × dataset
        for site in site_list:
            name = site["name"]
            fc   = site["fc"]
            for sat_key, (_ic, scale) in daily_ic.items():
                if EXPORT_ANNUAL_TIFS:
                    export_image_to_drive(annual[sat_key], f"{sat_key}_annual_{yy}_{name}", scale, fc)
                if EXPORT_ONE_MONTH_TIFS:
                    export_image_to_drive(monthly[sat_key], f"{sat_key}_month_{yy}_M{MAP_MONTH}_{name}", scale, fc)

print("All export tasks (if any) have been started. Check Tasks in the EE Code Editor or via ee.batch.Task.list().")


Prepared daily local-time means for: ['SMAP', 'ERA5', 'GLDAS']
Started export: SMAP_daily_wide_2020
Started export: ERA5_daily_wide_2020
Started export: GLDAS_daily_wide_2020
All export tasks (if any) have been started. Check Tasks in the EE Code Editor or via ee.batch.Task.list().
