In [1]:
import ee

In [7]:
# -*- coding: utf-8 -*-
"""
Daily *daytime-only* radiation over monitoring sites → wide CSV by year (Python + Earth Engine)

Generic, memory-lean approach:
- Register each sensor in SENSORS with: collection id, band names, units ("J_hour" or "W"),
  slice duration (seconds per image, for W→J), a daytime mask band (SWdown-like), and a min buffer radius.
- For each sensor & day:
  * Build one masked day-collection (daytime only: SWdown > 0).
  * Sum daily energy for SWdown and Rn (SWnet+LWnet), convert to MJ.
  * Reduce the 2-band image over small station buffers (no .clip) → columns per station.
- Join all sensors by 'date' into a wide per-year table and export CSV.

Edit these blocks as needed:
- MONITOR_ASSET_IDS
- START / END
- SENSORS (add/modify sensors)
"""

import ee
# ee.Authenticate()  # run once per machine if needed
ee.Initialize(project="cuenca-soil-moisture")

# ----------------------------- CONFIG -----------------------------
MONITOR_ASSET_IDS = [
    "projects/cuenca-soil-moisture/assets/Zhurucay_monitoring",
    "projects/cuenca-soil-moisture/assets/Quinuas_monitoring",
]

START = ee.Date("2016-01-01")
END   = ee.Date("2025-04-30")

DEBUG = True
TILE_SCALE = 2   # bump to 3–4 if you hit memory issues

# Sensor registry (add more following the same schema)
SENSORS = [
    dict(
        name="ERA5",
        collection="ECMWF/ERA5_LAND/HOURLY",
        bands=dict(
            SWdown="surface_solar_radiation_downwards",  # J m^-2 per hour
            SWnet ="surface_net_solar_radiation",        # J m^-2 per hour
            LWnet ="surface_net_thermal_radiation"       # J m^-2 per hour
        ),
        units="J_hour",       # already energy per hour (J)
        dt_s=None,            # not used for J_hour
        min_buffer_m=2000,
        sw_mask_band="surface_solar_radiation_downwards"
    ),
    dict(
        name="MERRA",
        collection="NASA/GSFC/MERRA/rad/2",
        bands=dict(
            SWdown="SWGDN",   # W m^-2 (hourly mean)
            SWnet ="SWGNT",   # W m^-2
            LWnet ="LWGNT"    # W m^-2
        ),
        units="W",           # power → multiply by dt_s to get energy (J)
        dt_s=3600,           # seconds per slice
        min_buffer_m=12000,
        sw_mask_band="SWGDN"
    ),
    dict(
        name="GLDAS",
        collection="NASA/GLDAS/V021/NOAH/G025/T3H",
        bands=dict(
            SWdown="SWdown_f_tavg",  # W m^-2 (3-hour mean)
            SWnet ="Swnet_tavg",     # W m^-2
            LWnet ="Lwnet_tavg"      # W m^-2
        ),
        units="W",
        dt_s=3*3600,         # seconds per 3-hour slice
        min_buffer_m=20000,
        sw_mask_band="SWdown_f_tavg"
    ),
]

# ----------------------------- HELPERS -----------------------------
def sanitize_id(s: ee.String) -> ee.String:
    """Short, safe identifier for CSV suffixes."""
    s = ee.String(s).replace("[^A-Za-z0-9_]", "_", "g")
    starts_digit = s.match("^[0-9]").length().gt(0)
    s = ee.String(ee.Algorithms.If(starts_digit, ee.String("st_").cat(s), s))
    s = s.replace("_+", "_", "g").slice(0, 40)
    return ee.String(ee.Algorithms.If(s.length().gt(0), s, "st_unknown"))

def pick_site_name(f: ee.Feature) -> ee.String:
    """Robust site name from common properties; fallback to feature id."""
    p = f.propertyNames()
    return ee.String(
        ee.Algorithms.If(
            p.contains("site"), f.get("site"),
            ee.Algorithms.If(
                p.contains("Codigo"), f.get("Codigo"),
                ee.Algorithms.If(p.contains("name"), f.get("name"), f.id())
            )
        )
    )

def list_days(start: ee.Date, end: ee.Date) -> ee.List:
    """List of {'day': d, 'next': d+1} covering [start, end] inclusive."""
    end_excl = end.advance(1, "day")
    n = end_excl.difference(start, "day")
    return ee.List.sequence(0, n.subtract(1)).map(
        lambda i: ee.Dictionary({
            "day":  start.advance(ee.Number(i), "day"),
            "next": start.advance(ee.Number(i), "day").advance(1, "day"),
        })
    )

def product_buffers(stations_fc: ee.FeatureCollection, sample_img: ee.Image, min_meters: int) -> ee.FeatureCollection:
    """
    Buffer each station by max(0.6 * native pixel size, min_meters).
    Copy original properties so 'station_id' stays available in reductions.
    """
    scale = ee.Number(ee.Image(sample_img).projection().nominalScale())
    r = scale.multiply(0.6).max(ee.Number(min_meters))
    return stations_fc.map(lambda f: ee.Feature(ee.Feature(f).geometry().buffer(r)).copyProperties(f)) \
                      .set({"buffer_m": r, "scale_m": scale})

def reduce_to_dict_by_station(img: ee.Image,
                              fc_buffers: ee.FeatureCollection,
                              scale_num: float,
                              band_list: ee.List) -> ee.Dictionary:
    """
    Reduce image over each buffer, returning a flat dictionary:
      { <band>_<stationId>: mean, ... }
    """
    col = img.select(band_list).reduceRegions(
        collection=fc_buffers,
        reducer=ee.Reducer.mean(),
        scale=scale_num,
        tileScale=TILE_SCALE
    )
    feats = col.toList(col.size())

    def _accum(i, acc):
        f = ee.Feature(feats.get(i))
        sid = ee.String(f.get("station_id"))
        def _one_band(k, acc2):
            k = ee.String(k)
            has_k = f.propertyNames().contains(k)
            add_if = ee.Dictionary(
                ee.Algorithms.If(
                    has_k,
                    ee.Dictionary().set(k.cat("_").cat(sid), f.get(k)),
                    ee.Dictionary({})
                )
            )
            return ee.Dictionary(acc2).combine(add_if, True)
        per_station = ee.List(band_list).iterate(_one_band, ee.Dictionary({}))
        return ee.Dictionary(acc).combine(ee.Dictionary(per_station), True)

    return ee.Dictionary(ee.List.sequence(0, feats.size().subtract(1)).iterate(_accum, ee.Dictionary({})))

def feat_from_dict(date: ee.Date, dct: ee.Dictionary, tag: str) -> ee.Feature:
    """Wrap the per-day dictionary into a feature row with 'date' and 'tag'."""
    return ee.Feature(
        None,
        ee.Dictionary(dct).set("date", ee.Date(date).format("YYYY-MM-dd")).set("tag", tag)
    )

def inner_join_by_date(fc_a: ee.FeatureCollection, fc_b: ee.FeatureCollection) -> ee.FeatureCollection:
    """Inner-join two tables by 'date' and merge properties."""
    join = ee.Join.inner()
    flt  = ee.Filter.equals(leftField="date", rightField="date")
    joined = join.apply(fc_a, fc_b, flt)
    def _merge(row):
        row = ee.Feature(row)
        a = ee.Feature(row.get("primary"))
        b = ee.Feature(row.get("secondary"))
        merged = ee.Dictionary(a.toDictionary()).combine(b.toDictionary(), True)
        return ee.Feature(None, merged)
    return ee.FeatureCollection(joined.map(_merge))

# ----------------------------- STATIONS -----------------------------
def load_stations(asset_id: str, asset_label: str) -> ee.FeatureCollection:
    fc = ee.FeatureCollection(asset_id)
    def _map(f):
        site = pick_site_name(f)
        site2 = ee.String(asset_label).cat(":").cat(site)
        return ee.Feature(f.geometry(), f.toDictionary()).set({
            "site": site,
            "asset": asset_label,
            "site2": site2
        })
    return fc.map(_map)

stations = (ee.FeatureCollection([])
            .merge(load_stations(MONITOR_ASSET_IDS[0], "Zhurucay"))
            .merge(load_stations(MONITOR_ASSET_IDS[1], "Quinuas"))
            .map(lambda f: ee.Feature(f).set({"station_id": sanitize_id(ee.String(f.get("site")))})))

if DEBUG:
    print("Stations:", stations.size().getInfo())
    print("Example stations:", stations.limit(3).getInfo())

# ----------------------------- GENERIC DAILY BUILDER -----------------------------
def daily_daytime_fc(sensor: dict, y_start: ee.Date, y_end: ee.Date) -> ee.FeatureCollection:
    """
    For one sensor over [y_start, y_end], produce per-day rows with columns:
      <SENSOR>_day_SWdown_MJ_<station>,  <SENSOR>_day_Rn_MJ_<station>
    Memory-lean: one masked day-collection reused for SWdown and Rn.
    """
    name   = sensor["name"]
    bands  = sensor["bands"]
    units  = sensor["units"]
    dt_s   = sensor["dt_s"]
    collid = sensor["collection"]
    swmask = sensor["sw_mask_band"]
    minbuf = sensor["min_buffer_m"]

    # Pre-filter for the year window (+1 day for end-exclusive)
    ic_base = (ee.ImageCollection(collid)
               .filterDate(y_start, y_end.advance(1, "day"))
               .select([bands["SWdown"], bands["SWnet"], bands["LWnet"]]))

    sample = ic_base.first()
    # Return empty FC fast if no data
    if DEBUG:
        print(f"[{name}] year window has data:", bool(sample.getInfo() is not None))

    return ee.FeatureCollection(
        ee.Algorithms.If(
            sample,
            (lambda _sample=sample: (
                # Compute scale (client) and buffers once per sensor/year
                (lambda scale_num, fc_buf, buf_geom:
                    ee.FeatureCollection(
                        list_days(y_start, y_end).map(
                            lambda d: _per_day_row(
                                name, bands, units, dt_s,
                                ic_base,
                                ee.Date(ee.Dictionary(d).get("day")),
                                ee.Date(ee.Dictionary(d).get("next")),
                                swmask,
                                fc_buf, buf_geom,
                                scale_num
                            )
                        )
                    )
                )(
                    float(ee.Image(_sample).projection().nominalScale().getInfo()),
                    product_buffers(stations, _sample, minbuf),
                    product_buffers(stations, _sample, minbuf).geometry()
                )
            ))(),
            ee.FeatureCollection([])  # no data in this year/sensor
        )
    )

def _per_day_row(name, bands, units, dt_s,
                 ic_base, day: ee.Date, nxt: ee.Date,
                 swmask: str,
                 fc_buf: ee.FeatureCollection, buf_geom: ee.Geometry,
                 scale_num: float) -> ee.Feature:
    """
    Build one per-day feature for a sensor:
      - Mask daytime (SWdown > 0)
      - Sum SWdown and (SWnet+LWnet), convert to MJ
      - Reduce over buffers → dictionary
      - Wrap into a feature row with 'date'
    """
    # One masked day-collection reused
    day_ic_masked = (ic_base.filterDate(day, nxt)
                           .filterBounds(buf_geom)
                           .map(lambda im: ee.Image(im).updateMask(ee.Image(im).select(swmask).gt(0))))

    # SWdown daily energy
    sw_sum = day_ic_masked.select(bands["SWdown"]).sum()
    if units == "J_hour":
        sw_mj = sw_sum.divide(1e6)  # J → MJ
    else:  # "W"
        sw_mj = sw_sum.multiply(dt_s).divide(1e6)  # W * s → J → MJ
    sw_mj = sw_mj.rename(f"{name}_day_SWdown_MJ")

    # Rn daily energy = (SWnet + LWnet)
    rn_sum = day_ic_masked.select(bands["SWnet"]).sum().add(day_ic_masked.select(bands["LWnet"]).sum())
    if units == "J_hour":
        rn_mj = rn_sum.divide(1e6)
    else:
        rn_mj = rn_sum.multiply(dt_s).divide(1e6)
    rn_mj = rn_mj.rename(f"{name}_day_Rn_MJ")

    img = ee.Image.cat([sw_mj, rn_mj]).set("system:time_start", day.millis())

    dct = reduce_to_dict_by_station(img, fc_buf, scale_num, img.bandNames())
    return feat_from_dict(day, dct, f"{name}_DAY")

# ----------------------------- YEAR LOOP & EXPORT -----------------------------
start_year = int(START.get("year").getInfo())
end_year   = int(END.get("year").getInfo())
years_py   = list(range(start_year, end_year + 1))

def export_year(y_int: int):
    y_start_ee = ee.Date.fromYMD(y_int, 1, 1)
    y_end_ee   = ee.Date.fromYMD(y_int, 12, 31)

    # Clip to [START, END]
    y_start = ee.Date(ee.Algorithms.If(y_start_ee.millis().lt(START.millis()), START, y_start_ee))
    y_end   = ee.Date(ee.Algorithms.If(y_end_ee.millis().gt(END.millis()),   END,   y_end_ee))

    # Build daily tables per sensor
    daily_named = []
    for sensor in SENSORS:
        fc = daily_daytime_fc(sensor, y_start, y_end)
        daily_named.append((sensor["name"], ee.FeatureCollection(fc)))

    # Use the first sensor as base; inner-join others by date
    base_name, wide = daily_named[0]
    for name, fc in daily_named[1:]:
        wide = inner_join_by_date(wide, fc)

    if DEBUG:
        sizes = {name: fc.size().getInfo() for name, fc in daily_named}
        print(f"Year {y_int} sizes:", sizes, "wide:", wide.size().getInfo())
        first = wide.first()
        if first:
            print("Fields sample:", ee.Feature(first).propertyNames().getInfo())

    task = ee.batch.Export.table.toDrive(
        collection=wide,
        description=f"DAYTIME_Radiation_{y_int}",
        folder="GoogleEarthEngine",
        fileFormat="CSV",
    )
    task.start()
    print(f"Started export: DAYTIME_Radiation_{y_int}")

for y in years_py:
    export_year(y)

print("All yearly exports started. Monitor Tasks panel or use ee.batch.Task.list().")


Stations: 4
Example stations: {'type': 'FeatureCollection', 'columns': {'station_id': 'String'}, 'features': [{'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-79.234708, -3.062612]}, 'id': '1_2_00000000000000000000', 'properties': {'asset': 'Zhurucay', 'name': 'Zhurucay_monitoring', 'site': 'Zhurucay_monitoring', 'site2': 'Zhurucay:Zhurucay_monitoring', 'station_id': 'Zhurucay_monitoring'}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-79.22222577105055, -2.7831219944997074]}, 'id': '2_00000000000000000000', 'properties': {'Altura': 3962, 'Codigo': 'QP1', 'Tipo': 'Precipitation', 'X': 697621.9592, 'Y': 9692230.0653, 'asset': 'Quinuas', 'site': 'QP1', 'site2': 'Quinuas:QP1', 'station_id': 'QP1'}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-79.1931133268206, -2.782269566783723]}, 'id': '2_00000000000000000001', 'properties': {'Altura': 3633, 'Codigo': 'QP2', 'Tipo': 'Precipitation', 'X': 701109.1811, 'Y': 9692380.1398, 'asse

EEException: User memory limit exceeded.