<a href="https://colab.research.google.com/github/CheilaBaiao/GEE_SR/blob/main/1)_00_config_ingest_preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title Setup: instalar libs, montar Drive e configurar projeto (rodar 1x por sessão)
# Instalações essenciais (evite repetir em outras células)
%pip -q install numpy pandas geopandas shapely pyproj rasterio rioxarray xarray tqdm pyyaml joblib earthengine-api

# Imports base
import os, json, time, glob, math, gc, warnings, yaml
from pathlib import Path
import numpy as np, pandas as pd
from tqdm.notebook import tqdm

import geopandas as gpd
from shapely.geometry import box

# Montar Google Drive (Colab)
from google.colab import drive
drive.mount('/content/drive')

# --- PASTAS DO PROJETO ---
# Use nomes simples (sem subpastas no 'folder' do GEE) para evitar shards/caminhos esquisitos
BASE_DIR = Path("/content/drive/MyDrive/Pantanal_TippingPoints/index")
RAW_DIR  = BASE_DIR / "raw"
INT_DIR  = BASE_DIR / "interim"
OUT_DIR  = BASE_DIR / "outputs"
LOG_DIR  = BASE_DIR / "logs"
for d in (RAW_DIR, INT_DIR, OUT_DIR, LOG_DIR): d.mkdir(parents=True, exist_ok=True)

# --- CONFIG ÚNICA (sem duplicatas) ---
CONFIG = {
    # Calendário/anos
    "calendar": "bimonthly",              # "monthly" ou "bimonthly"
    "years": list(range(2001, 2011)),

    # Projeção/escala
    "crs_epsg": 31983,                    # SIRGAS 2000 / UTM 23S
    "optical_pixel_m": 30,
    "agg_optical_to_m": 300,              # (usado só em debug/amostragem)

    # Máscaras / NoData
    "nodata": -32768,                     # NoData final em int16
    "mask_water_threshold": 0.05,         # água = MNDWI > 0.05 (bordas rasas fora)
    "min_obs_hint": 2,                    # só informativo p/ QC

    # Janela flex (DJFM mais larga)
    "wet_months": [12, 1, 2, 3],          # DJFM
    "wet_window_months": 4,               # 4 meses em DJFM
    "dry_window_months": 2,               # 2 meses fora de DJFM

    # Dados e flags
    "use_smap_vod": True,                 # (opcional; export SMAP VOD)
    "resume": True,

    # Assets
    "ee_assets": {
        "landsat_collections": {
            "L5_SR": "LANDSAT/LT05/C02/T1_L2",
            "L7_SR": "LANDSAT/LE07/C02/T1_L2",
            "L8_SR": "LANDSAT/LC08/C02/T1_L2",
            "L9_SR": "LANDSAT/LC09/C02/T1_L2"
        },
        "smap_vod": "NASA_USDA/HSL/SMAP10KM_soil_moisture"  # banda 'vod_pm'
    },

    # Caminho do limite no Drive (ajuste se mudar o arquivo)
    "boundary_path": "/content/drive/MyDrive/Pantanal_TippingPoints/Pantanal.shp",

    # Pasta plana no topo do Drive para os exports (evita subpastas no GEE)
    "gee_drive_folder_optical": "Pantanal_TippingPoints_optical",
    "gee_drive_folder_vod":     "Pantanal_TippingPoints_vod"
}

# Persistir CONFIG para referência
with open(BASE_DIR / "config.yaml", "w") as f:
    yaml.safe_dump(CONFIG, f, allow_unicode=True, sort_keys=False)
print("✔ CONFIG salvo em:", BASE_DIR / "config.yaml")

# Helpers genéricos
def log(msg):
    print(time.strftime("[%Y-%m-%d %H:%M:%S]"), msg)

def get_time_slices(years, calendar="bimonthly"):
    ts=[]
    for y in years:
        if calendar=="monthly":
            ts.extend([f"{y}{m:02d}" for m in range(1,13)])
        else:
            ts.extend([f"{y}{m:02d}" for m in (1,3,5,7,9,11)])
    return ts

TIME_SLICES = get_time_slices(CONFIG["years"], CONFIG["calendar"])
print("Amostra de períodos:", TIME_SLICES[:3], "…", TIME_SLICES[-3:])



[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.3/22.3 MB[0m [31m53.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.2/62.2 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
✔ CONFIG salvo em: /content/drive/MyDrive/Pantanal_TippingPoints/index/config.yaml
Amostra de períodos: ['200101', '200103', '200105'] … ['201007', '201009', '201011']


In [3]:
# @title Célula 2 — GEE init, período flex (DJFM) e utilitários

import ee, datetime, time

# --- Inicialização do GEE ---
try:
    ee.Initialize(project="ee-cheilabaiao")
    print("✔ GEE já autenticado.")
except Exception:
    print("⇢ Autenticando no GEE…")
    ee.Authenticate()          # siga o link e cole o token
    ee.Initialize()
    print("✔ GEE autenticado.")

def period_for_yyyymm(yyyymm: str):
    """
    Retorna (start, end) como ee.Date, aplicando janela 'flex':
      - Meses úmidos (CONFIG['wet_months']): avanço de CONFIG['wet_window_months'] meses
      - Demais meses: avanço de CONFIG['dry_window_months'] meses
    """
    y = int(yyyymm[:4]); m = int(yyyymm[4:6])
    start = ee.Date.fromYMD(y, m, 1)
    if m in CONFIG["wet_months"]:
        end = start.advance(CONFIG["wet_window_months"], 'month')
    else:
        end = start.advance(CONFIG["dry_window_months"], 'month')
    return start, end

def log(msg):  # reforço local (caso não esteja no escopo)
    print(time.strftime("[%Y-%m-%d %H:%M:%S]"), msg)

def ee_wait_tasks(task_list, sleep_s=10):
    """Acompanha várias tasks do GEE até concluírem, com log simples."""
    pending = {t.id: t for t in task_list}
    last_state = {}
    while pending:
        done = []
        for tid, t in pending.items():
            st = t.status()
            state = st.get('state','UNKNOWN')
            if last_state.get(tid) != state:
                log(f"Tarefa {tid[:6]}: {state}")
                last_state[tid] = state
            if state in ("COMPLETED","CANCELLED","FAILED"):
                done.append(tid)
        for tid in done:
            pending.pop(tid, None)
        if pending:
            time.sleep(sleep_s)
    log("✔ Todas as tarefas finalizaram.")

# --- (Opcional) Debug rápido: contar cenas por sensor num período ---
def debug_count_scenes(yyyymm: str, region_geom):
    start, end = period_for_yyyymm(yyyymm)
    assets = CONFIG["ee_assets"]["landsat_collections"]
    counts = {}
    for key, col in assets.items():
        n = (ee.ImageCollection(col)
             .filterDate(start, end)
             .filterBounds(region_geom)
             .size())
        counts[key] = n.getInfo()
    print(f"[DEBUG] {yyyymm} — L5:{counts.get('L5_SR',0)}  L7:{counts.get('L7_SR',0)}  "
          f"L8:{counts.get('L8_SR',0)}  L9:{counts.get('L9_SR',0)}")

print("✔ Célula 2 pronta: GEE, janela flex e utilitários carregados.")



✔ GEE já autenticado.
✔ Célula 2 pronta: GEE, janela flex e utilitários carregados.


In [8]:
# @title Célula 3 — Carregar boundary do Drive → ee.Geometry (pant) e pant_simpl
import json
from pathlib import Path
import geopandas as gpd
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union

# (opcional) parâmetro de tolerância para a simplificação (em metros no CRS do projeto)
SIMPLIFY_TOL_M = 500  # ajuste se quiser; 300–1000 m costuma ir bem para region

bpath = Path(CONFIG["boundary_path"])
assert bpath.exists(), f"Limite não encontrado: {bpath}. Ajuste CONFIG['boundary_path']."

# 1) Ler e normalizar
gdf = gpd.read_file(bpath)
assert len(gdf), f"Nenhuma feição encontrada em {bpath}"

# Corrige geometrias inválidas (buffer(0)) e remove vazios
gdf["geometry"] = gdf["geometry"].buffer(0)
gdf = gdf[~gdf.geometry.is_empty & gdf.geometry.notnull()].copy()
assert len(gdf), "Todas as geometrias ficaram vazias após correção."

# 2) Dissolver em um único polígono
if "name" in gdf.columns:
    gdf_diss = gdf.dissolve()
else:
    gdf["_ones_"] = 1
    gdf_diss = gdf.dissolve("_ones_").drop(columns="_ones_", errors="ignore")

geom_proj = gdf_diss.geometry.values[0]
assert geom_proj.is_valid and not geom_proj.is_empty, "Geometry inválida após dissolve."

# 3) Reprojetar para WGS84 (GEE espera lon/lat em EPSG:4326 nas coords do 'region')
gdf_wgs = gpd.GeoDataFrame(geometry=[geom_proj], crs=gdf.crs).to_crs(4326)

# 4) Exportar referência em GeoJSON (WGS84)
ref_dir = (INT_DIR / "boundaries"); ref_dir.mkdir(parents=True, exist_ok=True)
ref_geojson_fp = ref_dir / "pantanal_boundary_wgs84.geojson"
gdf_wgs.to_file(ref_geojson_fp, driver="GeoJSON")
print("✔ Boundary WGS84 salvo em:", ref_geojson_fp)

# 5) Construir ee.Geometry (aceita MultiPolygon também)
geojson = json.loads(gdf_wgs.to_json())
features = geojson["features"]
assert features, "GeoJSON sem features após reprojeção."

# Garante MultiPolygon/Polygon “limpo”
def _as_geom_coords(feat_geom):
    gtype = feat_geom.get("type","")
    if gtype == "Polygon":
        return [feat_geom["coordinates"]]
    elif gtype == "MultiPolygon":
        return feat_geom["coordinates"]
    else:
        raise ValueError(f"Tipo de geometria não suportado: {gtype}")

all_mparts = []
for feat in features:
    all_mparts.extend(_as_geom_coords(feat["geometry"]))

# Reconstrói como MultiPolygon → ee.Geometry
if len(all_mparts) == 1:
    pant = ee.Geometry.Polygon(all_mparts[0], proj=None, geodesic=True, evenOdd=True)
else:
    pant = ee.Geometry.MultiPolygon(all_mparts, proj=None, geodesic=True, evenOdd=True)

# 6) Versão simplificada apenas para usar como 'region' nos exports
#    A simplificação é feita no CRS do projeto (métrico), para tolerância em metros.
gdf_metric = gdf_wgs.to_crs(CONFIG["crs_epsg"])
geom_metric = gdf_metric.geometry.values[0].simplify(SIMPLIFY_TOL_M, preserve_topology=True)
geom_metric = geom_metric.buffer(0)  # reforça validade pós-simplificação
gdf_metric_simpl = gpd.GeoDataFrame(geometry=[geom_metric], crs=CONFIG["crs_epsg"]).to_crs(4326)

geojson_simpl = json.loads(gdf_metric_simpl.to_json())
parts_simpl = _as_geom_coords(geojson_simpl["features"][0]["geometry"])
if len(parts_simpl) == 1:
    pant_simpl = ee.Geometry.Polygon(parts_simpl[0], proj=None, geodesic=True, evenOdd=True)
else:
    pant_simpl = ee.Geometry.MultiPolygon(parts_simpl, proj=None, geodesic=True, evenOdd=True)
pant_bbox = pant_simpl.bounds(1)
# 7) Prints de sanidade
#    (área aproximada esférica em km², usando ee.Geometry area em m²)
area_full_km2  = pant.area(maxError=10).getInfo() / 1e6
area_simpl_km2 = pant_simpl.area(maxError=10).getInfo() / 1e6
print(f"✔ ee.Geometry criado. Área ~ full: {area_full_km2:,.1f} km² | simpl: {area_simpl_km2:,.1f} km²")
print(f"✔ pant e pant_simpl prontos (tolerância {SIMPLIFY_TOL_M} m).")


✔ Boundary WGS84 salvo em: /content/drive/MyDrive/Pantanal_TippingPoints/index/interim/boundaries/pantanal_boundary_wgs84.geojson
✔ ee.Geometry criado. Área ~ full: 151,438.0 km² | simpl: 151,463.5 km²
✔ pant e pant_simpl prontos (tolerância 500 m).


In [9]:
# @title 3.LANDSAT — Helpers Landsat L2 (escala, máscara) + índices NDVI/EVI/NBR/MNDWI
import ee

def sr_scale(img):
    """
    Aplica a escala dos produtos Landsat Collection 2 Level-2 (reflectância):
      SR = DN * 0.0000275 - 0.2
    Mantém QA_PIXEL/QA_RADSAT.
    """
    optical = img.select('SR_B.*').multiply(0.0000275).add(-0.2)
    qa = img.select('QA_PIXEL')
    return img.addBands(optical, None, True).addBands(qa)

def cloud_mask_l2(img):
    """
    Máscara de nuvem/sombra/neve + saturação radiométrica.
    Bits em QA_PIXEL (C2 L2):
      3: Cloud, 4: Shadow, 5: Snow
    QA_RADSAT: saturação em alguma banda.
    Além disso, restringe a faixa física após escala (~[-0.2, 1.0]).
    """
    qa = img.select('QA_PIXEL')
    cloud  = qa.bitwiseAnd(1 << 3).neq(0)
    shadow = qa.bitwiseAnd(1 << 4).neq(0)
    snow   = qa.bitwiseAnd(1 << 5).neq(0)
    mask_clouds = cloud.Or(shadow).Or(snow)

    # qualquer banda saturada?
    sat_any = img.select('QA_RADSAT').reduce(ee.Reducer.max()).gt(0)

    # faixa física pós-escala
    sr = img.select('SR_B.*')
    valid_low  = sr.reduce(ee.Reducer.min()).gt(-0.199)  # folga
    valid_high = sr.reduce(ee.Reducer.max()).lt(1.001)

    return (img
            .updateMask(mask_clouds.Not())
            .updateMask(sat_any.Not())
            .updateMask(valid_low.And(valid_high)))

def add_indices_for(sensor_key):
    """
    Calcula NDVI, EVI, NBR e MNDWI com mapeamento de bandas por sensor.
    Saídas clampadas a intervalos físicos para evitar estouros mais à frente.
    """
    def _fn(img):
        if sensor_key in ("L5_SR", "L7_SR"):
            b = {'BLUE':'SR_B1','GREEN':'SR_B2','RED':'SR_B3','NIR':'SR_B4','SWIR1':'SR_B5','SWIR2':'SR_B7'}
        else:  # L8_SR, L9_SR
            b = {'BLUE':'SR_B2','GREEN':'SR_B3','RED':'SR_B4','NIR':'SR_B5','SWIR1':'SR_B6','SWIR2':'SR_B7'}

        nir   = img.select(b['NIR'])
        red   = img.select(b['RED'])
        blue  = img.select(b['BLUE'])
        swir1 = img.select(b['SWIR1'])
        swir2 = img.select(b['SWIR2'])

        # NDVI
        ndvi = nir.subtract(red).divide(nir.add(red)).rename('NDVI')
        # EVI (com denominador protegido)
        den  = nir.add(red.multiply(6)).subtract(blue.multiply(7.5)).add(1)
        evi  = nir.subtract(red).multiply(2.5).divide(den.max(0.05)).rename('EVI')
        # NBR
        nbr  = nir.subtract(swir2).divide(nir.add(swir2)).rename('NBR')
        # MNDWI (usando GREEN e SWIR1)
        mndwi = img.expression('(g - s1) / (g + s1)', {'g': img.select(b['GREEN']), 's1': swir1}).rename('MNDWI')

        # clamp físico (evita estouro ao quantizar ×10000 depois)
        ndvi  = ndvi.max(-1).min(1)
        evi   = evi.max(-1.5).min(1.5)
        nbr   = nbr.max(-1).min(1)
        mndwi = mndwi.max(-1).min(1)

        return img.addBands([ndvi, evi, nbr, mndwi], overwrite=True)
    return _fn

def landsat_ic(sensor_key):
    """
    Retorna a ImageCollection Landsat C2 L2 do sensor, já com:
      - escala L2 aplicada
      - máscara de nuvem/sombra/saturação/faixa
      - bandas de índices NDVI, EVI, NBR, MNDWI calculadas
    """
    coll_id = CONFIG["ee_assets"]["landsat_collections"][sensor_key]
    coll = ee.ImageCollection(coll_id)
    return (coll
            .map(sr_scale)
            .map(cloud_mask_l2)
            .map(add_indices_for(sensor_key)))

print("✔ Célula 3.LANDSAT pronta: sr_scale, cloud_mask_l2, add_indices_for, landsat_ic.")


✔ Célula 3.LANDSAT pronta: sr_scale, cloud_mask_l2, add_indices_for, landsat_ic.


In [10]:
# @title Célula 4 — Compósito 30 m (estrito) + métricas 300 m (inclui MNDWI/dNBR/RdNBR)
import ee

def ensure_default_proj(img, scale_m=None):
    """Garante projeção default antes de reduceResolution."""
    if scale_m is None:
        scale_m = CONFIG['optical_pixel_m']  # 30 m
    proj = ee.Projection(f"EPSG:{CONFIG['crs_epsg']}").atScale(scale_m)
    return img.setDefaultProjection(proj)

def build_comp_for_export_strict(yyyymm: str) -> ee.Image:
    """
    Compósito 'estrito' em 30 m com as MESMAS máscaras validadas:
      - QA/clima já aplicadas na landsat_ic()
      - água: MNDWI <= threshold
      - presença em TODAS as bandas
      - OBS >= 1
    Retorna: NDVI, EVI, NBR, MNDWI (float), OBS (int)
    """
    start, end = period_for_yyyymm(yyyymm)
    ic = (
        landsat_ic('L5_SR').filterDate(start,end).filterBounds(pant)
        .merge(landsat_ic('L7_SR').filterDate(start,end).filterBounds(pant))
        .merge(landsat_ic('L8_SR').filterDate(start,end).filterBounds(pant))
        .merge(landsat_ic('L9_SR').filterDate(start,end).filterBounds(pant))
        .map(lambda im: im.select(['NDVI','EVI','NBR','MNDWI']))
    )

    comp = ic.reduce(ee.Reducer.median()).rename(['NDVI','EVI','NBR','MNDWI']).toFloat()
    obs  = ic.select('NDVI').count().rename('OBS')

    water_keep = comp.select('MNDWI').lte(CONFIG['mask_water_threshold'])
    valid_all  = comp.mask().reduce(ee.Reducer.min())
    has_obs    = obs.gte(1)
    common_mask = water_keep.And(valid_all).And(has_obs)

    comp_m = comp.updateMask(common_mask).clip(pant)
    obs_m  =  obs.updateMask(common_mask).clip(pant)

    return comp_m.addBands(obs_m)

def prev_yyyymm(yyyymm: str):
    try:
        i = TIME_SLICES.index(yyyymm)
        return TIME_SLICES[i-1] if i > 0 else None
    except Exception:
        return None

def build_fire_metrics_300m(yyyymm: str) -> ee.Image:
    """
    Em 300 m (float32): NDVI, EVI, NBR, MNDWI (do período atual),
                        dNBR (NBR_pre - NBR_pos), RdNBR, OBS (média).
    dNBR/RdNBR são calculados em 30 m e depois agregados por média (válidos).
    """
    yprev = prev_yyyymm(yyyymm)
    comp_cur_30 = ensure_default_proj(build_comp_for_export_strict(yyyymm), CONFIG['optical_pixel_m'])

    if yprev is None:
        idx30 = comp_cur_30.select(['NDVI','EVI','NBR','MNDWI'])
        obs30 = comp_cur_30.select('OBS').toFloat()
        reducer = ee.Reducer.mean()

        idx_agg = (idx30
                   .reduceResolution(reducer=reducer, maxPixels=4096)
                   .reproject(crs=f"EPSG:{CONFIG['crs_epsg']}", scale=CONFIG['agg_optical_to_m']))
        obs_agg = (obs30
                   .reduceResolution(reducer=reducer, maxPixels=4096)
                   .reproject(crs=f"EPSG:{CONFIG['crs_epsg']}", scale=CONFIG['agg_optical_to_m']))

        # bandas “vazias” no lugar de NaN
        empty = ee.Image(0).updateMask(ee.Image(0))
        dnbr_empty  = empty.rename('dNBR')
        rdnbr_empty = empty.rename('RdNBR')

        return (ee.Image.cat([
                    idx_agg.select(['NDVI','EVI','NBR','MNDWI']),
                    dnbr_empty, rdnbr_empty,
                    obs_agg.rename('OBS')
                ]).toFloat().clip(pant))

    comp_pre_30 = ensure_default_proj(build_comp_for_export_strict(yprev), CONFIG['optical_pixel_m'])
    nbr_pre = comp_pre_30.select('NBR')
    nbr_pos = comp_cur_30.select('NBR')

    dnbr_30  = nbr_pre.subtract(nbr_pos).rename('dNBR')
    eps  = ee.Image.constant(0.1)
    denom = nbr_pre.abs().sqrt().max(eps)
    rdnbr_30 = dnbr_30.divide(denom).rename('RdNBR')

    reducer = ee.Reducer.mean()

    idx_agg = (comp_cur_30.select(['NDVI','EVI','NBR','MNDWI'])
               .reduceResolution(reducer=reducer, maxPixels=4096)
               .reproject(crs=f"EPSG:{CONFIG['crs_epsg']}", scale=CONFIG['agg_optical_to_m']))
    dnbr_agg = (dnbr_30
                .reduceResolution(reducer=reducer, maxPixels=4096)
                .reproject(crs=f"EPSG:{CONFIG['crs_epsg']}", scale=CONFIG['agg_optical_to_m']))
    rdnbr_agg = (rdnbr_30
                 .reduceResolution(reducer=reducer, maxPixels=4096)
                 .reproject(crs=f"EPSG:{CONFIG['crs_epsg']}", scale=CONFIG['agg_optical_to_m']))
    obs_agg = (comp_cur_30.select('OBS').toFloat()
               .reduceResolution(reducer=reducer, maxPixels=4096)
               .reproject(crs=f"EPSG:{CONFIG['crs_epsg']}", scale=CONFIG['agg_optical_to_m']))

    return (ee.Image.cat([
                idx_agg.select(['NDVI','EVI','NBR','MNDWI']),
                dnbr_agg, rdnbr_agg,
                obs_agg.rename('OBS')
            ]).toFloat().clip(pant))

# (opcional) smoke-test rápido:
_test = build_comp_for_export_strict(TIME_SLICES[0])
print("✔ Célula 4 pronta: build_comp_for_export_strict e build_fire_metrics_300m definidas.")


✔ Célula 4 pronta: build_comp_for_export_strict e build_fire_metrics_300m definidas.


In [7]:
# @title Célula 5 — Export Óptico em 300 m (barra de progresso, skip se já existir)

import json, time, glob
from pathlib import Path
from tqdm.auto import tqdm

# --- Parâmetros do export (agora em 300 m) ---
COG = False                               # ligue depois de validar
EXPORT_SCALE_M   = CONFIG['agg_optical_to_m']   # <<< 300 m
NODATA_INT16     = CONFIG['nodata']             # -32768
OPT_FOLDER       = CONFIG["gee_drive_folder_optical"]  # pode usar outro nome ex.: "..._300m"

# --- Períodos deste lote (definidos na Célula 1 via CONFIG["years"]) ---
all_slices = get_time_slices(CONFIG["years"], CONFIG["calendar"])
print("Total de períodos (óptico, 300 m) neste lote:", len(all_slices))

# --- Checkpoint ---
CKP_OPT   = LOG_DIR / "export_optical_checkpoint.json"
state_opt = json.load(open(CKP_OPT)) if CKP_OPT.exists() else {"done": {}}
def _save_ckp_opt(): json.dump(state_opt, open(CKP_OPT, "w"))

# --- Helper: já existe opt_YYYYMM*.tif no Drive? ---
def already_on_drive(yyyymm: str) -> bool:
    roots = [Path("/content/drive/MyDrive"), Path("/content/drive/My Drive")]
    for root in roots:
        hits = glob.glob(str(root / OPT_FOLDER / f"opt_{yyyymm}*.tif"))
        if hits: return True
    return False

# --- Lista de períodos a rodar (respeita checkpoint e arquivos no Drive) ---
to_run = []
for yyyymm in all_slices:
    if state_opt["done"].get(yyyymm) and CONFIG["resume"]:
        continue
    if already_on_drive(yyyymm):
        state_opt["done"][yyyymm] = True
        continue
    to_run.append(yyyymm)

print(f"Períodos a processar neste lote (300 m): {len(to_run)}")

# --- Lançamento em ondas pequenas + barra e LOG de erro ---
QUEUE_SIZE = 4  # não agredir a fila do GEE
pending_batch = []
done_ids, last_states, errors = set(), {}, {}

def launch_task_300m(yyyymm):
    # 1) NDVI/EVI/NBR (pos), dNBR, RdNBR, OBS — todos já em 300 m (float32)
    fire300 = build_fire_metrics_300m(yyyymm)

    # 2) quantização:
    #    - índices e dNBR/RdNBR: ×10000 → int16
    #    - OBS (média em 300 m): arredonda e vira int16 (escala natural)
    idx_names = ['NDVI','EVI','NBR','MNDWI','dNBR','RdNBR']
    idx = fire300.select(idx_names).multiply(10000).round().toInt16()
    obs = fire300.select('OBS').round().toInt16()

    # 3) NoData explícito
    nod = ee.Image.constant(int(NODATA_INT16)).toInt16()
    idx = idx.unmask(nod); obs = obs.unmask(nod)
    out = idx.addBands(obs)

    fmt_opts = {'noData': int(NODATA_INT16)}
    if COG: fmt_opts.update({'cloudOptimized': True})

    task = ee.batch.Export.image.toDrive(
        image = out,
        description = f"opt_{yyyymm}",     # você pode renomear p/ "fire_" se preferir
        folder = OPT_FOLDER,
        fileNamePrefix = f"opt_{yyyymm}",
        region = pant_simpl,
        crs = f"EPSG:{CONFIG['crs_epsg']}",
        scale = CONFIG['agg_optical_to_m'],   # 300 m
        maxPixels = 1e13,
        fileFormat = 'GeoTIFF',
        formatOptions = fmt_opts
    )
    task.start()
    return task

with tqdm(total=len(to_run), desc="Óptico 300 m — períodos concluídos", dynamic_ncols=True) as pbar:
    i = 0
    while i < len(to_run) or pending_batch:
        # Enfileira até QUEUE_SIZE
        while i < len(to_run) and len(pending_batch) < QUEUE_SIZE:
            y = to_run[i]; i += 1
            t = launch_task_300m(y)
            pending_batch.append((y, t))

        # Poll nas tasks
        newly_done = 0
        keep = []
        for y, t in pending_batch:
            st = t.status()
            state = st.get('state','UNKNOWN')
            if last_states.get(t.id) != state:
                log(f"[opt_{y}] {state}")
                last_states[t.id] = state

            if state == "COMPLETED":
                if not state_opt["done"].get(y):
                    state_opt["done"][y] = True
                newly_done += 1
                done_ids.add(t.id)
            elif state in ("FAILED","CANCELLED"):
                err_msg = st.get('error_message') or ''
                errors[y] = err_msg
                log(f"[opt_{y}] {state} — {err_msg}")
                newly_done += 1
                done_ids.add(t.id)
            else:
                keep.append((y,t))
        pending_batch = keep

        if newly_done:
            pbar.update(newly_done)
            _save_ckp_opt()

        if pending_batch or i < len(to_run):
            time.sleep(10)

print(f"✔ Óptico 300 m: {len(state_opt['done'])} períodos marcados como concluídos (checkpoint).")
if errors:
    print("\n⚠️ Alguns períodos falharam. Exemplo de mensagens do GEE:")
    for y, msg in list(errors.items())[:10]:
        print(f"  - {y}: {msg}")
print(f"✔ Checkpoint salvo em:", CKP_OPT)






Total de períodos (óptico, 300 m) neste lote: 60
Períodos a processar neste lote (300 m): 35


Óptico 300 m — períodos concluídos:   0%|          | 0/35 [00:00<?, ?it/s]

[2025-10-04 09:10:32] [opt_200101] RUNNING
[2025-10-04 09:10:32] [opt_200103] RUNNING
[2025-10-04 09:10:32] [opt_200105] READY
[2025-10-04 09:10:32] [opt_200201] READY
[2025-10-04 09:10:43] [opt_200105] RUNNING
[2025-10-04 09:10:43] [opt_200201] RUNNING
[2025-10-04 09:15:38] [opt_200103] READY
[2025-10-04 09:15:49] [opt_200103] RUNNING
[2025-10-04 09:15:49] [opt_200201] READY
[2025-10-04 09:16:00] [opt_200201] RUNNING
[2025-10-04 09:36:27] [opt_200103] FAILED
[2025-10-04 09:36:27] [opt_200103] FAILED — Execution failed; out of memory.
[2025-10-04 09:36:28] [opt_200201] FAILED
[2025-10-04 09:36:28] [opt_200201] FAILED — Execution failed; out of memory.
[2025-10-04 09:36:47] [opt_200101] FAILED
[2025-10-04 09:36:47] [opt_200101] FAILED — Execution failed; out of memory.
[2025-10-04 09:36:47] [opt_200401] READY
[2025-10-04 09:36:47] [opt_200403] READY
[2025-10-04 09:37:02] [opt_200405] READY
[2025-10-04 09:37:13] [opt_200401] RUNNING
[2025-10-04 09:37:13] [opt_200403] RUNNING
[2025-10-04 

In [11]:
# @title 5B — Reprocessar meses problemáticos com fallback (bbox → tiles)
import glob, time
from pathlib import Path
from tqdm.auto import tqdm

OPT_FOLDER       = CONFIG["gee_drive_folder_optical"]
NODATA_INT16     = int(CONFIG["nodata"])
QUEUE_SIZE_FALL  = 2   # fila menor pra reduzir pressão
NX, NY           = 2, 3  # grade 2x3 (ajuste p/ 3x4 se necessário)

# 0) helpers
def _list_opt_files(yyyymm):
    roots = [Path("/content/drive/MyDrive"), Path("/content/drive/My Drive")]
    fps=[]
    for r in roots: fps += glob.glob(str(r/OPT_FOLDER/f"opt_{yyyymm}*.tif"))
    return sorted(set(fps))

# retângulo leve (se ainda não existir)
try:
    pant_bbox
except NameError:
    pant_bbox = pant_simpl.bounds(1)

def make_grid(region_geom, nx=2, ny=3):
    b = ee.Geometry(region_geom).bounds(1)
    coords = ee.List(b.coordinates().get(0))
    ll = ee.List(coords.get(0)); ur = ee.List(coords.get(2))
    xmin = ee.Number(ll.get(0)); ymin = ee.Number(ll.get(1))
    xmax = ee.Number(ur.get(0)); ymax = ee.Number(ur.get(1))
    dx = xmax.subtract(xmin).divide(nx); dy = ymax.subtract(ymin).divide(ny)
    tiles=[]
    for iy in range(ny):
        for ix in range(nx):
            x0 = xmin.add(dx.multiply(ix)); x1 = x0.add(dx)
            y0 = ymin.add(dy.multiply(iy)); y1 = y0.add(dy)
            tiles.append(ee.Geometry.Rectangle([x0,y0,x1,y1], None, True))
    return tiles

TILES_BBOX = make_grid(pant_bbox, nx=NX, ny=NY)

# 1) meses faltantes = sem arquivo no Drive
missing = [ym for ym in get_time_slices(CONFIG["years"], CONFIG["calendar"]) if not _list_opt_files(ym)]
print("Meses faltantes p/ reprocessar:", missing[:10], "…" if len(missing)>10 else "")

if not missing:
    print("✔ Nada a reprocessar. Tudo certo! ")
else:
    # 2) export por bbox (rápido) — tenta primeiro
    def launch_bbox(yyyymm):
        fire300 = build_fire_metrics_300m(yyyymm)
        idx_names = ['NDVI','EVI','NBR','MNDWI','dNBR','RdNBR']
        idx = fire300.select(idx_names).multiply(10000).round().toInt16()
        obs = fire300.select('OBS').round().toInt16()
        nod = ee.Image.constant(NODATA_INT16).toInt16()
        out = idx.unmask(nod).addBands(obs.unmask(nod))

        fmt_opts = {'noData': NODATA_INT16}
        if COG: fmt_opts.update({'cloudOptimized': True})

        t = ee.batch.Export.image.toDrive(
            image = out,
            description = f"opt_{yyyymm}_bbox",
            folder = OPT_FOLDER,
            fileNamePrefix = f"opt_{yyyymm}",
            region = pant_bbox,  # <<< retângulo leve
            crs = f"EPSG:{CONFIG['crs_epsg']}",
            scale = CONFIG['agg_optical_to_m'],
            maxPixels = 1e13,
            fileFormat = 'GeoTIFF',
            formatOptions = fmt_opts
        )
        t.start()
        return t

    # 3) export por tiles (2x3) — se bbox falhar
    def launch_tiles(yyyymm):
        fire300 = build_fire_metrics_300m(yyyymm)
        idx_names = ['NDVI','EVI','NBR','MNDWI','dNBR','RdNBR']
        idx = fire300.select(idx_names).multiply(10000).round().toInt16()
        obs = fire300.select('OBS').round().toInt16()
        nod = ee.Image.constant(NODATA_INT16).toInt16()
        out = idx.unmask(nod).addBands(obs.unmask(nod))

        fmt_opts = {'noData': NODATA_INT16}
        if COG: fmt_opts.update({'cloudOptimized': True})

        tasks=[]
        for k,tile in enumerate(TILES_BBOX):
            t = ee.batch.Export.image.toDrive(
                image = out,
                description = f"opt_{yyyymm}_t{k}",
                folder = OPT_FOLDER,
                fileNamePrefix = f"opt_{yyyymm}_t{k}",
                region = tile,
                crs = f"EPSG:{CONFIG['crs_epsg']}",
                scale = CONFIG['agg_optical_to_m'],
                maxPixels = 1e13,
                fileFormat = 'GeoTIFF',
                formatOptions = fmt_opts
            )
            t.start(); tasks.append(t)
        return tasks

    # 4) scheduler: tenta bbox; se falhar, tenta tiles
    def poll_tasks(task_objs):
        """Retorna ('COMPLETED'|'FAILED'|'RUNNING', messages) para lista de tasks."""
        states = []
        msgs   = []
        for t in task_objs:
            st = t.status(); states.append(st.get('state','UNKNOWN'))
            if 'error_message' in st: msgs.append(st['error_message'])
        if all(s == "COMPLETED" for s in states):
            return "COMPLETED", msgs
        if any(s in ("FAILED","CANCELLED") for s in states):
            return "FAILED", msgs
        return "RUNNING", msgs

    # lança em lotes pequenos
    queue = []
    i = 0
    with tqdm(total=len(missing), desc="Fallback (bbox→tiles) — meses ok", dynamic_ncols=True) as pbar:
        finished = set()
        while i < len(missing) or queue:
            while i < len(missing) and len(queue) < QUEUE_SIZE_FALL:
                ym = missing[i]; i += 1
                # se já existir arquivo (concorrência), pula
                if _list_opt_files(ym):
                    finished.add(ym); pbar.update(1); continue
                # tenta BBOX
                t = launch_bbox(ym)
                queue.append((ym, "bbox", [t], time.time()))

            keep=[]
            for ym, mode, tasks, t0 in queue:
                state, msgs = poll_tasks(tasks)
                if state == "COMPLETED":
                    finished.add(ym); pbar.update(1)
                elif state == "FAILED":
                    if mode == "bbox":
                        # tenta tiles
                        tasks2 = launch_tiles(ym)
                        keep.append((ym, "tiles", tasks2, time.time()))
                        log(f"[{ym}] bbox falhou → trocando para tiles ({NX}x{NY}).")
                    else:
                        log(f"[{ym}] tiles falharam: {msgs[:1]}")
                        # marca como finalizado (com erro). Você pode re-rodar depois com NX/NY maiores.
                        finished.add(ym); pbar.update(1)
                else:
                    keep.append((ym, mode, tasks, t0))
            queue = keep
            if queue or i < len(missing):
                time.sleep(12)

    print(f"✔ Fallback concluído. Meses processados (ou marcados): {len(finished)}/{len(missing)}")
    # dica: se algum mês ainda ficou faltando, aumente a grade p/ NX=3, NY=4 e rode 5B de novo.


Meses faltantes p/ reprocessar: ['200101', '200103', '200105', '200201', '200401', '200403', '200405', '200501', '200503', '200505'] …


Fallback (bbox→tiles) — meses ok:   0%|          | 0/25 [00:00<?, ?it/s]

[2025-10-04 18:07:12] [200101] bbox falhou → trocando para tiles (2x3).
[2025-10-04 18:20:15] [200103] bbox falhou → trocando para tiles (2x3).
[2025-10-04 18:33:11] [200101] tiles falharam: ['Execution failed; out of memory.']
[2025-10-04 19:24:50] [200103] tiles falharam: ['Execution failed; out of memory.']
[2025-10-04 20:23:28] [200201] bbox falhou → trocando para tiles (2x3).


KeyboardInterrupt: 

In [None]:
# @title Célula 6.QC — QC leve dos exports 300 m (multi-meses + CSV)
import glob, json, math
from pathlib import Path
import numpy as np
import rasterio as rio
from tqdm.auto import tqdm
import pandas as pd

OPT_FOLDER = CONFIG["gee_drive_folder_optical"]
WATER_THR  = CONFIG.get("mask_water_threshold", 0.05)
NODATA_I16 = int(CONFIG.get("nodata", -32768))

# ---------- utilidades ----------
def _list_month_files(yyyymm):
    roots = [Path("/content/drive/MyDrive"), Path("/content/drive/My Drive")]
    fps=[]
    for r in roots:
        fps.extend(glob.glob(str(r/OPT_FOLDER/f"opt_{yyyymm}*.tif")))
    return sorted(set(fps))

def _iter_valid_block(src, window):
    nd = src.nodata
    b1 = src.read(1, window=window).astype(np.int16)  # NDVI
    b2 = src.read(2, window=window).astype(np.int16)  # EVI
    b3 = src.read(3, window=window).astype(np.int16)  # NBR
    b4 = src.read(4, window=window).astype(np.int16)  # MNDWI
    b5 = src.read(5, window=window).astype(np.int16) if src.count >= 5 else None  # dNBR
    b6 = src.read(6, window=window).astype(np.int16) if src.count >= 6 else None  # RdNBR
    b7 = src.read(7, window=window).astype(np.int16) if src.count >= 7 else None  # OBS

    valid = (b1!=nd)&(b2!=nd)&(b3!=nd)&(b4!=nd)
    if b7 is not None:
        valid &= (b7!=nd)&(b7>=1)
    if not np.any(valid):
        return None

    ndvi  = (b1[valid].astype(np.float32))/10000.0
    evi   = (b2[valid].astype(np.float32))/10000.0
    nbr   = (b3[valid].astype(np.float32))/10000.0
    mndwi = (b4[valid].astype(np.float32))/10000.0

    # >>> filtra NoData de dNBR/RdNBR antes de escalar
    if b5 is not None:
        dmask = (b5[valid] != nd)
        dnbr  = (b5[valid][dmask].astype(np.float32))/10000.0
    else:
        dnbr = None
    if b6 is not None:
        rmask = (b6[valid] != nd)
        rdnbr = (b6[valid][rmask].astype(np.float32))/10000.0
    else:
        rdnbr = None

    obs = (b7[valid].astype(np.float32)) if b7 is not None else None
    return ndvi, evi, nbr, mndwi, dnbr, rdnbr, obs, valid.sum(), b1.size

def qc_one_month(yyyymm, target_samples=150_000, seed=123):
    fps = _list_month_files(yyyymm)
    if not fps:
        return {"yyyymm": yyyymm, "n_files": 0, "note": "sem_arquivos"}

    rng = np.random.default_rng(seed)
    # 1) contar válidos totais
    total_valid = 0
    total_px    = 0
    crs_set, res_set, nod_set, band_set = set(), set(), set(), set()

    for fp in fps:
        with rio.open(fp) as src:
            crs_set.add(str(src.crs))
            xres, yres = src.res
            res_set.add((float(xres), float(yres)))
            nod_set.add(src.nodata)
            band_set.add(src.count)
            for _, w in src.block_windows(1):
                blk = _iter_valid_block(src, w)
                if blk is None:
                    total_px += w.height * w.width
                    continue
                _, _, _, _, _, _, _, n_valid, n_tot = blk
                total_valid += n_valid
                total_px    += n_tot

    if total_valid == 0:
        return {"yyyymm": yyyymm, "n_files": len(fps), "note": "sem_validos",
                "crs": ";".join(sorted(crs_set)), "res": str(sorted(res_set)),
                "nodata": str(sorted(nod_set)), "bands": str(sorted(band_set))}

    # 2) fração de amostragem
    p = min(1.0, target_samples / float(total_valid))

    all_ndvi, all_evi, all_nbr, all_mndwi = [], [], [], []
    all_dnbr, all_rdnbr, all_obs = [], [], []
    wmask_cnt = 0
    valid_cnt = 0
    nd_cnt    = 0

    # 3) segunda passada: amostrar
    for fp in fps:
        with rio.open(fp) as src:
            nd = src.nodata
            for _, w in src.block_windows(1):
                blk = _iter_valid_block(src, w)
                if blk is None:
                    nd_cnt += w.height * w.width
                    continue

                ndvi, evi, nbr, mndwi, dnbr, rdnbr, obs, n_valid, _ = blk
                valid_cnt += n_valid
                wmask_cnt += np.count_nonzero(mndwi > WATER_THR)

                # --- SUBAMOSTRAGEM SEGURA ---
                # base (NDVI/EVI/NBR/MNDWI/OBS compartilham o mesmo conjunto de válidos)
                n = ndvi.size
                k = int(np.round(p * n))
                if k > 0:
                    idx = rng.choice(n, size=min(k, n), replace=False)
                    all_ndvi.append(ndvi[idx])
                    all_evi.append(evi[idx])
                    all_nbr.append(nbr[idx])
                    all_mndwi.append(mndwi[idx])
                    if obs is not None and obs.size == n:
                        all_obs.append(obs[idx])

                # dNBR e RdNBR podem ter comprimentos menores (após filtrar NoData)
                if dnbr is not None and dnbr.size > 0:
                    k2 = int(np.round(p * dnbr.size))
                    if k2 > 0:
                        idx2 = rng.choice(dnbr.size, size=min(k2, dnbr.size), replace=False)
                        all_dnbr.append(dnbr[idx2])

                if rdnbr is not None and rdnbr.size > 0:
                    k3 = int(np.round(p * rdnbr.size))
                    if k3 > 0:
                        idx3 = rng.choice(rdnbr.size, size=min(k3, rdnbr.size), replace=False)
                        all_rdnbr.append(rdnbr[idx3])

    def _stats(arrs, name):
        if not arrs:
            return {f"{name}_p1": np.nan, f"{name}_p50": np.nan, f"{name}_p99": np.nan,
                    f"{name}_pct_zero": np.nan, f"{name}_n": 0}
        v = np.concatenate(arrs)
        if v.size == 0:
            return {f"{name}_p1": np.nan, f"{name}_p50": np.nan, f"{name}_p99": np.nan,
                    f"{name}_pct_zero": np.nan, f"{name}_n": 0}
        p1, p50, p99 = np.nanpercentile(v, [1, 50, 99])
        pct0 = 100 * np.mean(np.isclose(v, 0.0, atol=1e-6))
        return {f"{name}_p1": float(p1), f"{name}_p50": float(p50), f"{name}_p99": float(p99),
                f"{name}_pct_zero": float(pct0), f"{name}_n": int(v.size)}

    rec = {"yyyymm": yyyymm, "n_files": len(fps),
           "crs": ";".join(sorted(crs_set)),
           "res": str(sorted(res_set)),
           "nodata": str(sorted(nod_set)),
           "bands": str(sorted(band_set))}

    rec.update(_stats(all_ndvi, "NDVI"))
    rec.update(_stats(all_evi,  "EVI"))
    rec.update(_stats(all_nbr,  "NBR"))
    rec.update(_stats(all_mndwi,"MNDWI"))

    if all_dnbr:
        v = np.concatenate(all_dnbr)
        rec.update(_stats([v], "dNBR"))
        rec["dNBR_p95"] = float(np.nanpercentile(v, 95))
    else:
        rec.update({k: np.nan for k in ["dNBR_p1","dNBR_p50","dNBR_p99","dNBR_pct_zero","dNBR_n","dNBR_p95"]})

    if all_rdnbr:
        v = np.concatenate(all_rdnbr)
        rec.update(_stats([v], "RdNBR"))
    else:
        rec.update({k: np.nan for k in ["RdNBR_p1","RdNBR_p50","RdNBR_p99","RdNBR_pct_zero","RdNBR_n"]})

    if all_obs:
        o = np.concatenate(all_obs)
        rec.update({
            "OBS_min": float(np.nanmin(o)),
            "OBS_med": float(np.nanmedian(o)),
            "OBS_max": float(np.nanmax(o)),
            "OBS_n":   int(o.size)
        })
    else:
        rec.update({"OBS_min": np.nan, "OBS_med": np.nan, "OBS_max": np.nan, "OBS_n": 0})

    rec["pct_water_high_among_valid"] = 100.0 * (wmask_cnt / max(1, valid_cnt))
    rec["approx_pct_nodata_any"]     = 100.0 * (nd_cnt    / max(1, valid_cnt + nd_cnt))

    return rec

# ---------- seleção dos meses p/ QC ----------
# por padrão, QC nos períodos deste lote que realmente existem no Drive
months_drive = []
for y in TIME_SLICES:
    if _list_month_files(y):
        months_drive.append(y)

print(f"Meses encontrados no Drive p/ QC: {len(months_drive)}")
print("Ex.:", months_drive[:5], "…", months_drive[-5:])

# você pode filtrar aqui, por exemplo:
# months_drive = [m for m in months_drive if m.startswith("1992")]

# ---------- rodar QC ----------
rows=[]
for ym in tqdm(months_drive, desc="QC 300m — meses"):
    rows.append(qc_one_month(ym, target_samples=150_000, seed=123))

df = pd.DataFrame(rows).sort_values("yyyymm")
display_cols = [
    "yyyymm","n_files","approx_pct_nodata_any","pct_water_high_among_valid",
    "NDVI_p1","NDVI_p50","NDVI_p99",
    "EVI_p1","EVI_p50","EVI_p99",
    "NBR_p1","NBR_p50","NBR_p99",
    "MNDWI_p1","MNDWI_p50","MNDWI_p99",
    "dNBR_p50","dNBR_p95","RdNBR_p50",
    "OBS_min","OBS_med","OBS_max"
]
print("— Resumo (amostra) —")
print(df[display_cols].head(10).to_string(index=False))

# salvar CSV completo
qc_csv = OUT_DIR / "qc_optical_300m_summary.csv"
df.to_csv(qc_csv, index=False)
print("✔ QC salvo em:", qc_csv)



Meses encontrados no Drive p/ QC: 29
Ex.: ['199601', '199603', '199605', '199607', '199609'] … ['200003', '200005', '200007', '200009', '200011']


QC 300m — meses:   0%|          | 0/29 [00:00<?, ?it/s]

— Resumo (amostra) —
yyyymm  n_files  approx_pct_nodata_any  pct_water_high_among_valid  NDVI_p1  NDVI_p50  NDVI_p99   EVI_p1  EVI_p50  EVI_p99    NBR_p1  NBR_p50  NBR_p99  MNDWI_p1  MNDWI_p50  MNDWI_p99  dNBR_p50  dNBR_p95  RdNBR_p50  OBS_min  OBS_med  OBS_max
199601        2              41.038998                         0.0 0.445197    0.6552  0.831203 0.158297   0.3651   0.5729  0.277400   0.5941 0.763100 -0.571500    -0.4441  -0.065700       NaN       NaN        NaN      1.0      3.0     15.0
199603        2              41.048061                         0.0 0.455800    0.6458  0.818400 0.165195   0.3456   0.5344  0.238000   0.5644 0.755800 -0.582300    -0.4799  -0.075395    0.0218  0.110045    0.02870      1.0      5.0     18.0
199605        2              41.230009                         0.0 0.436300    0.6309  0.810800 0.149807   0.3282   0.5111  0.182607   0.5276 0.761300 -0.597093    -0.4973  -0.085100    0.0261  0.104900    0.03640      1.0      2.0      9.0
199607        2

In [None]:

# @title Célula 6 — Export VOD (SMAP vod_pm) separado, com barra de progresso

import json, time
from tqdm.notebook import tqdm

if not CONFIG.get("use_smap_vod", True):
    print("VOD está desativado em CONFIG['use_smap_vod']=False.")
else:
    VOD_FOLDER = CONFIG["gee_drive_folder_vod"]
    all_slices_vod = get_time_slices(CONFIG["years"], CONFIG["calendar"])
    print("Total de períodos (VOD):", len(all_slices_vod))

    # checkpoint VOD
    CKP_VOD = LOG_DIR / "export_vod_checkpoint.json"
    vod_state = json.load(open(CKP_VOD)) if CKP_VOD.exists() else {"done": {}}

    smap = ee.ImageCollection(CONFIG["ee_assets"]["smap_vod"]).select('vod_pm')

    pending_vod = []
    for yyyymm in all_slices_vod:
        if vod_state["done"].get(yyyymm) and CONFIG["resume"]:
            continue

        start, end = period_for_yyyymm(yyyymm)
        mean = smap.filterDate(start, end).mean().set({'yyyymm': yyyymm})

        task = ee.batch.Export.image.toDrive(
            image = mean.toFloat(),
            description = f"vod_{yyyymm}",
            folder = VOD_FOLDER,
            fileNamePrefix = f"vod_{yyyymm}",
            region = pant_simpl,
            crs = f"EPSG:{CONFIG['crs_epsg']}",
            scale = 10000,
            maxPixels = 1e13,
            fileFormat = 'GeoTIFF',
            formatOptions = {'noData': -9999}
        )
        task.start()
        pending_vod.append((yyyymm, task))

    print(f"Tasks lançadas (VOD): {len(pending_vod)}")

    # barra de progresso VOD
    completed = 0
    with tqdm(total=len(pending_vod), desc="VOD — períodos concluídos") as pbar:
        last_states = {}
        while completed < len(pending_vod):
            newly_done = 0
            for yyyymm, t in pending_vod:
                st = t.status()
                state = st.get('state', 'UNKNOWN')
                if last_states.get(t.id) != state:
                    log(f"[VOD {yyyymm}] {state}")
                    last_states[t.id] = state
                if state == "COMPLETED" and not vod_state["done"].get(yyyymm):
                    vod_state["done"][yyyymm] = True
                    newly_done += 1
            if newly_done:
                completed += newly_done
                pbar.update(newly_done)
                json.dump(vod_state, open(CKP_VOD, "w"))
            if completed < len(pending_vod):
                time.sleep(10)

    print(f"✔ VOD finalizado: {completed}/{len(pending_vod)} períodos concluídos.")
    print(f"✔ Checkpoint salvo em: {CKP_VOD}")







[198601] NDVI amostrado (275 px @ 240 m): p1=0.4155  p50=0.6534  p99=0.8337  %zeros=0.00%
OBS — min/med/max: 1 2.0 12
Amostras prontas para reuso (lon/lat) → SAMPLE_POINTS.shape: (275, 2)
