In [1]:
from dataclasses import dataclass
from pathlib import Path
from textwrap import dedent
import subprocess, re, json, shutil
import xml.etree.ElementTree as ET

@dataclass
class CFG:
    """Central configuration for the Sentinel-1 GRD processing notebook.

    Holds the naming tokens used for exported files, the SNAP GPT settings
    and the input / work / export folder roots. The output CRS is derived
    from ``country`` through the ``epsg`` property.
    """
    study_area: str      = "Monsanto"   # first token of exported file names
    year_token: str      = "20200000"   # YYYY0000
    tile_code: str       = "A"          # last token of exported file names
    country: str         = "PT"         # PT -> 3763, ES -> 25830
    gpt_exe: str         = r"C:\Program Files\esa-snap\bin\gpt.exe"
    gpt_mem: str         = "6G"         # value handed to gpt after the -c flag
    input_root: Path     = Path(r"M:\Project BLS\S1\GRD\Input")
    work_root: Path      = Path(r"M:\Project BLS\S1\Work")
    exports_root: Path   = Path(r"M:\Project BLS\S1\Exports")
    max_inputs_per_comp: int = 6        # upper bound on scenes per composite

    @property
    def epsg(self) -> str:
        """Return the EPSG code string for the configured country.

        Returns:
            ``"EPSG:3763"`` for PT or ``"EPSG:25830"`` for ES
            (case-insensitive, surrounding whitespace ignored).

        Raises:
            ValueError: ``country`` is neither PT nor ES. Previously any
                non-PT value silently fell back to EPSG:25830, so a typo
                produced outputs in the wrong CRS without any warning.
        """
        codes = {"PT": "EPSG:3763", "ES": "EPSG:25830"}
        key = str(self.country).strip().upper()
        if key not in codes:
            raise ValueError(
                f"Unsupported country {self.country!r}; expected one of {sorted(codes)}"
            )
        return codes[key]

cfg = CFG()

# Make sure the working and export folders are present before anything runs
for _required_dir in (
    cfg.work_root,
    cfg.work_root / "GammaSigma",
    cfg.work_root / "Beta",
    cfg.exports_root,
):
    _required_dir.mkdir(parents=True, exist_ok=True)

def preflight():
    """Check that SNAP GPT and the GDAL command-line tools can be used.

    Prints a short summary (GPT path, GDAL version, EPSG) when everything
    is in place.

    Raises:
        FileNotFoundError: the configured GPT executable does not exist.
        RuntimeError: GPT cannot be executed, or a GDAL tool is not on PATH.
    """
    print("== PREFLIGHT ==")
    gpt_location = Path(cfg.gpt_exe)
    if not gpt_location.exists():
        raise FileNotFoundError(f"SNAP GPT not found: {cfg.gpt_exe}")

    # Launch failures and non-zero exits are both reported as one RuntimeError
    try:
        probe = subprocess.run([cfg.gpt_exe, "-h"], capture_output=True, text=True)
        if probe.returncode != 0:
            raise RuntimeError(probe.stderr[:300])
    except Exception as err:
        raise RuntimeError("Cannot execute SNAP GPT. Check Java/SNAP install.") from err

    required_tools = (
        ("gdalinfo", "GDAL not on PATH (missing gdalinfo). Install GDAL or add to PATH."),
        ("gdal_translate", "GDAL not on PATH (missing gdal_translate)."),
    )
    for tool_name, complaint in required_tools:
        if shutil.which(tool_name) is None:
            raise RuntimeError(complaint)

    print("✓ GPT OK:", cfg.gpt_exe)
    gdal_version = subprocess.check_output(["gdalinfo", "--version"], text=True).strip()
    print("✓ GDAL OK:", gdal_version)
    print("✓ EPSG:", cfg.epsg)
    print()

# Validate the toolchain (SNAP GPT + GDAL) up front; raises if anything is missing
preflight()


In [3]:
def run_gpt(graph_xml: Path, mem: "str | None" = None, verbose=True):
    """Run SNAP GPT on a graph file and return the completed process.

    Args:
        graph_xml: path of the graph XML handed to GPT.
        mem: value passed after GPT's ``-c`` flag. ``None`` (the default)
            means "use ``cfg.gpt_mem`` as it is right now". The lookup is
            done at call time because a default of ``cfg.gpt_mem`` in the
            signature would be frozen when this cell is executed and would
            go stale after the configuration cell is edited and re-run.
        verbose: when true, print the command line before running it.

    Returns:
        The ``subprocess.CompletedProcess`` with captured stdout/stderr.

    Raises:
        RuntimeError: GPT exited with a non-zero status. The last 60 lines
            of stdout and stderr are printed first.
    """
    if mem is None:
        mem = cfg.gpt_mem
    cmd = [cfg.gpt_exe, str(graph_xml), "-c", mem]
    if verbose:
        print("RUN:", " ".join(cmd))
    out = subprocess.run(cmd, capture_output=True, text=True)
    if out.returncode != 0:
        print("\nSTDOUT tail:\n", "\n".join(out.stdout.splitlines()[-60:]))
        print("\nSTDERR tail:\n", "\n".join(out.stderr.splitlines()[-60:]))
        raise RuntimeError(f"GPT failed on graph: {graph_xml}")
    return out


def find_grd_inputs(root: Path):
    """Walk ``root`` recursively and collect Sentinel-1 IW GRDH products.

    An entry qualifies when its upper-cased name contains both ``_IW_`` and
    ``_GRDH_`` and it is either a directory ending in ``.SAFE`` or a file
    ending in ``.ZIP``. The matches are returned as a sorted list of paths.
    """
    def is_grd_product(entry: Path) -> bool:
        upper = entry.name.upper()
        if "_IW_" not in upper or "_GRDH_" not in upper:
            return False
        if entry.is_dir() and upper.endswith(".SAFE"):
            return True
        return entry.is_file() and upper.endswith(".ZIP")

    return sorted(entry for entry in root.rglob("*") if is_grd_product(entry))


def sense_token(stem: str) -> str:
    """Return the first ``YYYYMMDDTHHMMSS`` token found in ``stem``.

    Example: ``20200509T183527``. When no such token is present the input
    string is returned unchanged.
    """
    hit = re.search(r"\d{8}T\d{6}", stem)
    if hit is None:
        return stem
    return hit.group(0)

def sort_by_time(paths):
    """Return ``paths`` as a new list ordered by acquisition timestamp token.

    The key is ``sense_token`` applied to each item's stem; the sort is
    stable, so items with equal keys keep their incoming order.
    """
    def acquisition_key(item):
        return sense_token(Path(item).stem)

    ordered = list(paths)
    ordered.sort(key=acquisition_key)
    return ordered

def cap_scenes(scènes, max_n, policy="latest"):
    """Limit a scene list to at most ``max_n`` entries, ordered by time.

    Args:
        scènes: iterable of scene paths (anything ``sort_by_time`` accepts).
        max_n: maximum number of scenes to keep; ``None`` disables the cap.
        policy:
          - "latest": keep most recent max_n scenes
          - any other value (e.g. "earliest"): keep earliest max_n scenes

    Returns:
        The time-sorted list, truncated to ``max_n`` items when needed.
        A message listing the first dropped scenes is printed on truncation.

    Raises:
        ValueError: ``max_n`` is negative.
    """
    scènes = sort_by_time(scènes)
    if max_n is None:
        return scènes
    if max_n < 0:
        raise ValueError(f"max_n must be >= 0 or None, got {max_n}")
    if len(scènes) <= max_n:
        return scènes

    if policy == "latest":
        # Slice from an explicit index: scènes[-max_n:] would return the
        # whole list when max_n == 0 because -0 == 0.
        cut = len(scènes) - max_n
        dropped, kept = scènes[:cut], scènes[cut:]
    else:
        kept, dropped = scènes[:max_n], scènes[max_n:]

    print(f"⚠️ Capping scenes: {len(scènes)} → {len(kept)} (policy={policy}). Dropping {len(dropped)} scenes.")
    print("   Dropped (first 5):", [Path(x).name for x in dropped[:5]])
    return kept


def gdal_band_count(tif: Path) -> int:
    """Count the bands of a raster by parsing ``gdalinfo -json`` output.

    Raises:
        RuntimeError: ``gdalinfo`` could not be run or its output could not
            be interpreted; the underlying exception is chained.
    """
    query = ["gdalinfo", "-json", str(tif)]
    try:
        report = json.loads(subprocess.check_output(query, text=True))
        band_total = len(report.get("bands", []))
    except Exception as err:
        raise RuntimeError(f"gdalinfo failed on {tif}") from err
    return band_total


def build_name(variable: str, suffix=""):
    """Compose the standard export file name for one variable.

    Pattern: ``<study_area>_<year_token>_S1GRD_<variable>_<tile_code><suffix>.tif``,
    e.g. ``Monsanto_20200000_S1GRD_GAMMAVH_A.tif``. ``suffix`` is appended
    right before the extension.
    """
    pieces = (cfg.study_area, cfg.year_token, "S1GRD", variable, cfg.tile_code)
    base = "_".join(str(piece) for piece in pieces)
    return base + str(suffix) + ".tif"



In [5]:

# ---- Embedded templates (trimmed to the essentials) ----
# SNAP graph templates keyed by name. Placeholders in braces ({INPUT},
# {OUTPUT} and, for the terrain-correction graphs, {EPSG}) are filled with
# str.format by render_graph(), so the templates must not contain any other
# literal braces.
XMLS = {
    # Gamma/Sigma stage 1: Read -> Apply-Orbit-File -> ThermalNoiseRemoval ->
    # Calibration (sigma, gamma and beta bands) -> Terrain-Flattening ->
    # Write (BEAM-DIMAP).
    "GS_TF": """<graph id="GS_TF">
  <version>1.0</version>

  <node id="Read">
    <operator>Read</operator>
    <parameters>
      <file>{INPUT}</file>
    </parameters>
  </node>

  <node id="Apply-Orbit-File">
    <operator>Apply-Orbit-File</operator>
    <sources>
      <sourceProduct refid="Read"/>
    </sources>
    <parameters>
      <orbitType>Sentinel Precise (Auto Download)</orbitType>
      <polyDegree>3</polyDegree>
    </parameters>
  </node>

  <node id="ThermalNoiseRemoval">
    <operator>ThermalNoiseRemoval</operator>
    <sources>
      <sourceProduct refid="Apply-Orbit-File"/>
    </sources>
  </node>

  <node id="Calibration">
    <operator>Calibration</operator>
    <sources>
      <sourceProduct refid="ThermalNoiseRemoval"/>
    </sources>
    <parameters>
      <outputSigmaBand>true</outputSigmaBand>
      <outputGammaBand>true</outputGammaBand>
      <outputBetaBand>true</outputBetaBand>
    </parameters>
  </node>

  <node id="Terrain-Flattening">
    <operator>Terrain-Flattening</operator>
    <sources>
      <sourceProduct refid="Calibration"/>
    </sources>
    <parameters>
      <demName>SRTM 1Sec HGT</demName>
      <demResamplingMethod>BICUBIC_INTERPOLATION</demResamplingMethod>
      <outputSigma0>true</outputSigma0>
    </parameters>
  </node>

  <node id="Write">
    <operator>Write</operator>
    <sources>
      <sourceProduct refid="Terrain-Flattening"/>
    </sources>
    <parameters>
      <file>{OUTPUT}</file>
      <formatName>BEAM-DIMAP</formatName>
    </parameters>
  </node>

</graph>""",

    # Gamma/Sigma stage 2: Read -> Speckle-Filter (Lee, 7x7) ->
    # Terrain-Correction ({EPSG}, 25 m spacing, local incidence angle and DEM
    # saved) -> Write (BEAM-DIMAP).
    "GS_TC": """<graph id="GS_TC">
  <version>1.0</version>

  <node id="Read">
    <operator>Read</operator>
    <parameters>
      <file>{INPUT}</file>
    </parameters>
  </node>

  <node id="Speckle-Filter">
    <operator>Speckle-Filter</operator>
    <sources>
      <sourceProduct refid="Read"/>
    </sources>
    <parameters>
      <filter>Lee</filter>
      <windowSize>7x7</windowSize>
    </parameters>
  </node>

  <node id="Terrain-Correction">
    <operator>Terrain-Correction</operator>
    <sources>
      <sourceProduct refid="Speckle-Filter"/>
    </sources>
    <parameters>
      <demName>SRTM 1Sec HGT</demName>
      <mapProjection>{EPSG}</mapProjection>
      <pixelSpacingInMeter>25.0</pixelSpacingInMeter>
      <saveLocalIncidenceAngle>true</saveLocalIncidenceAngle>
      <saveDEM>true</saveDEM>
      <nodataValueAtSea>true</nodataValueAtSea>
    </parameters>
  </node>

  <node id="Write">
    <operator>Write</operator>
    <sources>
      <sourceProduct refid="Terrain-Correction"/>
    </sources>
    <parameters>
      <file>{OUTPUT}</file>
      <formatName>BEAM-DIMAP</formatName>
    </parameters>
  </node>

</graph>""",

    # Gamma/Sigma features: Read -> GLCM (5x5 window, ten texture outputs, on
    # the four Gamma0/Sigma0 bands) -> BandMerge (GLCM output first, then the
    # Read product) -> Write (GeoTIFF-BigTIFF).
    "GS_FEAT": """
<graph id="GS_FEAT">
  <version>1.0</version>

  <node id="Read">
    <operator>Read</operator>
    <parameters>
      <file>{INPUT}</file>
    </parameters>
  </node>

  <node id="GLCM">
    <operator>GLCM</operator>
    <sources>
      <sourceProduct refid="Read"/>
    </sources>
    <parameters>
      <sourceBands>Gamma0_VH,Gamma0_VV,Sigma0_VH,Sigma0_VV</sourceBands>
      <windowSizeStr>5x5</windowSizeStr>
      <angleStr>ALL</angleStr>
      <quantizerStr>Probabilistic Quantizer</quantizerStr>
      <quantizationLevelsStr>64</quantizationLevelsStr>
      <displacement>1</displacement>
      <noDataValue>-9999</noDataValue>
      <outputContrast>true</outputContrast>
      <outputDissimilarity>true</outputDissimilarity>
      <outputHomogeneity>true</outputHomogeneity>
      <outputASM>true</outputASM>
      <outputEnergy>true</outputEnergy>
      <outputMAX>true</outputMAX>
      <outputEntropy>true</outputEntropy>
      <outputMean>true</outputMean>
      <outputVariance>true</outputVariance>
      <outputCorrelation>true</outputCorrelation>
    </parameters>
  </node>

   <node id="BandMerge">
    <operator>BandMerge</operator>
    <sources>
      <sourceProduct refid="GLCM"/>
      <sourceProduct.1 refid="Read"/>
    </sources>
    <parameters>
      <geographicError>1.0E-5</geographicError>
    </parameters>
  </node>

  <node id="Write">
    <operator>Write</operator>
    <sources>
      <sourceProduct refid="BandMerge"/>
    </sources>
    <parameters>
      <file>{OUTPUT}</file>
      <formatName>GeoTIFF-BigTIFF</formatName>
    </parameters>
  </node>

</graph>""",

    # Beta per-scene chain in a single graph: Read -> Apply-Orbit-File ->
    # ThermalNoiseRemoval (VH, VV) -> Calibration (beta only) ->
    # Speckle-Filter (Lee, 7x7) -> Terrain-Correction ({EPSG}, 25 m) ->
    # Write (BEAM-DIMAP).
    "BETA_TC":"""
<graph id="BETA_TC">
  <version>1.0</version>

  <node id="Read">
    <operator>Read</operator>
    <parameters>
      <file>{INPUT}</file>
    </parameters>
  </node>

  <node id="Apply-Orbit-File">
    <operator>Apply-Orbit-File</operator>
    <sources>
      <sourceProduct refid="Read"/>
    </sources>
    <parameters>
      <orbitType>Sentinel Precise (Auto Download)</orbitType>
      <polyDegree>3</polyDegree>
    </parameters>
  </node>

  <node id="ThermalNoiseRemoval">
    <operator>ThermalNoiseRemoval</operator>
    <sources>
      <sourceProduct refid="Apply-Orbit-File"/>
    </sources>
    <parameters>
      <selectedPolarisations>VH,VV</selectedPolarisations>
      <removeThermalNoise>true</removeThermalNoise>
    </parameters>
  </node>

  <node id="Calibration">
    <operator>Calibration</operator>
    <sources>
      <sourceProduct refid="ThermalNoiseRemoval"/>
    </sources>
    <parameters>
      <outputBetaBand>true</outputBetaBand>
      <outputGammaBand>false</outputGammaBand>
      <outputSigmaBand>false</outputSigmaBand>
    </parameters>
  </node>

  <node id="Speckle-Filter">
    <operator>Speckle-Filter</operator>
    <sources>
      <sourceProduct refid="Calibration"/>
    </sources>
    <parameters>
      <filter>Lee</filter>
      <windowSize>7x7</windowSize>
    </parameters>
  </node>

  <node id="Terrain-Correction">
    <operator>Terrain-Correction</operator>
    <sources>
      <sourceProduct refid="Speckle-Filter"/>
    </sources>
    <parameters>
      <demName>SRTM 1Sec HGT</demName>
      <mapProjection>{EPSG}</mapProjection>
      <pixelSpacingInMeter>25.0</pixelSpacingInMeter>
      <saveLocalIncidenceAngle>true</saveLocalIncidenceAngle>
      <saveDEM>true</saveDEM>
      <nodataValueAtSea>true</nodataValueAtSea>
    </parameters>
  </node>


  <node id="Write">
    <operator>Write</operator>
    <sources>
      <sourceProduct refid="Terrain-Correction"/>
    </sources>
    <parameters>
      <file>{OUTPUT}</file>
      <formatName>BEAM-DIMAP</formatName>
    </parameters>
  </node>
  </graph>""",

    # Beta features: Read -> GLCM (5x5 window, ten texture outputs, on
    # Beta0_VH and Beta0_VV) -> BandMerge (GLCM output first, then the Read
    # product) -> Write (GeoTIFF-BigTIFF).
    "BETA_FEAT": """<graph id="BETA_FEAT">
  <version>1.0</version>

  <node id="Read">
    <operator>Read</operator>
    <parameters>
      <file>{INPUT}</file>
    </parameters>
  </node>

  <node id="GLCM">
    <operator>GLCM</operator>
    <sources>
      <sourceProduct refid="Read"/>
    </sources>
    <parameters>
      <!-- Both polarisations in one go -->
      <sourceBands>Beta0_VH,Beta0_VV</sourceBands>
      <windowSizeStr>5x5</windowSizeStr>
      <angleStr>ALL</angleStr>
      <quantizerStr>Probabilistic Quantizer</quantizerStr>
      <quantizationLevelsStr>64</quantizationLevelsStr>
      <displacement>1</displacement>
      <noDataValue>-9999</noDataValue>

      <outputContrast>true</outputContrast>
      <outputDissimilarity>true</outputDissimilarity>
      <outputHomogeneity>true</outputHomogeneity>
      <outputASM>true</outputASM>
      <outputEnergy>true</outputEnergy>
      <outputMAX>true</outputMAX>
      <outputEntropy>true</outputEntropy>
      <outputMean>true</outputMean>
      <outputVariance>true</outputVariance>
      <outputCorrelation>true</outputCorrelation>
    </parameters>
  </node>

  <!-- Keep originals (Beta0_VH/VV, plus LIA/DEM if present) -->
  <node id="BandMerge">
    <operator>BandMerge</operator>
    <sources>
      <sourceProduct refid="GLCM"/>
      <sourceProduct.1 refid="Read"/>
    </sources>
    <parameters>
      <geographicError>1.0E-5</geographicError>
    </parameters>
  </node>

  <node id="Write">
    <operator>Write</operator>
    <sources>
      <sourceProduct refid="BandMerge"/>
    </sources>
    <parameters>
      <file>{OUTPUT}</file>
      <formatName>GeoTIFF-BigTIFF</formatName>
    </parameters>
  </node>

</graph>"""

}

def render_graph(name: str, **kw) -> str:
    """Fill the placeholders of an embedded graph template.

    Args:
        name: key of the template in ``XMLS``.
        **kw: placeholder values (e.g. ``INPUT``, ``OUTPUT``, ``EPSG``).
            Each value is converted to text and XML-escaped before it is
            substituted, so a path containing ``&``, ``<`` or ``>`` no
            longer produces a malformed graph file.

    Returns:
        The rendered graph XML as a string.

    Raises:
        KeyError: unknown template name, or a placeholder used by the
            template was not supplied.
    """
    from xml.sax.saxutils import escape  # local import keeps this cell self-contained

    safe_values = {key: escape(str(value)) for key, value in kw.items()}
    xml = XMLS[name].format(**safe_values)
    # Safety: expand any self-closing <pixelRegion/> into an explicit
    # open/close pair.
    xml = xml.replace("<pixelRegion/>", "<pixelRegion></pixelRegion>")
    return xml

def write_graph(name: str, out_path: Path, **kw) -> Path:
    """Render template ``name`` with ``kw`` and save it to ``out_path``.

    The parent folder is created when missing. The file is written as
    UTF-8 and ``out_path`` is returned.
    """
    target_dir = out_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    graph_text = render_graph(name, **kw)
    out_path.write_text(graph_text, encoding="utf-8")
    return out_path


In [7]:
# --- Working folders for the two processing branches
GS = cfg.work_root / "GammaSigma"
BETA = cfg.work_root / "Beta"
for d in (
    *(GS / sub for sub in ("TF", "TC", "Composite", "Features")),
    cfg.exports_root / "SingleBands",
    *(BETA / sub for sub in ("Stage1", "TC", "Composite", "Features")),
):
    d.mkdir(parents=True, exist_ok=True)

# Template *names* (keys into XMLS), not the XML text itself
XML_TF, XML_TC, XML_FEAT = "GS_TF", "GS_TC", "GS_FEAT"
XML_BETA_1, XML_BETA_3 = "BETA_TC", "BETA_FEAT"

def dim_band_names(dim_path: Path) -> list[str]:
    """Return the band names declared in a BEAM-DIMAP ``.dim`` header.

    BEAM-DIMAP headers list each band as a ``Spectral_Band_Info`` element
    whose ``BAND_NAME`` child carries the name. The previous lookup only
    searched ``BandList/Band/Name``, which such headers do not contain, so
    it returned an empty list and every band precheck failed. That older
    layout is still honoured as a fallback.

    Args:
        dim_path: path of the ``.dim`` XML header.

    Returns:
        Band names in document order, without empty entries or duplicates.
    """
    root = ET.parse(dim_path).getroot()
    candidates = [el.findtext("BAND_NAME") for el in root.iter("Spectral_Band_Info")]
    candidates += [el.findtext("Name") for el in root.findall(".//BandList/Band")]

    names: list[str] = []
    for raw in candidates:
        name = (raw or "").strip()
        if name and name not in names:
            names.append(name)
    return names

def preflight_bands(dim_path: Path, expected: list[str], label: str = "PRODUCT"):
    """Verify that a ``.dim`` product contains every expected band.

    Prints an OK line with the band count when nothing is missing.
    Otherwise prints the expected, found and missing band names and stops
    with ``SystemExit(1)``.
    """
    present = dim_band_names(dim_path)
    absent = [band for band in expected if band not in present]
    if not absent:
        print(f"[PRECHECK OK] {label}: {dim_path.name} ({len(present)} bands)")
        return

    print(f"\n[PRECHECK FAIL] {label}: {dim_path.name}")
    print("Expected:", expected)
    print("Found   :", present)
    print("Missing :", absent)
    raise SystemExit(1)

# ---- Expected contracts ----
# Band names a per-scene product must contain; verified with preflight_bands().
# Gamma/Sigma terrain-corrected product (checked in run_tf_tc_gs):
GS_EXPECT = ["Gamma0_VH","Sigma0_VH","Gamma0_VV","Sigma0_VV","elevation","localIncidenceAngle"]
# Beta Stage1 product (checked in run_beta_full):
BETA_EXPECT = ["Beta0_VH","Beta0_VV","elevation","localIncidenceAngle"]

def run_tf_tc_gs():
    """Run the per-scene Gamma/Sigma chain (TF graph, then TC graph).

    For every GRD input under ``cfg.input_root``:
      1. skip the scene when its TC product already exists;
      2. otherwise run the ``GS_TF`` graph, then the ``GS_TC`` graph on the
         TF result;
      3. delete the TF intermediate (``.dim`` + ``.data``) to save disk;
      4. check the TC product against ``GS_EXPECT``.

    Returns:
        Sorted list of TC ``.dim`` paths (skipped and newly built).

    Raises:
        RuntimeError: a GPT run failed (from ``run_gpt``).
        SystemExit: a freshly built TC product misses an expected band.
    """
    inputs = sort_by_time(find_grd_inputs(cfg.input_root))
    print("Found", len(inputs), "GRD inputs")
    tc_list = []

    for src in inputs:
        stem  = src.stem.replace(".SAFE","")
        tf_dim = GS/"TF"/f"{stem}_Orb_NR_Cal_TF.dim"
        tc_dim = GS/"TC"/f"{stem}_TC.dim"

        # Skip if TC already exists
        if tc_dim.exists():
            print(f"[SKIP] TF+TC already done for {stem}")
            tc_list.append(tc_dim)
            continue

        # --- TF stage ---
        tf_xml = GS/"TF"/f"{stem}_TF_RUN.xml"
        write_graph(XML_TF, tf_xml,
                    INPUT=str(src),
                    OUTPUT=str(tf_dim))
        run_gpt(tf_xml, cfg.gpt_mem)

        # --- TC stage ---
        tc_xml = GS/"TC"/f"{stem}_TC_RUN.xml"
        write_graph(XML_TC, tc_xml,
                    INPUT=str(tf_dim),
                    OUTPUT=str(tc_dim),
                    EPSG=cfg.epsg)
        run_gpt(tc_xml, cfg.gpt_mem)

        # Clean up TF to save disk. Still best-effort (never aborts the run),
        # but a failure is now reported instead of being silently swallowed,
        # so leftover intermediates do not fill the disk unnoticed.
        try:
            tf_data = tf_dim.with_suffix(".data")
            if tf_data.exists():
                shutil.rmtree(tf_data)
            if tf_dim.exists():
                tf_dim.unlink()
        except Exception as err:
            print(f"[WARN] Could not remove TF intermediate for {stem}: {err}")

        preflight_bands(tc_dim, GS_EXPECT, "GS_TC")
        tc_list.append(tc_dim)

    return sorted(tc_list)

def write_composite_graph_gs(tc_dims, out_xml: Path, out_dim: Path, ref_idx: int = 0) -> Path:
    """Write the Gamma/Sigma composite graph (Collocate + per-band average).

    The graph reads every product in ``tc_dims``, collocates them (the
    first product is the reference), averages each of the six bands over
    all scenes with BandMaths, merges the averaged bands and writes a
    BEAM-DIMAP product.

    Args:
        tc_dims: at least two terrain-corrected ``.dim`` paths.
        out_xml: where the graph file is written (parent folder is created).
        out_dim: product path placed in the Write node.
        ref_idx: accepted for interface compatibility; not used by the body.

    Returns:
        ``out_xml``.

    File paths are XML-escaped before being embedded, so characters such
    as ``&`` in a folder name no longer break the graph.
    """
    from xml.sax.saxutils import escape  # local import keeps this cell self-contained

    assert len(tc_dims) >= 2, "Need ≥2 scenes for GS composite"

    # Read, Read(2), Read(3)...
    read_nodes = []
    for i, src in enumerate(tc_dims):
        nid = "Read" if i == 0 else f"Read({i+1})"
        read_nodes.append(f"""
  <node id="{nid}">
    <operator>Read</operator>
    <parameters>
      <file>{escape(str(src))}</file>
    </parameters>
  </node>""")

    # Sources list matching those ids (first is 'Read', then 'Read(2)', ...)
    src_lines = ['      <sourceProduct refid="Read" />'] + \
                [f'      <sourceProduct.{k} refid="Read({k+1})" />' for k in range(1, len(tc_dims))]

    # No referenceProductName -> defaults to first source (safe)
    collocate_node = f"""
  <node id="Collocate">
    <operator>Collocate</operator>
    <sources>
{chr(10).join(src_lines)}
    </sources>
    <parameters>
      <resamplingType>BILINEAR_INTERPOLATION</resamplingType>
      <renameReferenceComponents>true</renameReferenceComponents>
      <renameSecondaryComponents>true</renameSecondaryComponents>
      <referenceComponentPattern>${{ORIGINAL_NAME}}_M</referenceComponentPattern>
      <secondaryComponentPattern>${{ORIGINAL_NAME}}_S${{SLAVE_NUMBER_ID}}</secondaryComponentPattern>
    </parameters>
  </node>"""

    def avg_expr(basename: str) -> str:
        # S0..S{N-2} matches SLAVE_NUMBER_ID starting at 0
        terms = [f"{basename}_M"] + [f"{basename}_S{i}" for i in range(0, len(tc_dims)-1)]
        return "avg(" + ",".join(terms) + ")"

    bm_nodes = []
    for band, out_name in [
        ("Gamma0_VH","Gamma0_VH"),
        ("Gamma0_VV","Gamma0_VV"),
        ("Sigma0_VH","Sigma0_VH"),
        ("Sigma0_VV","Sigma0_VV"),
        ("localIncidenceAngle","LIA"),
        ("elevation","DEM"),
    ]:
        bm_nodes.append(f"""
  <node id="BM_{out_name}">
    <operator>BandMaths</operator>
    <sources>
      <sourceProduct refid="Collocate"/>
    </sources>
    <parameters>
      <targetBands>
        <targetBand>
          <name>{out_name}</name>
          <type>float32</type>
          <expression>{avg_expr(band)}</expression>
          <noDataValue>-9999</noDataValue>
        </targetBand>
      </targetBands>
    </parameters>
  </node>""")

    bandmerge_node = """
  <node id="BandMerge">
    <operator>BandMerge</operator>
    <sources>
      <sourceProduct refid="BM_Gamma0_VH"/>
      <sourceProduct.1 refid="BM_Gamma0_VV"/>
      <sourceProduct.2 refid="BM_Sigma0_VH"/>
      <sourceProduct.3 refid="BM_Sigma0_VV"/>
      <sourceProduct.4 refid="BM_LIA"/>
      <sourceProduct.5 refid="BM_DEM"/>
    </sources>
    <parameters>
      <geographicError>1.0E-5</geographicError>
    </parameters>
  </node>"""

    write_node = f"""
  <node id="Write">
    <operator>Write</operator>
    <sources>
      <sourceProduct refid="BandMerge"/>
    </sources>
    <parameters>
      <file>{escape(str(out_dim))}</file>
      <formatName>BEAM-DIMAP</formatName>
    </parameters>
  </node>"""

    graph = f"""<graph id="GS_COMP">
  <version>1.0</version>
{''.join(read_nodes)}
{collocate_node}
{''.join(bm_nodes)}
{bandmerge_node}
{write_node}
</graph>
"""
    out_xml.parent.mkdir(parents=True, exist_ok=True)
    out_xml.write_text(dedent(graph), encoding="utf-8")
    return out_xml

def write_composite_graph_beta(tc_dims, out_xml: Path, out_dim: Path):
    """Write the Beta composite graph (Collocate + per-band average).

    The graph reads every product in ``tc_dims``, collocates them (the
    first product is the reference), averages ``Beta0_VH`` and ``Beta0_VV``
    over all scenes into bands named ``BETA0_VH`` / ``BETA0_VV``, merges
    them and writes a BEAM-DIMAP product.

    Args:
        tc_dims: at least two per-scene ``.dim`` paths.
        out_xml: where the graph file is written (parent folder is created).
        out_dim: product path placed in the Write node.

    Returns:
        ``out_xml``.

    File paths are XML-escaped before being embedded, so characters such
    as ``&`` in a folder name no longer break the graph.
    """
    from xml.sax.saxutils import escape  # local import keeps this cell self-contained

    assert len(tc_dims) >= 2, "Need ≥2 scenes for Beta composite"

    # Reads
    read_nodes = []
    for i, src in enumerate(tc_dims):
        nid = "Read" if i == 0 else f"Read({i+1})"
        read_nodes.append(f"""
  <node id="{nid}">
    <operator>Read</operator>
    <parameters>
      <file>{escape(str(src))}</file>
    </parameters>
  </node>""")

    src_lines = ['      <sourceProduct refid="Read"/>'] + \
                [f'      <sourceProduct.{k} refid="Read({k+1})"/>' for k in range(1, len(tc_dims))]

    collocate_node = f"""
  <node id="Collocate">
    <operator>Collocate</operator>
    <sources>
{chr(10).join(src_lines)}
    </sources>
    <parameters>
      <resamplingType>BILINEAR_INTERPOLATION</resamplingType>
      <renameReferenceComponents>true</renameReferenceComponents>
      <renameSecondaryComponents>true</renameSecondaryComponents>
      <referenceComponentPattern>${{ORIGINAL_NAME}}_M</referenceComponentPattern>
      <secondaryComponentPattern>${{ORIGINAL_NAME}}_S${{SLAVE_NUMBER_ID}}</secondaryComponentPattern>
    </parameters>
  </node>"""

    def avg_expr(basename: str) -> str:
        # Reference band is <name>_M; secondaries are <name>_S0 .. <name>_S{N-2}
        terms = [f"{basename}_M"] + [f"{basename}_S{i}" for i in range(0, len(tc_dims)-1)]
        return "avg(" + ",".join(terms) + ")"

    bm_nodes = []
    for band, out_name in [("Beta0_VH","BETA0_VH"), ("Beta0_VV","BETA0_VV")]:
        bm_nodes.append(f"""
  <node id="BM_{out_name}">
    <operator>BandMaths</operator>
    <sources>
      <sourceProduct refid="Collocate"/>
    </sources>
    <parameters>
      <targetBands>
        <targetBand>
          <name>{out_name}</name>
          <type>float32</type>
          <expression>{avg_expr(band)}</expression>
          <noDataValue>-9999</noDataValue>
        </targetBand>
      </targetBands>
    </parameters>
  </node>""")

    bandmerge_node = """
  <node id="BandMerge">
    <operator>BandMerge</operator>
    <sources>
      <sourceProduct refid="BM_BETA0_VH"/>
      <sourceProduct.1 refid="BM_BETA0_VV"/>
    </sources>
    <parameters>
      <geographicError>1.0E-5</geographicError>
    </parameters>
  </node>"""

    write_node = f"""
  <node id="Write">
    <operator>Write</operator>
    <sources>
      <sourceProduct refid="BandMerge"/>
    </sources>
    <parameters>
      <file>{escape(str(out_dim))}</file>
      <formatName>BEAM-DIMAP</formatName>
    </parameters>
  </node>"""

    graph = f"""<graph id="BETA_COMP">
  <version>1.0</version>
{''.join(read_nodes)}
{collocate_node}
{''.join(bm_nodes)}
{bandmerge_node}
{write_node}
</graph>
"""
    out_xml.parent.mkdir(parents=True, exist_ok=True)
    out_xml.write_text(dedent(graph), encoding="utf-8")
    return out_xml


def run_composite(tc_dims, out_dir: Path) -> Path:
    """Build (or reuse) the Gamma/Sigma composite for a set of TC scenes.

    The scenes are capped to ``cfg.max_inputs_per_comp`` (most recent
    kept) and ordered by acquisition time.

    Args:
        tc_dims: terrain-corrected ``.dim`` paths.
        out_dir: folder for the composite product and its graph file.

    Returns:
        Path of a ``.dim`` product: the composite, or the single TC product
        itself when only one scene is available.

    Raises:
        RuntimeError: no scene is available, or the GPT run failed.
    """
    use = cap_scenes(tc_dims, cfg.max_inputs_per_comp, policy="latest")
    use = sort_by_time(use)
    if len(use) == 0:
        raise RuntimeError("No TC scenes found for composite.")
    # This check used to be indented under the raise above and could never
    # run, so a single scene ended in the ">= 2 scenes" assertion of the
    # graph writer instead of being passed through.
    if len(use) == 1:
        print("⚠️ Only 1 scene available — skipping Collocate+avg composite; using single TC product directly.")
        return use[0]  # <-- IMPORTANT: returns a .dim that run_features can read

    first_tok, last_tok = sense_token(use[0].stem), sense_token(use[-1].stem)
    comp_dim = out_dir / f"Composite_{first_tok}_{last_tok}.dim"
    if comp_dim.exists():
        print("[SKIP] Composite exists:", comp_dim.name)
        return comp_dim

    comp_xml = out_dir / f"Composite_{first_tok}_{last_tok}_RUN.xml"
    write_composite_graph_gs(use, comp_xml, comp_dim, ref_idx=0)
    run_gpt(comp_xml, cfg.gpt_mem)
    return comp_dim
    
def run_features(comp_dim: Path, out_dir: Path, template_key: str):
    """Produce the feature stack GeoTIFF for a product, unless it exists.

    The output is ``Features_<tag>.tif`` in ``out_dir``, where ``<tag>`` is
    the stem of ``comp_dim`` with ``Composite_`` removed. When the file is
    missing, the graph ``template_key`` is rendered next to it and run with
    GPT. The output path is returned in both cases.
    """
    tag = comp_dim.stem.replace("Composite_","")
    target = out_dir / f"Features_{tag}.tif"

    if not target.exists():
        graph_file = out_dir / f"Features_{tag}_RUN.xml"
        write_graph(template_key, graph_file, INPUT=str(comp_dim), OUTPUT=str(target))
        run_gpt(graph_file, cfg.gpt_mem)
        return target

    print(f"[SKIP] Features exists: {target.name}")
    return target


# Orchestrators (final)
def run_gamma_sigma_full():
    """Run the whole Gamma/Sigma branch.

    Steps: per-scene TF+TC, composite over the scenes, then the feature
    stack built with the ``GS_FEAT`` template.

    Returns:
        ``(composite_dim, features_tif)``.
    """
    scene_products = run_tf_tc_gs()
    composite = run_composite(scene_products, GS/"Composite")
    features = run_features(composite, GS/"Features", "GS_FEAT")
    return composite, features


def run_beta_full():
    """Run the whole Beta branch: Stage1 per scene, composite, features.

    Steps:
      1. For every GRD input, build the Stage1 product with the ``BETA_TC``
         graph unless it already exists, then check it against
         ``BETA_EXPECT``.
      2. Cap the Stage1 list to ``cfg.max_inputs_per_comp`` (most recent
         kept), the same limit the Gamma/Sigma composite applies, so both
         branches average the same number of scenes. Previously this
         branch ignored the limit.
      3. With one scene, use it directly; otherwise build the composite
         unless it already exists.
      4. Build the feature stack with the ``BETA_FEAT`` graph unless it
         already exists.

    Returns:
        ``(comp_dim, feat_tif)``.

    Raises:
        RuntimeError: no Stage1 product is available, or a GPT run failed.
        SystemExit: a Stage1 product misses an expected band.
    """
    inputs = sort_by_time(find_grd_inputs(cfg.input_root))
    stage1 = []

    for src in inputs:
        stem = src.stem.replace(".SAFE","")
        out_dim = BETA/"Stage1"/f"{stem}_BETA_STAGE1.dim"
        run_xml = BETA/"Stage1"/f"{stem}_BETA_STAGE1_RUN.xml"

        if not out_dim.exists():
            write_graph("BETA_TC", run_xml, INPUT=str(src), OUTPUT=str(out_dim), EPSG=cfg.epsg)
            run_gpt(run_xml, cfg.gpt_mem)

        preflight_bands(out_dim, BETA_EXPECT, "BETA_STAGE1")
        stage1.append(out_dim)

    # cap_scenes sorts by acquisition time before (optionally) truncating
    stage1 = cap_scenes(stage1, cfg.max_inputs_per_comp, policy="latest")

    if len(stage1) == 0:
        raise RuntimeError("No Beta Stage1 products found.")
    if len(stage1) == 1:
        print("⚠️ Only 1 Beta scene — skipping composite; running features on single Stage1.")
        comp_dim = stage1[0]
        first_tok = last_tok = sense_token(comp_dim.stem)
    else:
        first_tok, last_tok = sense_token(stage1[0].stem), sense_token(stage1[-1].stem)
        comp_dim = BETA/"Composite"/f"Composite_{first_tok}_{last_tok}.dim"
        comp_xml = BETA/"Composite"/f"Composite_{first_tok}_{last_tok}_RUN.xml"

        if not comp_dim.exists():
            write_composite_graph_beta(stage1, comp_xml, comp_dim)
            run_gpt(comp_xml, cfg.gpt_mem)

    feat_tif = BETA/"Features"/f"Features_{first_tok}_{last_tok}.tif"
    run_xml = BETA/"Features"/f"Features_{first_tok}_{last_tok}_RUN.xml"
    if not feat_tif.exists():
        write_graph("BETA_FEAT", run_xml, INPUT=str(comp_dim), OUTPUT=str(feat_tif))
        run_gpt(run_xml, cfg.gpt_mem)

    return comp_dim, feat_tif



In [9]:
def split_gamma_sigma(features_tif: Path):
    """Export every band of the Gamma/Sigma feature stack as its own GeoTIFF.

    The stack must hold exactly 46 bands. Bands 1-40 are named as GLCM
    textures (four source bands x ten metrics, in that order) and bands
    41-46 as the plain GAMMA0/SIGMA0 VH/VV, LIA and DEM layers. File names
    come from ``build_name``; existing outputs are skipped. Each band is
    written with ``gdal_translate`` using NoData -9999 and LZW-compressed,
    tiled BigTIFF creation options.
    """
    glcm_sources = ["GAMMA0_VH","GAMMA0_VV","SIGMA0_VH","SIGMA0_VV"]
    glcm_metrics = ["CONTRAST","DISSIMILARITY","HOMOGENEITY","ASM","ENERGY","MAX",
                    "ENTROPY","MEAN","VARIANCE","CORRELATION"]
    plain_bands  = ["GAMMA0_VH","GAMMA0_VV","SIGMA0_VH","SIGMA0_VV","LIA","DEM"]

    n = gdal_band_count(features_tif)
    assert n == 46, f"Expected 46 bands (got {n})"

    # Name tokens in band order: 40 GLCM textures first, then the 6 plain layers
    tokens = []
    for source in glcm_sources:
        family, pol = source.split("_")
        prefix = "GLCM" + family.replace("0","") + pol
        tokens.extend(prefix + metric for metric in glcm_metrics)
    # Plain backscatter names lose their underscore, e.g. GAMMA0_VV -> GAMMA0VV
    tokens.extend(name.replace("_", "") for name in plain_bands)

    outdir = cfg.exports_root / "SingleBands"
    outdir.mkdir(parents=True, exist_ok=True)

    for band_no, token in enumerate(tokens, start=1):
        out_name = build_name(token)
        out_path = outdir / out_name
        if out_path.exists():
            print("[SKIP] band", band_no, "exists:", out_name)
            continue

        translate_cmd = [
            "gdal_translate", str(features_tif), str(out_path),
            "-b", str(band_no), "-a_nodata", "-9999",
            "-co","COMPRESS=LZW","-co","TILED=YES","-co","BIGTIFF=YES"
        ]
        subprocess.run(translate_cmd, check=True)
        print(f"-> {out_name}")

    print("✓ Gamma and Sigma band split complete.")


def split_beta(features_tif: Path):
    """
    Export every band of the Beta feature stack as its own GeoTIFF.

    Accepts stacks of 22 or 24 bands: bands 1-20 are GLCM textures (ten
    metrics for VH, then ten for VV), 21-22 the plain BETA0 VH/VV layers
    and, in a 24-band stack, 23-24 are named LIA and DEM. Outputs use
    NoData = -9999 and existing files are skipped.

    Raises:
        AssertionError: the stack has neither 22 nor 24 bands.
    """
    glcm_sources = ["BETA0_VH", "BETA0_VV"]
    glcm_metrics = ["CONTRAST","DISSIMILARITY","HOMOGENEITY","ASM","ENERGY","MAX",
                    "ENTROPY","MEAN","VARIANCE","CORRELATION"]

    n = gdal_band_count(features_tif)
    print(f"Found {n} bands in {features_tif.name}")
    if n not in (22, 24):
        raise AssertionError(f"Expected 22 or 24 bands in Beta Features. Got {n}.")

    # Name tokens in band order: GLCM textures follow the sourceBands order (VH, VV)
    tokens = []
    for source in glcm_sources:
        family, pol = source.split("_")             # BETA0, VH/VV
        prefix = "GLCM" + family.replace("0","") + pol   # e.g. GLCMBETAVH
        tokens.extend(prefix + metric for metric in glcm_metrics)
    # Plain backscatter follows, written without the underscore: BETA0_VV -> BETA0VV
    tokens.extend(["BETA0VH", "BETA0VV"])
    if n == 24:
        tokens.extend(["LIA", "DEM"])

    outdir = cfg.exports_root / "SingleBands"
    outdir.mkdir(parents=True, exist_ok=True)

    for band_no, token in enumerate(tokens, start=1):
        out_name = build_name(token)
        out_path = outdir / out_name
        if out_path.exists():
            print(f"[SKIP] band {band_no} exists: {out_name}")
            continue

        translate_cmd = [
            "gdal_translate", str(features_tif), str(out_path),
            "-b", str(band_no),
            "-a_nodata", "-9999",
            "-co","COMPRESS=LZW","-co","TILED=YES","-co","BIGTIFF=YES"
        ]
        subprocess.run(translate_cmd, check=True)
        print(f"-> {out_name}")

    print("✓ Beta band split complete.")


In [11]:
# Gamma/Sigma branch: run TF+TC, composite and features, then export each
# band of the feature stack as a single-band GeoTIFF
comp_gs, feat_gs = run_gamma_sigma_full()
split_gamma_sigma(feat_gs)

Found 6 GRD inputs
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\GammaSigma\TF\S1A_IW_GRDH_1SDV_20200509T183527_20200509T183552_032492_03C34B_9401_TF_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\GammaSigma\TC\S1A_IW_GRDH_1SDV_20200509T183527_20200509T183552_032492_03C34B_9401_TC_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\GammaSigma\TF\S1A_IW_GRDH_1SDV_20200602T183528_20200602T183553_032842_03CDD5_BC98_TF_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\GammaSigma\TC\S1A_IW_GRDH_1SDV_20200602T183528_20200602T183553_032842_03CDD5_BC98_TC_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\GammaSigma\TF\S1A_IW_GRDH_1SDV_20200708T183530_20200708T183555_033367_03DDAB_D6EE_TF_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\GammaSigma\TC\S1A_IW_GRDH_1SDV_20200708T183530_20200708T183555_033367_03DDAB_D6EE_TC_RUN.x

In [11]:
# Beta branch (when ready): run Stage1, composite and features, then export
# each band of the feature stack as a single-band GeoTIFF
comp_b, feat_b = run_beta_full()
split_beta(feat_b)

RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\Beta\Stage1\S1A_IW_GRDH_1SDV_20200509T183527_20200509T183552_032492_03C34B_9401_BETA_STAGE1_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\Beta\Stage1\S1A_IW_GRDH_1SDV_20200602T183528_20200602T183553_032842_03CDD5_BC98_BETA_STAGE1_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\Beta\Stage1\S1A_IW_GRDH_1SDV_20200708T183530_20200708T183555_033367_03DDAB_D6EE_BETA_STAGE1_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\Beta\Stage1\S1A_IW_GRDH_1SDV_20200801T183532_20200801T183557_033717_03E868_5CAC_BETA_STAGE1_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\Beta\Stage1\S1A_IW_GRDH_1SDV_20200906T183534_20200906T183559_034242_03FAA2_B743_BETA_STAGE1_RUN.xml -c 6G
RUN: C:\Program Files\esa-snap\bin\gpt.exe M:\Project BLS\S1\Work\Beta\Stage1\S1A_IW_GRDH_1SDV_20201012T183535_20201012T183600_034767_040D12