## 1) Create 2-band greening GeoTIFFs

### 1) Setup

In [None]:
# Cell 1: Setup

from pathlib import Path
import numpy as np
import rasterio
from tqdm.auto import tqdm
import pandas as pd


### 2) Configure paths

In [None]:
# Cell 2: Configure paths

# Root folder used as the 29-band input; the 15-band input is a subfolder of it.
BASE_29 = Path(r"C:\temp\timor_leste\greening\v3_dynamic_world_harmonised")
BASE_15 = BASE_29.joinpath("15_bands")

# Both band sets export into one shared folder, created up front if missing.
EXPORTS = BASE_29.joinpath("exports")
EXPORTS.mkdir(exist_ok=True, parents=True)

print("29-band input :", BASE_29)
print("15-band input :", BASE_15)
print("Unified export:", EXPORTS)


### 3) Helper: max-with-nodata

In [None]:
# Cell 3: Helper to handle nodata in max calc
def compute_max_ignoring_nodata(bands_stack: np.ndarray, nodata):
    """Return the per-pixel maximum along axis 0, skipping nodata cells.

    Args:
        bands_stack: Array whose first axis indexes the bands to compare.
        nodata: Value marking missing cells, or None when every cell is valid.
            NaN is supported as a nodata marker.

    Returns:
        Array with the first axis reduced away. A pixel that is nodata in
        every band is set to ``nodata``.
    """
    if nodata is None:
        return np.maximum.reduce(bands_stack, axis=0)

    if isinstance(nodata, (float, np.floating)) and np.isnan(nodata):
        # NaN never compares equal to itself, so an equality mask would hide
        # nothing and the NaNs would leak into the maximum.
        if not np.issubdtype(bands_stack.dtype, np.floating):
            # Non-float arrays cannot hold NaN: nothing to ignore.
            return np.maximum.reduce(bands_stack, axis=0)
        masked = np.ma.masked_where(np.isnan(bands_stack), bands_stack)
    else:
        masked = np.ma.masked_equal(bands_stack, nodata)

    return np.ma.max(masked, axis=0).filled(nodata)


def collect_files(folder: Path, patterns=("*.tif","*.tiff"), recursive=False):
    """Return the sorted paths under ``folder`` matching any glob pattern.

    Args:
        folder: Directory to search.
        patterns: Glob patterns tried one after another.
        recursive: Search subdirectories too when True.
    """
    search = folder.rglob if recursive else folder.glob
    return sorted(match for pattern in patterns for match in search(pattern))


### 4) Core processor (parameterized)

In [None]:
# Cell 4: Core processor (parameterized)
def process_file_param(in_path: Path,
                       out_dir: Path,
                       max_band_indices: list,
                       band2_index: int,
                       overwrite=False,
                       compress="lzw"):
    """Derive a 2-band GeoTIFF from one multi-band raster.

    Output band 1 is the per-pixel maximum of the bands in
    ``max_band_indices`` (nodata cells ignored); output band 2 is a copy of
    band ``band2_index``. Band numbers are passed straight to
    ``src.read`` (1-based in rasterio).

    Args:
        in_path: Source raster.
        out_dir: Folder receiving ``<in_path stem>_2bands.tif``; created if missing.
        max_band_indices: Source bands reduced into output band 1 (list or tuple).
        band2_index: Source band copied into output band 2.
        overwrite: Re-create the output even when it already exists.
        compress: Compression setting written into the output profile.

    Returns:
        ``("exists", out_path)`` when the output is already there and
        ``overwrite`` is False; ``("skip_requires_N_bands", "has_M")`` when
        the source has fewer bands than required; otherwise
        ``("written", out_path)``.

    Raises:
        Whatever rasterio raises while reading or writing. A partially
        written output is removed before the exception propagates.
    """
    out_path = out_dir / f"{in_path.stem}_2bands.tif"
    if out_path.exists() and not overwrite:
        return "exists", out_path

    with rasterio.open(in_path) as src:
        # Unpacking accepts any sequence of indices, not only lists.
        required_max = max([*max_band_indices, band2_index])
        if src.count < required_max:
            return f"skip_requires_{required_max}_bands", f"has_{src.count}"

        profile = src.profile.copy()
        profile.update(count=2, compress=compress, tiled=True, bigtiff="IF_SAFER")
        nodata = src.nodata

        out_dir.mkdir(parents=True, exist_ok=True)
        try:
            with rasterio.open(out_path, "w", **profile) as dst:
                # Work window-by-window so the whole raster is never in memory.
                for _, window in src.block_windows(1):
                    stack = np.stack(
                        [src.read(b, window=window) for b in max_band_indices],
                        axis=0,
                    )
                    max_vals = compute_max_ignoring_nodata(stack, nodata)
                    b2 = src.read(band2_index, window=window)

                    dst.write(max_vals, 1, window=window)
                    dst.write(b2, 2, window=window)

                if nodata is not None:
                    dst.nodata = nodata
                    dst.update_tags(1, nodata=nodata)
                    dst.update_tags(2, nodata=nodata)
        except BaseException:
            # Includes KeyboardInterrupt: a half-written file must not survive,
            # otherwise the next run would report it as "exists" and skip it.
            try:
                if out_path.exists():
                    out_path.unlink()
            except OSError:
                # Cleanup is best-effort; the original failure is what matters.
                pass
            raise

    return "written", out_path


### 5) Batch runner

In [None]:
# Cell 5: Batch runner
def run_batch(input_dir: Path, output_dir: Path, max_band_indices, band2_index,
              recursive=False, overwrite=False):
    """Run ``process_file_param`` on every GeoTIFF found in ``input_dir``.

    A failure on one file is recorded and does not stop the batch.

    Args:
        input_dir: Folder searched for ``*.tif`` / ``*.tiff`` inputs.
        output_dir: Folder receiving the 2-band outputs.
        max_band_indices: Source bands reduced into output band 1.
        band2_index: Source band copied into output band 2.
        recursive: Also search subfolders of ``input_dir``.
        overwrite: Re-create outputs that already exist (default keeps them).

    Returns:
        List of ``(input file name, status, detail)`` tuples, where status is
        the one reported by ``process_file_param`` or ``"error"``.
    """
    files = collect_files(input_dir, recursive=recursive)
    results = []
    for f in tqdm(files, desc=f"Processing {input_dir.name}", unit="file"):
        try:
            status, detail = process_file_param(
                f, output_dir,
                max_band_indices=max_band_indices,
                band2_index=band2_index,
                overwrite=overwrite,
            )
        except Exception as e:
            # Keep the exception type: some exceptions carry an empty message.
            results.append((f.name, "error", f"{type(e).__name__}: {e}"))
        else:
            results.append((f.name, status, str(detail)))
    return results


### 6) Execute for the 29-band set

In [None]:
# Cell 6: Run for 29-band geotiffs (Band 1 = max(19-22), Band 2 = 29)
results_29 = run_batch(
    input_dir=BASE_29,
    output_dir=EXPORTS,
    max_band_indices=[19, 20, 21, 22],
    band2_index=29,
)
# One row per input file: its name, the outcome, and the output path or detail.
df_29 = pd.DataFrame(data=results_29, columns=["input_file", "status", "output"])
df_29


### 7) Execute for the 15-band set

In [None]:
# Cell 7: Run for 15-band geotiffs (Band 1 = max(5-8), Band 2 = 15)
results_15 = run_batch(
    input_dir=BASE_15,
    output_dir=EXPORTS,
    max_band_indices=[5, 6, 7, 8],
    band2_index=15,
)
# One row per input file: its name, the outcome, and the output path or detail.
df_15 = pd.DataFrame(data=results_15, columns=["input_file", "status", "output"])
df_15


### 8) Combined summary

In [None]:
# Cell 8: Combined summary
# Count files per status, with one column per dataset.
summary = pd.concat(
    [
        frame.assign(dataset=label)
        for label, frame in (("29-band", df_29), ("15-band", df_15))
    ],
    ignore_index=True,
).pivot_table(
    index="status",
    columns="dataset",
    values="input_file",
    aggfunc="count",
    fill_value=0,
).reset_index()
summary


## 2) Create mosaic

In [None]:
# Cell 1: Setup

from pathlib import Path
import numpy as np
import rasterio
from rasterio.merge import merge
from rasterio.vrt import WarpedVRT
from rasterio.enums import Resampling
from tqdm.auto import tqdm


In [None]:
# Cell 2: Paths & output filename

# Folder searched for the 2-band inputs; the mosaic is written there as well.
EXPORTS = Path(r"C:\temp\timor_leste\greening\v3_dynamic_world_harmonised\exports")
EXPORTS.mkdir(exist_ok=True, parents=True)

OUT_MOSAIC = EXPORTS.joinpath("mosaic_timor_leste_2bands.tif")

print("Exports folder:", EXPORTS)
print("Mosaic output :", OUT_MOSAIC)


In [None]:
# Cell 3: Collect candidate GeoTIFFs (2-band exports)
def collect_tifs(folder: Path, patterns=("*.tif", "*.tiff")):
    """Return the sorted paths of the 2-band rasters directly inside ``folder``.

    Files matching ``patterns`` are opened with rasterio and kept only when
    they report exactly two bands. A file that cannot be opened is skipped,
    with a warning so the omission is visible instead of silent.

    Args:
        folder: Directory to search (not recursive).
        patterns: Glob patterns tried one after another.
    """
    import warnings

    candidates = []
    for pat in patterns:
        candidates.extend(folder.glob(pat))

    two_band_files = []
    for f in candidates:
        try:
            with rasterio.open(f) as src:
                band_count = src.count
        except Exception as exc:
            # Best-effort scan: report the file and carry on with the rest.
            warnings.warn(f"Skipping unreadable raster {f}: {exc}")
            continue
        if band_count == 2:
            two_band_files.append(f)
    return sorted(two_band_files)

# The mosaic is written into this same folder and also has two bands, so a
# mosaic left over from an earlier run must not be merged into the new one.
files = [f for f in collect_tifs(EXPORTS) if f != OUT_MOSAIC]
print(f"Found {len(files)} 2-band GeoTIFF(s).")
for f in files[:10]:
    print("  -", f.name)

if not files:
    raise SystemExit("No 2-band GeoTIFFs found in the exports folder.")


In [None]:
# Cell 4: Choose a target grid (CRS, resolution) using the first file

# The first collected file defines the grid used for all the other inputs.
first_path = files[0]
with rasterio.open(first_path) as ref:
    dst_crs, dst_nodata = ref.crs, ref.nodata  # nodata may be None
    dst_dtype = ref.dtypes[0]
    # The y pixel size is typically negative, so keep magnitudes only.
    xres, yres = abs(ref.transform.a), abs(ref.transform.e)
    dst_res = (xres, yres)

print("Target CRS:", dst_crs)
print("Target res:", dst_res)
print("Target dtype:", dst_dtype)
print("Target nodata:", dst_nodata)


In [None]:
# Cell 5: Build VRTs and merge (keeping NaNs)

from contextlib import ExitStack

# ExitStack closes every dataset and VRT opened so far, in reverse order,
# even when opening a later file or the merge itself fails.
with ExitStack() as stack:
    vrt_list = []
    for path in tqdm(files, desc="Preparing VRTs"):
        src = stack.enter_context(rasterio.open(path))
        # NOTE(review): confirm that WarpedVRT honours `dst_resolution`; it may
        # be passed through as an unrecognised warp option.
        vrt = stack.enter_context(
            WarpedVRT(
                src,
                crs=dst_crs,
                resampling=Resampling.nearest,
                dst_resolution=dst_res,
                src_nodata=src.nodata,
                nodata=np.nan,  # request NaN as nodata
            )
        )
        vrt_list.append((src, vrt))

    # The whole mosaic is built in memory as one array.
    mosaic_arr, mosaic_transform = merge(
        [vrt for _, vrt in vrt_list],
        nodata=np.nan,
        dtype="float32",  # force float so NaNs are preserved
    )

print("Mosaic shape (bands, rows, cols):", mosaic_arr.shape)
print("Array dtype:", mosaic_arr.dtype)


In [None]:
# Cell 6: Write mosaic with NaNs preserved

# Output profile taken from the merged array and the reference CRS.
profile = {
    "driver": "GTiff",
    "dtype": "float32",       # ensures NaNs are valid
    "count": mosaic_arr.shape[0],
    "width": mosaic_arr.shape[2],
    "height": mosaic_arr.shape[1],
    "crs": dst_crs,
    "transform": mosaic_transform,
    # Declare NaN as nodata in the file metadata; the merge above used NaN as
    # its nodata value, and without this the file would declare none.
    "nodata": np.nan,
    "tiled": True,
    "compress": "lzw",
    "bigtiff": "IF_SAFER"
}

with rasterio.open(OUT_MOSAIC, "w", **profile) as dst:
    dst.write(mosaic_arr)

print("✅ Wrote mosaic with NaNs preserved:", OUT_MOSAIC)


In [None]:
# Cell: Band-2 summary metrics in hectares (0=no greening, 1=greening)

from pathlib import Path
import numpy as np
import pandas as pd
import rasterio

PIXEL_SIZE_M = 20  # meters
AREA_PER_PIXEL_HA = (PIXEL_SIZE_M * PIXEL_SIZE_M) / 10_000.0  # 400 m2 = 0.04 ha

out_csv = Path(OUT_MOSAIC).with_name("mosaic_band2_summary.csv")

counts = {0: 0, 1: 0}
total_valid = 0

with rasterio.open(OUT_MOSAIC) as src:
    if src.count < 2:
        raise ValueError("Expected a 2-band mosaic (Band 2 = greening class).")

    # The areas below assume square pixels of PIXEL_SIZE_M. The transform is
    # expressed in CRS units, so a mismatch (for example a CRS in degrees)
    # means the hectare figures cannot be trusted.
    px_w, px_h = abs(src.transform.a), abs(src.transform.e)
    if not (np.isclose(px_w, PIXEL_SIZE_M) and np.isclose(px_h, PIXEL_SIZE_M)):
        print(
            f"WARNING: mosaic pixel size is {px_w} x {px_h} (CRS units), "
            f"not {PIXEL_SIZE_M} m; area figures may be wrong."
        )

    # Iterate window-by-window to stay memory-safe
    for _, window in src.block_windows(1):
        b2 = src.read(2, window=window)  # float mosaic with possible NaNs
        valid = np.isfinite(b2)          # treat only finite values; NaN is nodata

        if not np.any(valid):
            continue

        b2v = b2[valid]
        # Values should be 0 or 1; only exact matches are counted per class
        counts[0] += np.count_nonzero(b2v == 0)
        counts[1] += np.count_nonzero(b2v == 1)
        total_valid += b2v.size

# Build summary table
rows = []
for cls, label in [(0, "No greening"), (1, "Greening")]:
    px = counts[cls]
    ha = px * AREA_PER_PIXEL_HA
    pct = (px / total_valid * 100.0) if total_valid > 0 else 0.0
    rows.append({"Class": label, "Pixels": px, "Area_ha": ha, "Percent_of_valid_%": pct})

df = pd.DataFrame(rows).sort_values("Class").reset_index(drop=True)

# Totals row: the area is derived from the same pixel count shown in the row,
# so the two stay consistent even if some valid pixels are neither 0 nor 1.
total_area = total_valid * AREA_PER_PIXEL_HA
df_total = pd.DataFrame([{
    "Class": "Total (valid)",
    "Pixels": total_valid,
    "Area_ha": total_area,
    "Percent_of_valid_%": 100.0 if total_valid > 0 else 0.0
}])

display(df)
display(df_total)

# Save CSV
df_out = pd.concat([df, df_total], ignore_index=True)
df_out.to_csv(out_csv, index=False)
print(f"Saved summary to: {out_csv}")
