### 4. Test NDVI Scenes

### 1. Imports

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio

from thess_geo_analytics.utils.RepoPaths import RepoPaths
from thess_geo_analytics.pipelines.BuildNdviScenePipeline import (
    BuildNdviScenePipeline,
    BuildNdviSceneParams,
)


### 2. Parameters 

In [2]:
# Month under test; it selects which assets manifest CSV is loaded below.
month = "2026-01"
manifest_path = RepoPaths.table(f"assets_manifest_{month}.csv")

if not manifest_path.exists():
    raise RuntimeError(
        f"Missing manifest: {manifest_path}\n"
        "Run assets manifest pipeline first (and download assets)."
    )

df = pd.read_csv(manifest_path)

# Fail with a readable message instead of a bare KeyError / IndexError further
# down when the manifest lacks the columns used here or lists no scenes at all.
required_columns = ["scene_id", "datetime", "cloud_cover"]
absent_columns = [name for name in required_columns if name not in df.columns]
if absent_columns:
    raise RuntimeError(
        f"Manifest {manifest_path} is missing required columns: {absent_columns}"
    )
if df.empty:
    raise RuntimeError(f"Manifest {manifest_path} contains no scenes for {month}.")

# Pick the best scene: lowest cloud cover first, ties broken by the `datetime`
# column in ascending order.
df_sorted = df.sort_values(["cloud_cover", "datetime"], ascending=[True, True])
scene_id = df_sorted.iloc[0]["scene_id"]

print("Month:", month)
print("Scene chosen:", scene_id)
# Last expression: display the three best candidates for a quick sanity check.
df_sorted.head(3)[["scene_id", "datetime", "cloud_cover"]]


Month: 2026-01
Scene chosen: S2A_MSIL2A_20260101T091421_N0511_R050_T34TFL_20260101T111709


Unnamed: 0,scene_id,datetime,cloud_cover
0,S2A_MSIL2A_20260101T091421_N0511_R050_T34TFL_2...,2026-01-01T09:14:21.024000+00:00,0.05
1,S2A_MSIL2A_20260101T091421_N0511_R050_T34TFK_2...,2026-01-01T09:14:21.024000+00:00,0.14
2,S2A_MSIL2A_20260101T091421_N0511_R050_T34TGL_2...,2026-01-01T09:14:21.024000+00:00,0.58


### 3. Check local assets exist

In [4]:
# Manifest columns that hold the local file paths required for the chosen scene.
asset_columns = ["local_b04", "local_b08", "local_scl"]

# First manifest row matching the scene selected in the previous cell.
row = df[df["scene_id"] == scene_id].iloc[0]

paths = []
missing = []
for asset_column in asset_columns:
    asset_value = row[asset_column]
    # An empty CSV field is read by pandas as NaN, and Path(NaN) raises a
    # TypeError; report such entries as missing assets instead of crashing.
    if pd.isna(asset_value) or not str(asset_value).strip():
        missing.append(f"{asset_column}: <no path recorded in manifest>")
        continue
    asset_path = Path(str(asset_value))
    paths.append(asset_path)
    if not asset_path.exists():
        missing.append(str(asset_path))

if missing:
    raise RuntimeError(
        "Missing local assets for the chosen scene:\n"
        + "\n".join(missing)
        + "\n\nRun assets manifest with download enabled."
    )

print("Local assets exist")


Local assets exist


### 4. Run pipeline (produces tmp tif + png)

In [5]:
# Build the NDVI outputs for the selected month/scene. `run` is unpacked into
# two values, which are printed below as the written tif and png outputs.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'NdviProcessor' object has no attribute 'write_geotiff'",
# so the following cells cannot run until that is fixed in the package.
pipe = BuildNdviScenePipeline()
scene_params = BuildNdviSceneParams(scene_id=scene_id, month=month)
out_tif, out_png = pipe.run(scene_params)

print("Wrote:", out_tif)
print("Wrote:", out_png)


AttributeError: 'NdviProcessor' object has no attribute 'write_geotiff'

### 5. Validate NDVI is within [-1, 1] and nodata pixels exist (cloud masking)

In [None]:
# Load band 1 of the NDVI GeoTIFF as float32 together with its nodata value.
with rasterio.open(out_tif) as ds:
    nodata = ds.nodata
    arr = ds.read(1).astype(np.float32)

# Represent nodata pixels as NaN so they can be excluded from the statistics.
# `astype` returned a fresh array, so masking in place is safe.
if nodata is not None:
    arr[arr == nodata] = np.nan

not_nan = ~np.isnan(arr)
valid = arr[not_nan]
if not valid.size:
    raise RuntimeError("FAIL: all pixels are masked (no valid NDVI).")

mn = float(valid.min())
mx = float(valid.max())
print("NDVI min/max:", mn, mx)

# NDVI must lie in [-1, 1]; a small tolerance is allowed on both ends.
if not (mn >= -1.0001 and mx <= 1.0001):
    raise RuntimeError(f"FAIL: NDVI out of bounds: {mn}..{mx}")

# Share of NaN pixels: a non-zero value means some pixels were masked out.
nan_ratio = float(np.count_nonzero(np.isnan(arr)) / arr.size)
print("Masked pixel ratio (nan):", nan_ratio)

if nan_ratio != 0.0:
    print("Masking present (nodata pixels exist)")
else:
    print("WARNING: no masked pixels found. Possible: no clouds in this scene OR masking mismatch.")


NameError: name 'out_tif' is not defined

### 6. Quick preview inside notebook

In [7]:
# Quick visual check of the NDVI array loaded in the validation cell.
# `plt` is imported in the imports cell at the top of the notebook.
fig, ax = plt.subplots(figsize=(6, 6))

# Fixed colour limits keep previews of different scenes comparable.
ndvi_image = ax.imshow(arr, vmin=-0.2, vmax=0.8)
ax.set_title(f"NDVI Scene preview\n{scene_id}")
ax.axis("off")

# The colorbar maps colours back to NDVI values so the figure stands alone.
fig.colorbar(ndvi_image, ax=ax, label="NDVI", shrink=0.8)
fig.tight_layout()
plt.show()


NameError: name 'arr' is not defined

<Figure size 600x600 with 0 Axes>