In [8]:
import earthaccess
import os
import netCDF4 as nc
import numpy as np
import pandas as pd
import geopandas as gpd
# Authenticate against Earthdata using the "netrc" login strategy.
earthaccess.login(strategy="netrc")
print("Authenticated")

# List collections matching the keyword so the short name used below can be
# checked by eye against what the catalogue actually returns.
datasets = earthaccess.search_datasets(
    keyword="SWOT L4 discharge",
    count=50
)

for d in datasets:
    print(d["umm"]["ShortName"])

# Defined at top level (not inside the loop above) so it exists even when the
# dataset search returns nothing, and is assigned exactly once.
SHORT_NAME = "SWOT_L4_HR_DAWG_SOS_DISCHARGE_V3"

# Quick probe: request a single granule, newest first, to confirm that the
# collection is reachable before asking for the full listing.
granules = earthaccess.search_data(
    short_name=SHORT_NAME,
    sort_key="-start_date",
    count=1
)

print("Granules found:", len(granules))

granules = earthaccess.search_data(
    short_name=SHORT_NAME,
    count=50  # enough to list all regions
)

# Keep only granules whose native-id starts with the "eu_" prefix.
eu_granules = [
    g for g in granules
    if g["meta"]["native-id"].startswith("eu_")
]

if not eu_granules:
    # Fail early with a clear message instead of an IndexError further down.
    raise RuntimeError(
        f"No granules with an 'eu_' native-id were found for {SHORT_NAME}"
    )

paths = earthaccess.download(
    eu_granules,
    local_path="./downloaded_files/sos_europe"
)

if not paths:
    raise RuntimeError("earthaccess.download returned no local files")

print(paths[0])

# -------------------------
# input / output locations
# -------------------------
NC_PATH = r"downloaded_files\sos_europe\eu_sword_v16_SOS_results_unconstrained_20230502T204408_20250502T204408_20251219T163700.nc"
GEOJSON_PATH = r"C:\UNESCO\Code\data\Dnipro_basin_shapefiles\derived\dnipro_sword_reaches_clip.geojson"

OUT_DIR = r"C:\UNESCO\Code\data\Dnipro_consensus_q_csvs"
os.makedirs(OUT_DIR, exist_ok=True)

# -------------------------
# collect the reach ids present in the GeoJSON
# -------------------------
gdf = gpd.read_file(GEOJSON_PATH)

# Candidate column names, checked in order; extend the list if the layer uses
# a different name for the reach identifier.
reach_field_candidates = ["reach_id", "ReachID", "REACH_ID", "sword_reach_id", "SWORD_REACH_ID"]
reach_field = None
for candidate in reach_field_candidates:
    if candidate in gdf.columns:
        reach_field = candidate
        break

if reach_field is None:
    raise ValueError(f"Could not find a reach id field in GeoJSON. Columns: {list(gdf.columns)}")

# Coerce to numbers (non-numeric values become NaN), drop the NaNs, then keep
# the distinct ids as 64-bit integers.
numeric_ids = pd.to_numeric(gdf[reach_field], errors="coerce")
numeric_ids = numeric_ids.dropna()
dnipro_reach_ids = numeric_ids.astype("int64").unique()

print(f"GeoJSON reaches: {len(dnipro_reach_ids)}")

# -------------------------
# open netcdf, then export one CSV per reach
# -------------------------
skipped_not_found = []
written = 0

# Reference instant that the per-reach time offsets (in seconds) are added to.
epoch = np.datetime64("2000-01-01", "s")

# The context manager closes the file even if an exception interrupts the
# export loop, so the handle is never leaked.
with nc.Dataset(NC_PATH, "r") as ds:
    reaches = ds.groups["reaches"]
    consensus = ds.groups["consensus"]

    nc_reach_ids = reaches.variables["reach_id"][:].astype("int64")

    # map reach_id -> index in netcdf (fast lookup)
    id_to_idx = {int(rid): int(i) for i, rid in enumerate(nc_reach_ids)}

    # locate fill/missing for consensus_q
    qvar = consensus.variables["consensus_q"]
    missing = None
    if "_FillValue" in qvar.ncattrs():
        missing = qvar.getncattr("_FillValue")
    elif "missing_value" in qvar.ncattrs():
        missing = qvar.getncattr("missing_value")

    time_var = consensus.variables["time_int"]

    for rid in dnipro_reach_ids:
        rid_int = int(rid)
        if rid_int not in id_to_idx:
            skipped_not_found.append(rid_int)
            continue

        i = id_to_idx[rid_int]

        # Per-reach time array; entries at or below -9.0e10 are treated as
        # fill values and dropped.
        times = np.asarray(time_var[i], dtype="float64")
        valid_time = times > -9.0e10
        times_valid = times[valid_time].astype("int64")

        if times_valid.size == 0:
            # still write an empty CSV with headers
            df = pd.DataFrame({"reach_id": [], "date": [], "consensus_q": []})
        else:
            # Vectorised equivalent of adding each offset (seconds) to the
            # epoch one element at a time.
            dates = epoch + times_valid.astype("timedelta64[s]")

            # Per-reach discharge array, aligned with the time array.
            q_all = np.asarray(qvar[i], dtype="float64")
            if q_all.shape != times.shape:
                # Surface a clear message rather than an opaque boolean-index
                # error when the two per-reach arrays disagree in length.
                raise ValueError(
                    f"Reach {rid_int}: consensus_q has shape {q_all.shape} "
                    f"but time_int has shape {times.shape}"
                )
            q = q_all[valid_time]

            # Replace the declared fill value and any large negative
            # sentinel with NaN so they are written as empty CSV cells.
            if missing is not None:
                q[q == missing] = np.nan
            q[q <= -9.0e10] = np.nan

            df = pd.DataFrame({
                "reach_id": rid_int,
                "date": dates.astype("datetime64[ns]"),
                "consensus_q": q
            })

        out_path = os.path.join(OUT_DIR, f"reach_{rid_int}_consensus_q.csv")
        df.to_csv(out_path, index=False)
        written += 1

print(f"Wrote {written} CSVs to: {OUT_DIR}")
if skipped_not_found:
    print(f"Skipped {len(skipped_not_found)} reaches not found in NetCDF. Example(s): {skipped_not_found[:10]}")
 

Authenticated
SWOT_L4_HR_DAWG_SOS_DISCHARGE_V3
Granules found: 1


QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<?, ?it/s]
PROCESSING TASKS | : 100%|██████████| 1/1 [00:00<?, ?it/s]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<?, ?it/s]


downloaded_files\sos_europe\eu_sword_v16_SOS_results_unconstrained_20230502T204408_20250502T204408_20251219T163700.nc
GeoJSON reaches: 842
Wrote 559 CSVs to: C:\UNESCO\Code\data\Dnipro_consensus_q_csvs
Skipped 283 reaches not found in NetCDF. Example(s): [22601000045, 22511100015, 22511100041, 22511300291, 22511300281, 22511300301, 22511300063, 22511300051, 22511300084, 22511300093]
