This notebook replaces the CARAVAN precipitation in each CAMELS_UY basin time-series file with CHIRPS precipitation, on the dates where both datasets overlap.

In [1]:
from pathlib import Path
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [2]:
# Source of the per-basin NetCDF time series (the loop below only reads from here).
BACKUP_DIR = Path("..", "preparing_data", "filtered_data", "time_series")

# Destination for the rewritten NetCDF files; created on demand by the loop below.
DATA_DIR = Path("filtered_data_CHIRPS", "time_series")

# Folder holding one "<basin_id>_precip.csv" per basin with the replacement
# precipitation (presumably CHIRPS, per the notebook description).
UYPRECIP_DIR = Path("precip_timeseries")

In [6]:
# Names of the precipitation variable and the time coordinate in the NetCDF files.
PRECIP_VAR = "prcp_mm_day"
TIME_VAR = "date"

# Column names in the per-basin replacement CSV files.
CSV_TIME_COL = "time"
CSV_PRECIP_COL = "precipitation"

DATA_DIR.mkdir(parents=True, exist_ok=True)

# For every basin file in BACKUP_DIR: overwrite PRECIP_VAR with the CSV values on
# the dates present in both sources and write the result to DATA_DIR.
# The input files in BACKUP_DIR are never modified.
for nc_in in BACKUP_DIR.glob("CAMELS_UY_*.nc"):
    basin_id = nc_in.stem
    print(basin_id)
    nc_out = DATA_DIR / nc_in.name
    csv_file = UYPRECIP_DIR / f"{basin_id}_precip.csv"

    print(f"\nProcessing basin: {basin_id}")

    # ---------- Load forcing ----------
    # The context manager guarantees the file handle is released on every path
    # out of this iteration (skip, success, or exception).
    with xr.open_dataset(nc_in) as ds:
        ds.load()  # pull everything into memory before editing

        if PRECIP_VAR not in ds or TIME_VAR not in ds.coords:
            print("  ⏭ Missing precip or time coord – copying unchanged")
            ds.to_netcdf(nc_out)
            continue

        # ---------- Load replacement precipitation ----------
        if not csv_file.exists():
            # NOTE: no output file is written for this basin.
            print("  ⏭ No gauge data")
            continue

        precip_new = (
            pd.read_csv(csv_file, parse_dates=[CSV_TIME_COL])
            .rename(columns={CSV_TIME_COL: TIME_VAR})
            .set_index(TIME_VAR)[CSV_PRECIP_COL]
            # Missing values must not overwrite valid data in the dataset.
            .dropna()
        )
        # Duplicate timestamps would make the label-based selection below return
        # more rows than there are dates to fill; keep the first occurrence.
        precip_new = precip_new[~precip_new.index.duplicated(keep="first")]

        # ---------- Align dates ----------
        ds_time = pd.to_datetime(ds[TIME_VAR].values)
        common_dates = ds_time.intersection(precip_new.index)

        if len(common_dates) == 0:
            # NOTE: no output file is written for this basin.
            print("  ⏭ No overlapping dates")
            continue

        print(f"  ✔ Replacing precipitation for {len(common_dates)} days")

        # ---------- Replace ----------
        # Dates outside the overlap keep their original values.
        precip = ds[PRECIP_VAR].copy()
        precip.loc[{TIME_VAR: common_dates}] = precip_new.loc[common_dates].to_numpy()
        ds[PRECIP_VAR] = precip

        # Provenance note stored in the output file. Per the notebook description
        # the replacement data is CHIRPS (the previous text said "Gauge data").
        ds.attrs["precip_update"] = "CHIRPS precipitation used where available"

        # ---------- Write ----------
        ds.to_netcdf(nc_out)

print("\n✅ All basins processed successfully.")

CAMELS_UY_7

Processing basin: CAMELS_UY_7
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_2

Processing basin: CAMELS_UY_2
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_16

Processing basin: CAMELS_UY_16
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_9

Processing basin: CAMELS_UY_9
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_5

Processing basin: CAMELS_UY_5
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_11

Processing basin: CAMELS_UY_11
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_6

Processing basin: CAMELS_UY_6
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_8

Processing basin: CAMELS_UY_8
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_3

Processing basin: CAMELS_UY_3
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_15

Processing basin: CAMELS_UY_15
  ✔ Replacing precipitation for 11322 days
CAMELS_UY_10

Processing basin: CAMELS_UY_10
  ✔ Replacing precipitation for 11322 days

✅ All basins processed successfully.
