In [None]:
import os
import warnings

import geopandas as gpd
import nivapy3 as nivapy
import pandas as pd
from IPython.display import clear_output

# Ignore every warning raised from here on (all categories, all modules).
# NOTE(review): presumably done to keep cell output tidy, but it also hides
# deprecation and data-related warnings from the libraries used below.
warnings.simplefilter("ignore")

# Improving suspended sediment coefficients in TEOTIL3

## Notebook 02: Estimate SS fluxes

This notebook uses NVE's GTS API to estimate daily flows for each catchment identified in notebook 01. Annual SS fluxes are then calculated using the OSPAR ratio estimator.

## 1. Read data

In [None]:
# Load the outputs of notebook 01: the filtered catchment polygons and the
# filtered water chemistry samples
dir_path = r"/home/jovyan/shared/common/teotil3/nve_ss_data"
cat_gpkg_path = os.path.join(dir_path, "filtered_catchments.gpkg")
chem_xlsx_path = os.path.join(dir_path, "filtered_data.xlsx")

cat_gdf = gpd.read_file(cat_gpkg_path)

# Rename the SS column on load ("SS_mgpl" => "SS_mg/l")
df = pd.read_excel(chem_xlsx_path, sheet_name="data").rename(
    columns={"SS_mgpl": "SS_mg/l"}
)

## 2. Estimate daily flows using GTS API

In [None]:
# Get flow data from the GTS API
# Loop over catchments of interest. The GTS API occasionally times-out, so each
# request is retried up to 'n_retries' times. Stations that end up without flow data
# (no chemistry dates, no API data, or all retries failed) are recorded in
# 'skipped_stns' and listed at the end, rather than being dropped silently.
# TO DO: Update nivapy.da.get_nve_gts_api_aggregated_time_series to properly handle
# timeout errors, then streamline this code.
n_retries = 10
df_list = []
skipped_stns = {}  # station_id => reason the station was skipped
for idx in range(len(cat_gdf)):
    clear_output(wait=True)
    print(f"Processing {idx+1}/{len(cat_gdf)}")

    # Get start and end year for this site. This does not involve the API, so it
    # is done once per station, outside the retry loop
    stn_id = cat_gdf.iloc[idx]["station_id"]
    stn_years = df.query("station_id == @stn_id")["date"].dt.year.dropna()
    if stn_years.empty:
        skipped_stns[stn_id] = "no dated water chemistry samples"
        continue
    # Cast to int so the date strings below are always 'YYYY-MM-DD'
    st_yr = int(stn_years.min())
    end_yr = int(stn_years.max())

    # Get data from GTS API
    stn_q_df = None
    fail_reason = None
    for attempt in range(1, n_retries + 1):
        try:
            stn_q_df = nivapy.da.get_nve_gts_api_aggregated_time_series(
                cat_gdf.iloc[[idx]],
                ["gwb_q"],
                f"{st_yr}-01-01",
                f"{end_yr}-12-31",
                id_col="station_id",
            )
            fail_reason = None
            break
        except ValueError as err:
            # Treated as "no data for catchment": retrying will not help
            fail_reason = f"no data ({err})"
            break
        except Exception as err:
            # Probably a TimeoutError. Retry, remembering the most recent error
            fail_reason = f"request failed {attempt} time(s), last error: {err!r}"

    if stn_q_df is None:
        skipped_stns[stn_id] = fail_reason
        continue

    # Convert 'value_mean' to discharge using the catchment area (km2).
    # NOTE(review): the factors imply 'value_mean' is runoff in mm/day
    # (1e6 m2 per km2; 1000 mm per m; 86400 s per day) - confirm against the API
    cat_area = cat_gdf["area_km2"].iloc[idx]
    stn_q_df["flow_m3/s"] = (
        1e6 * stn_q_df["value_mean"] * cat_area / (1000 * 60 * 60 * 24)
    )
    stn_q_df = stn_q_df[["datetime", "flow_m3/s"]].rename(columns={"datetime": "date"})

    # Resample to output frequency (daily means)
    stn_q_df = stn_q_df.set_index("date").resample("D").mean().reset_index()
    stn_q_df["station_id"] = stn_id
    stn_q_df = stn_q_df[["station_id", "date", "flow_m3/s"]]

    df_list.append(stn_q_df)

# Report stations that will be missing from the flow dataset
if skipped_stns:
    print(f"Skipped {len(skipped_stns)} station(s):")
    for skipped_id, reason in skipped_stns.items():
        print(f"    {skipped_id}: {reason}")

if not df_list:
    raise ValueError("No flow data could be retrieved for any catchment.")

q_df = pd.concat(df_list, axis="rows")

# Save
flow_csv_path = os.path.join(dir_path, "flows_gts-api.csv")
q_df.to_csv(flow_csv_path, index=False)

## 3. Calculate SS loads

In [None]:
# Keep only the stations for which flow data are available
stn_list = q_df["station_id"].unique().tolist()
df = df[df["station_id"].isin(stn_list)]
cat_gdf = cat_gdf[cat_gdf["station_id"].isin(stn_list)]

# Estimate annual fluxes station by station
df_list = []
for stn_id in stn_list:
    # Date-indexed chemistry and flow series for this station, without the ID column
    stn_chem_df = (
        df[df["station_id"] == stn_id].set_index("date").drop(columns="station_id")
    )
    stn_q_df = (
        q_df[q_df["station_id"] == stn_id]
        .set_index("date")
        .drop(columns="station_id")
    )

    stn_flux_df = nivapy.stats.estimate_fluxes(
        stn_q_df,
        stn_chem_df,
        base_freq="D",
        agg_freq="A",
        method="ospar_annual",
        st_date=None,
        end_date=None,
        plot_fold=None,
    )

    # Tag with the station, keep only rows with a positive SS flux, and move the
    # index into a column before selecting the output columns
    stn_flux_df["station_id"] = stn_id
    stn_flux_df = stn_flux_df[stn_flux_df["SS_kg"] > 0].reset_index()
    stn_flux_df = stn_flux_df[["station_id", "year", "SS_kg"]]
    df_list.append(stn_flux_df)

flux_df = pd.concat(df_list, axis=0)

# Save
flux_csv_path = os.path.join(dir_path, "ss_fluxes.csv")
flux_df.to_csv(flux_csv_path, index=False)

flux_df.head()