In [None]:
# # Run this and then restart the kernel at the start of each session to install
# # 'teotil3' in development mode
# !pip install -e /home/jovyan/projects/teotil3/

In [1]:
import os

import contextily as cx
import geopandas as gpd
import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import pandas as pd
import seaborn as sn
import teotil3 as teo
from sqlalchemy import text

# Apply matplotlib's "ggplot" style sheet to all figures created below
plt.style.use("ggplot")

# Task 2.15: Testing, documentation and reporting

## Part K: Gaula

In [None]:
# Connect to JupyterHub's PostGIS database.
# NOTE(review): 'eng' is not referenced by any other cell visible in this notebook -
# confirm whether the connection is still needed.
eng = nivapy.da.connect_postgis()

In [None]:
# Read station details and water chemistry from the two sheets of the Excel workbook
xl_path = r"../../data/gaula_data.xlsx"
stn_df = pd.read_excel(xl_path, sheet_name="station")

# The chemistry sheet is parsed with "," as the decimal separator
wc_df = pd.read_excel(xl_path, sheet_name="chem", decimal=",")
wc_df["date"] = pd.to_datetime(wc_df["date"])

print("Water chemistry data:")
display(wc_df.head())

print("Station details:")
display(stn_df)

In [None]:
# Restructure: average values sharing the same site/parameter/date, then pivot from
# long to wide format so that each parameter becomes its own column
key_cols = ["site_id", "parameter", "date"]
wc_df = wc_df.groupby(key_cols).mean().unstack("parameter")

# Flatten the column MultiIndex, keeping only the parameter level as plain strings
wc_df.columns = [str(par) for par in wc_df.columns.get_level_values(1)]

# Append units to the parameter names and discard rows where any value is missing
unit_names = {"TOTN": "TOTN_ug/l", "TOTP": "TOTP_ug/l", "TOC": "TOC_mg/l"}
wc_df = wc_df.reset_index().rename(columns=unit_names).dropna(how="any")
wc_df.head()

In [None]:
# Keep only samples dated 2013-01-01 or later
wc_df = wc_df.query("date >= '2013-01-01'")

# One panel per parameter, each showing the date-ordered series as black markers + line
pars = ["TOTN_ug/l", "TOTP_ug/l", "TOC_mg/l"]
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for ax, par in zip(axes, pars):
    series_df = wc_df[["date", par]].sort_values("date")
    ax.plot(series_df["date"], series_df[par], "ko-")
    ax.set_title(par)
plt.tight_layout()

In [None]:
# Look up the NVE station ID listed for the first row of 'stn_df', then keep only
# that station from the HydAPI station list
nve_id = stn_df["nve_station_id"].iloc[0]
nve_stn_df = nivapy.da.get_nve_hydapi_stations().query("station_id == @nve_id")
nve_stn_df

In [None]:
# Derive catchment boundaries for chem and Q stations.
# Both calls use the same delineation settings; only the input table and the names of
# its ID/coordinate columns differ.
ws_kwargs = {
    "crs": "epsg:4326",
    "min_size_km2": 5,
    "dem_res_m": 40,
    "buffer_km": None,
    "temp_fold": None,
    "reproject": False,
}

stn_gdf = nivapy.spatial.derive_watershed_boundaries(
    stn_df,
    id_col="site_id",
    xcol="lon",
    ycol="lat",
    **ws_kwargs,
)
# Measure polygon areas in a cylindrical equal-area ("cea") projection and scale by 1e6
stn_gdf["chem_area_km2"] = stn_gdf.to_crs({"proj": "cea"}).geometry.area / 1e6

nve_gdf = nivapy.spatial.derive_watershed_boundaries(
    nve_stn_df,
    id_col="station_id",
    xcol="longitude",
    ycol="latitude",
    **ws_kwargs,
)
nve_gdf["q_area_km2"] = nve_gdf.to_crs({"proj": "cea"}).geometry.area / 1e6
# Duplicate the ID under the column name used in 'stn_df' so the two can be joined
nve_gdf["nve_station_id"] = nve_gdf["station_id"]

# Join areas back to 'stn_df' and compare to NVE values for the Q stations (from HydAPI).
# Any area columns left over from a previous run of this cell are dropped first;
# otherwise the merges would create suffixed duplicates ('chem_area_km2_x'/'_y') and
# later lookups of 'chem_area_km2'/'q_area_km2' would fail.
stn_df = stn_df.drop(columns=["chem_area_km2", "q_area_km2"], errors="ignore")
stn_df = stn_df.merge(
    stn_gdf[["site_id", "chem_area_km2"]],
    how="left",
    on="site_id",
)
stn_df = stn_df.merge(
    nve_gdf[["nve_station_id", "q_area_km2"]],
    how="left",
    on="nve_station_id",
)

stn_df

In [None]:
# Estimate annual fluxes for each row (chemistry station) in 'stn_df'
par_list = ["TOTN_ug/l", "TOTP_ug/l", "TOC_mg/l"]

# Period of flow data requested from NVE HydAPI
q_start, q_end = "2013-01-01", "2022-12-31"

df_list = []
for _, row in stn_df.iterrows():
    vm_id = row["site_id"]
    nve_id = row["nve_station_id"]

    # Ratio of the two catchment areas; used below to scale the flow series from the
    # Q station to the chem station
    area_fac = row["chem_area_km2"] / row["q_area_km2"]

    # Get chem data for station: daily means, keeping only days where every
    # parameter in 'par_list' has a value
    chem_stn_df = (
        wc_df.query("site_id == @vm_id")
        .set_index("date")[par_list]
        .resample("D")
        .mean()
        .dropna()
    )

    # Get flow data for stations.
    # NOTE(review): 1001 is presumably the HydAPI parameter code for discharge and
    # resolution=1440 presumably means minutes (i.e. daily values) - TODO confirm
    q_stn_df = nivapy.da.query_nve_hydapi(
        [nve_id], [1001], q_start, q_end, resolution=1440
    )
    q_stn_df = area_fac * q_stn_df.set_index("datetime")[["value"]].resample("D").mean()

    # Strip the timezone from the flow index
    q_stn_df.index = q_stn_df.index.tz_localize(None)
    q_stn_df = q_stn_df.rename(columns={"value": "flow_m3/s"})

    # Calculate annual fluxes
    stn_flux_df = nivapy.stats.estimate_fluxes(
        q_stn_df,
        chem_stn_df,
        base_freq="D",
        agg_freq="A",
        method="ospar_annual",
    )

    # Convert kg to tonnes
    stn_flux_df = stn_flux_df / 1000
    stn_flux_df.columns = [col.replace("kg", "tonnes") for col in stn_flux_df.columns]
    stn_flux_df["site_id"] = vm_id
    df_list.append(stn_flux_df)

# Stack the per-station results and move the index into an ordinary column
flux_df = pd.concat(df_list, axis="rows").reset_index()

flux_df

In [None]:
# Values used to build the file name of the saved model results CSV
nve_data_year = 2023
st_yr = 2013
end_yr = 2022

# Shared folders. The model results CSV is read from 'eval_fold'.
# NOTE(review): 'out_csv_fold' is not used by any cell visible in this notebook.
out_csv_fold = r"/home/jovyan/shared/common/teotil3/annual_input_data"
eval_fold = r"/home/jovyan/shared/common/teotil3/evaluation"

In [None]:
def remove_prefix(text, prefix):
    """Return 'text' with a leading 'prefix' stripped off.

    Args
        text:   Str. String to process
        prefix: Str. Substring to remove from the start of 'text'

    Returns
        Str. 'text' without 'prefix' if 'text' starts with it; otherwise 'text'
        unchanged. Only a match at the start is removed.
    """
    return text[len(prefix) :] if text.startswith(prefix) else text
    
# Read saved data for speed
mod_csv = os.path.join(
    eval_fold, f"teo3_results_nve{nve_data_year}_{st_yr}-{end_yr}.csv"
)
mod_df = pd.read_csv(mod_csv)

# Tidy modelled data for comparison: keep the ID columns plus every 'accum_*' column,
# with the 'accum_' prefix stripped from the names
id_cols = ["regine", "year"]
accum_cols = [name for name in mod_df.columns if name.startswith("accum_")]
mod_df = mod_df[id_cols + accum_cols].copy()
cols = [remove_prefix(name, "accum_") for name in accum_cols]
mod_df.columns = id_cols + cols

# Replace each '*_kg' column with a '*_tonnes' column (value / 1000); columns that do
# not end in '_kg' are simply removed
for col in cols:
    values = mod_df.pop(col)
    if col.endswith("_kg"):
        mod_df[col[:-3] + "_tonnes"] = values / 1000

# Keep only rows for the regine ID given in the first row of 'stn_df'
reg_id = stn_df["regine"].iloc[0]
mod_df = mod_df.query("regine in @reg_id")
mod_df.head()

In [None]:
# Merge: keep every row of 'flux_df' and attach the 'mod_df' columns for the same year
comp_df = flux_df.merge(mod_df, on="year", how="left")
comp_df.head()

In [None]:
# Plot: single parameter per river.
# For each parameter, the '<PAR>_tonnes' column is drawn as a red "Observed" line on
# top of stacked bars built from all other columns matching that parameter.
pars = ["TOTN", "TOTP", "TOC"]

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 3))

for idx, par in enumerate(pars):
    # Columns where the parameter name is one of the '_'-separated parts of the
    # column name (case-insensitive)
    par_cols = [
        col
        for col in comp_df.columns
        if par.lower() in (i.lower() for i in col.split("_"))
    ]

    comp_par_df = comp_df.set_index("year")[par_cols]
    obs_col = f"{par}_tonnes"

    comp_par_df.drop(columns=[obs_col]).plot(
        kind="bar",
        stacked=True,
        ax=axes[idx],
        legend=False,
        cmap="tab10",
    )

    # Pandas bar plots place the bars at integer positions 0..n-1 rather than at the
    # year values, so the line is plotted against row position. (Using
    # 'year - min(year)' only lines up with the bars when the years are consecutive
    # and sorted.)
    axes[idx].plot(
        range(len(comp_par_df)),
        comp_par_df[obs_col],
        marker="o",
        color="red",
        label="Observed",
    )
    axes[idx].set_title(par)
    axes[idx].set_ylabel(f"{par} (tonnes)")
plt.tight_layout()

# Single shared legend below the panels, built from the first panel's entries
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="lower center", bbox_to_anchor=(0.5, -0.4), ncol=2)