In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytide
import seaborn as sns
import xarray as xr
from scipy import signal

from dfm_models.utils.analysis import get_modulus_angle, harmonic_analysis
from dfm_models.utils.io import download_COOPs, download_nwis
from dfm_models.utils.visualization import one2one, spatial_stat
from VVUQ import metrics

In [None]:
from bokeh.io import curdoc, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.themes import built_in_themes

# Send Bokeh output to the notebook and set the current document's theme
# to "light_minimal".
output_notebook()
curdoc().theme = "light_minimal"

In [None]:
## PARAMETERS ##
# User-tunable settings. The path strings below are absolute and
# machine-specific; the path set-up cell converts them to pathlib.Path objects.

# project files (root of the notebook project; "water_levels" is appended later)
d3d = "/mnt/c/Users/rdchlclj/Projects/MR_D3D_model/Delft3D/notebooks"

# NOAA COOPs: spreadsheet of tidal stations (read with pandas.read_excel)
tidal_stations_fn = (
    "/mnt/g/MR_D3D_model/ArcPro/MyProject/output data/tidal_constituents_stations.xlsx"
)

# model data: root directory; the history file is looked up under
# Delft3d/models/{study}/{case_name}/{case}_merged_his.nc
model_data = "/mnt/g/MR_D3D_model"
case = "p01"
case_name = "p01_10sig_2019prod"
study = "prod_2019"

# analysis config
# comparison dates, passed unchanged to the COOPs and NWIS download helpers
# (presumably formatted YYYYMMDD — confirm against those helpers)
begin_date = "20190101"
end_date = "20191231"

# tidal stations to drop: rows whose "name" matches one of these are excluded
# from the station table before any data is downloaded
coops_drop_stations = (
    "Pensacola",
    "East Fowl River Bridge",
    "Mobile State Docks",
    "Chickasaw Creek",
    "West Fowl River Bridge",
    "Bayou La Batre Bridge",
    "Grand Bay NERR, Mississippi Sound",
    "Pascagoula NOAA Lab",
    "Sabine Pass North",
    "Texas Point, Sabine Pass",
    "Weeks Bay, Mobile Bay",
)

In [None]:
## SET UP PATHS ##
# Turn the parameter strings into Path objects, create the output folders,
# and load the station tables and the model history file.
model_data = Path(model_data)

d3d = Path(d3d)
project = d3d / "water_levels"
output = project / "output"
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because other cells may reference it.
input = project / "input"
data = project / "data"

# output: analysis_output lives inside case_output, so a single mkdir with
# parents=True creates both (and any missing ancestors) and is safe to re-run.
case_output = output / case_name
analysis_output = case_output / "water_level_error"
analysis_output.mkdir(parents=True, exist_ok=True)

# NOAA COOPs: read the station table and remove the stations listed in
# coops_drop_stations (matched on the "name" column).
tidal_stations_fn = Path(tidal_stations_fn)
tidal_stations_all = pd.read_excel(tidal_stations_fn, index_col=[0])
tidal_stations = tidal_stations_all[
    ~tidal_stations_all["name"].isin(coops_drop_stations)
]

# USGS stations
USGS_stations = pd.read_csv(input / "USGS_stations.csv")
# str.strip("b'") removes any leading/trailing "b" and "'" characters —
# presumably the codes were saved as b'...' byte-string reprs (TODO confirm).
USGS_stations["station_code"] = USGS_stations["station_code"].str.strip("b'")

# model data: open the merged history file and index it by station name
# instead of the positional "stations" dimension.
model_output = model_data / f"Delft3d/models/{study}/{case_name}"
his_fn = model_output / f"{case}_merged_his.nc"
his_data = (
    xr.open_dataset(his_fn)
    .swap_dims({"stations": "station_name"})
    .drop_vars(["station_id"])
)

In [None]:
# Split off the NOAA tide-station output: any station whose name contains the
# character "4" is treated as an NDBC station code and left out.
all_stations = his_data.coords["station_name"].values
is_noaa = np.array(["4" not in str(name) for name in all_stations], dtype=bool)
NOAA_stations = all_stations[is_noaa]
noaa_his_data = his_data.sel(station_name=NOAA_stations)

In [None]:
# NOAA water levels: for every retained tidal station, fetch the
# "hourly_height" and "predictions" products relative to `datum`.
datum = "MSL"

noaa_wls = {}
noaa_predicted_wls = {}

station_rows = tidal_stations[["id", "name"]].itertuples(index=False, name=None)
for station_id, name in station_rows:
    # dictionary key: station name with spaces -> underscores, commas removed
    station_code = name.replace(" ", "_").replace(",", "")
    coops_args = (name, station_id, datum, begin_date, end_date)
    noaa_wls[station_code] = download_COOPs("hourly_height", *coops_args)
    noaa_predicted_wls[station_code] = download_COOPs("predictions", *coops_args)

In [None]:
# USGS water levels: download each station record (NWIS data code 65),
# subtract its mean, then scale by 0.3048.
USGSWLs = {}

usgs_rows = USGS_stations[["station_code", "nickname"]].itertuples(
    index=False, name=None
)
for station_id, name in usgs_rows:
    raw_wl = download_nwis(name, station_id, begin_date, end_date, data_code=65)
    # 0.3048 is the feet-to-metres factor — presumably the record is in feet
    USGSWLs[name] = (raw_wl - raw_wl.mean()) * 0.3048

# Error Analysis

In [None]:
# Per-station accumulators. This loop and the USGS loop in the following cell
# each append one entry per station to every list.
nrmse_tide_col = []
nrmse_col = []
rmse_col = []
r2_col = []
xs = []
ys = []
stations = []

## NOAA
for station_name, obs_wl in noaa_wls.items():
    pred_wl = noaa_predicted_wls[station_name]

    # model output at this station (selected by the encoded name), hourly means
    station_his = his_data.sel(station_name=station_name.encode())
    model_wl = station_his["waterlevel"].to_series().resample("1H").mean()

    # regularize: keep only the time stamps present in both series
    tCommon = obs_wl.index.intersection(model_wl.index)
    model_wl = model_wl.loc[tCommon]
    obs_wl = obs_wl.loc[tCommon]

    # demean both series so they are compared about a common zero
    model_wl = model_wl - model_wl.mean()
    obs_wl = obs_wl - obs_wl.mean()

    # D3D results: station coordinates and name for the summary table
    xs.append(station_his["station_x_coordinate"])
    ys.append(station_his["station_y_coordinate"])
    stations.append(station_name)

    # stats: RMSE, RMSE normalized by the predicted-tide range and by the
    # observed range, and r2 from VVUQ.metrics
    error = (model_wl - obs_wl).dropna()
    rmse = np.sqrt((error ** 2).mean())
    nrmse_tide = rmse / (pred_wl.max() - pred_wl.min())
    nrmse = rmse / (obs_wl.max() - obs_wl.min())
    r2 = metrics.r2(model_wl, obs_wl)

    # store
    rmse_col.append(rmse)
    nrmse_col.append(nrmse)
    nrmse_tide_col.append(nrmse_tide)
    r2_col.append(r2)

    # figures: one-to-one plot of observed vs. modeled water level, saved per
    # station and closed straight away to keep memory bounded
    fig, ax = plt.subplots(figsize=(7, 7))
    ax = one2one(obs_wl, model_wl, quantity_str="water level [m, MSL]", ax=ax)
    ax.set_title(station_name.replace("_", " "))
    fn = analysis_output / f"{station_name}_noaa_verified_water_level_one2one.png"
    fig.savefig(fn, bbox_inches="tight")
    plt.close(fig)

In [None]:
## USGS
# Same error analysis as the NOAA loop, for the USGS stations. Results are
# appended to the accumulator lists created in the NOAA cell. No tide
# prediction is used here, so nrmse_tide is recorded as NaN.
for _, (station_id, station_name) in USGS_stations[["station_code", "nickname"]].iterrows():

    # data: model output is selected by the (encoded) station code; both series
    # are reduced to hourly means
    station_his = his_data.sel(station_name=station_id.encode())
    model_wl = station_his["waterlevel"].to_series().resample("1H").mean()
    # forward-fill hours with no observation; .ffill() replaces the deprecated
    # fillna(method="ffill") spelling
    obs_wl = USGSWLs[station_name].resample("1H").mean().ffill()

    # regularize: keep only the time stamps present in both series
    tCommon = obs_wl.index.intersection(model_wl.index)
    model_wl = model_wl.loc[tCommon]
    obs_wl = obs_wl.loc[tCommon]

    # demean
    model_wl -= model_wl.mean()
    obs_wl -= obs_wl.mean()

    # D3D results
    xs.append(station_his["station_x_coordinate"])
    ys.append(station_his["station_y_coordinate"])
    stations.append(station_name)

    # stats
    error = (model_wl - obs_wl).dropna()
    rmse = np.sqrt(np.mean(error ** 2))
    nrmse_tide = np.nan
    nrmse = rmse / (obs_wl.max() - obs_wl.min())
    r2 = metrics.r2(model_wl, obs_wl)

    # store
    rmse_col.append(rmse)
    nrmse_col.append(nrmse)
    nrmse_tide_col.append(nrmse_tide)
    r2_col.append(r2)

    # figures: one-to-one plot saved per station, then closed
    fig, ax = plt.subplots(figsize=(7, 7))
    ax = one2one(obs_wl, model_wl, quantity_str="water level [m, MSL]", ax=ax)
    ax.set_title(station_name.replace("_", " "))
    fn = analysis_output / f"{station_name}_USGS_verified_water_level_one2one.png"
    fig.savefig(fn, bbox_inches="tight")
    plt.close(fig)

In [None]:
# Assemble the per-station metrics into one table and write it to CSV.
# The frame is built column by column so numeric values keep a float dtype
# from the start, rather than being stacked with the station names into a
# single string array and cast back afterwards.
stats = pd.DataFrame(
    {
        "station": [str(name) for name in stations],
        "nrmse": np.asarray(nrmse_col, dtype=float),
        "nrmse_tide": np.asarray(nrmse_tide_col, dtype=float),
        "rmse_m": np.asarray(rmse_col, dtype=float),
        "r2": np.asarray(r2_col, dtype=float),
        # xs / ys hold one single-valued coordinate selection per station
        "lon": [float(x) for x in xs],
        "lat": [float(y) for y in ys],
    }
)
stats_fn = analysis_output / "water_level_error_stats.csv"
stats.to_csv(stats_fn, index=False)

In [None]:
# Map of normalized RMSE (as a percentage) at every station; each label is the
# part of the station name before the first underscore.
labels = stats["station"].str.split("_").str[0].tolist()
fig = plt.figure(figsize=(26, 10))
fig, ax, cbar = spatial_stat(
    stats["lon"],
    stats["lat"],
    100 * stats["nrmse"],
    labels,
    quantity_str="normalized rmse [%]",
    tbuff=-0.05,
    fig=fig,
)
nrmse_map_fn = analysis_output / "nrmse_water_levels.png"
fig.savefig(nrmse_map_fn, dpi=300)
plt.close(fig)

In [None]:
# Map of RMSE in centimetres (rmse_m * 100) at every station, with the colour
# scale capped at 20.
fig = plt.figure(figsize=(26, 10))
labels = [label.split("_")[0] for label in stats["station"]]
fig, ax, cbar = spatial_stat(
    stats["lon"],
    stats["lat"],
    100 * stats["rmse_m"],
    labels,
    quantity_str="rmse [cm]",
    tbuff=-0.05,
    fig=fig,
    vmax=20,
)
# Saved under its own file name: this figure was previously written to
# "nrmse_water_levels.png", overwriting the normalized-RMSE map.
fig.savefig(analysis_output / "rmse_water_levels.png", dpi=300)
plt.close(fig)

In [None]:
# Map of RMSE normalized by the predicted-tide range (as a percentage).
# Only rows with a defined nrmse_tide are plotted: the USGS loop records NaN
# for that metric, so this picks out the NOAA stations without hard-coding how
# many there are. Labels are built from the same subset so they line up with
# the plotted points.
tmp_stats = stats[stats["nrmse_tide"].notna()]
labels = [label.split("_")[0] for label in tmp_stats["station"]]
fig = plt.figure(figsize=(16, 6.5))
fig, ax, cbar = spatial_stat(
    tmp_stats["lon"],
    tmp_stats["lat"],
    100 * tmp_stats["nrmse_tide"],
    labels,
    tbuff=-0.05,
    fig=fig,
)
fig.savefig(analysis_output / "nrmse_tidal_water_levels.png", dpi=300)
plt.close(fig)