In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pickle as pkl
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

from dfm_models.utils.analysis import get_modulus_angle, harmonic_analysis
from dfm_models.utils.io import download_COOPs, download_nwis
from dfm_models.utils.visualization import one2one, spatial_stat
from VVUQ import metrics

In [None]:
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
import geoviews.tile_sources as gts

from geoviews import opts

from bokeh.models import HoverTool

gv.extension("bokeh", "matplotlib")

In [None]:
# local functions
# get rid of nans
def check_nans(obs_wl, station_name=None, max_nan_fraction=0.05):
    """Reject a series with too many NaNs; otherwise forward-fill it in place.

    Parameters
    ----------
    obs_wl : pandas.Series
        Series to check. When it is accepted its NaNs are forward-filled
        **in place**, so the caller's object is modified. A NaN at the very
        start of the series has nothing to fill from and is left as NaN.
    station_name : str, optional
        Label used in the printed rejection message. When omitted, the
        notebook-level ``station_name`` is used if it exists (the behaviour
        existing callers rely on), then the series' own ``name``, then the
        literal ``"series"``.
    max_nan_fraction : float, default 0.05
        Largest tolerated fraction of NaN values.

    Returns
    -------
    bool
        ``True`` if the series was accepted (and filled), ``False`` if it is
        empty or has more than ``max_nan_fraction`` NaNs.
    """
    # Resolve a label without ever raising NameError on a missing global.
    if station_name is None:
        station_name = globals().get("station_name")
    if station_name is None:
        station_name = getattr(obs_wl, "name", None)
    if station_name is None:
        station_name = "series"

    n_total = len(obs_wl)
    if n_total == 0:
        # 0 / 0 would evaluate to NaN, and `NaN > threshold` is False, which
        # previously let an empty series through as "valid".
        print(f"{station_name} has no data")
        return False

    if obs_wl.isna().sum() / n_total > max_nan_fraction:
        print(f"{station_name} has more than {max_nan_fraction:.0%} nans")
        return False

    # Same effect as fillna(method="ffill", inplace=True) without the
    # deprecated `method=` keyword.
    obs_wl.ffill(inplace=True)
    return True

In [None]:
## PARAMETERS ##
# Everything a reader is expected to tune lives in this cell; later cells
# turn the path strings into pathlib.Path objects.

# --- project files -------------------------------------------------------
d3d = "/mnt/c/Users/rdchlclj/Projects/MR_D3D_model/Delft3D/notebooks"

# --- NOAA COOPs ----------------------------------------------------------
# Excel workbook listing the tidal stations (read with pandas.read_excel)
tidal_stations_fn = (
    "/mnt/g/MR_D3D_model/ArcPro/MyProject/output data/"
    "tidal_constituents_stations.xlsx"
)

# --- model data ----------------------------------------------------------
model_data = "/mnt/g/MR_D3D_model"
case = "p01"
case_name = "p01_10sig_2019prod"
study = "prod_2019"
model_output_fn = "{}_merged_his.nc".format(case)

# --- analysis config -----------------------------------------------------
# comparison window, formatted YYYYMMDD
begin_date, end_date = "20190101", "20191231"

# tidal stations excluded from the comparison (matched on station name)
coops_drop_stations = (
    "Pensacola",
    "East Fowl River Bridge",
    "Mobile State Docks",
    "Chickasaw Creek",
    "West Fowl River Bridge",
    "Bayou La Batre Bridge",
    "Grand Bay NERR, Mississippi Sound",
    "Sabine Pass North",
    "Texas Point, Sabine Pass",
    "Weeks Bay, Mobile Bay",
)

# False -> download observations; True -> load previously pickled observations
loadObs = False

In [None]:
## SET UP PATHS ##
# Converts the string parameters into Path objects, creates the output
# directories, and loads the station tables and the model history file.
model_data = Path(model_data)

d3d = Path(d3d)
project = d3d / "water_levels"
output = project / "output"
obsOutput = output / "observations"
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# other cells may reference it.
input = project / "input"
data = project / "data"

# output
case_output = output / case_name
analysis_output = case_output / "water_level_error"

# parents=True also creates `output` and `case_output`; exist_ok=True makes the
# cell safe to re-run. `obsOutput` is created here because the observation
# pickles are written into it at the end of the notebook.
for out_dir in (obsOutput, analysis_output):
    out_dir.mkdir(parents=True, exist_ok=True)

# NOAA COOPs
tidal_stations_fn = Path(tidal_stations_fn)
tidal_stations_all = pd.read_excel(tidal_stations_fn, index_col=[0])
tidal_stations = tidal_stations_all[~tidal_stations_all.name.isin(coops_drop_stations)]

# USGS stations
USGS_stations = pd.read_csv(input / "USGS_stations.csv")
# str.strip("b'") removes any leading/trailing `b` and `'` characters;
# presumably the codes were written as bytes reprs (b'...') -- TODO confirm.
USGS_stations["station_code"] = USGS_stations["station_code"].str.strip("b'")

# model data
model_output = model_data / f"Delft3d/models/{study}/{case_name}"
his_fn = model_output / model_output_fn
# index the history output by station name so stations can be selected with
# .sel(station_name=...)
his_data = (
    xr.open_dataset(his_fn)
    .swap_dims({"stations": "station_name"})
    .drop_vars(["station_id"])
)

In [None]:
# separate non-NOAA tide station output
all_stations = his_data.coords["station_name"].values

# Boolean mask: True where the station name contains no "4"
# (per the original note, a "4" marks NDBC station codes).
NOAA_stations = np.array(["4" not in str(name) for name in all_stations], dtype=bool)
NOAA_stations = all_stations[NOAA_stations]

# model history restricted to the retained stations
noaa_his_data = his_data.sel(station_name=NOAA_stations)

In [None]:
## Observations
# loadObs False -> download NOAA / USGS series; True -> read the pickles that
# a previous run stored under `obsOutput`.
if not loadObs:

    # NOAA observed time series data
    datum = "MSL"
    form = "csv"

    noaa_wls = {}
    noaa_pred = {}

    for _, (station_id, station_name) in tidal_stations[["id", "name"]].iterrows():

        # meta data: dictionary key is the name with spaces -> "_" and commas removed
        station_code = station_name.replace(" ", "_").replace(",", "")

        NOAAData = download_COOPs(
            "hourly_height", station_name, station_id, datum, begin_date, end_date
        )
        if not check_nans(NOAAData):
            continue

        NOAAPred = download_COOPs(
            "predictions", station_name, station_id, datum, begin_date, end_date
        )
        if not check_nans(NOAAPred):
            continue

        # Store both or neither: the error analysis looks every key of
        # `noaa_wls` up in `noaa_pred`, so the two dicts must share their keys.
        noaa_wls[station_code] = NOAAData
        noaa_pred[station_code] = NOAAPred

    # USGS stations
    USGSWLs = {}

    # The loop variable is called `station_name` (as in the USGS analysis
    # loop) so that check_nans reports the station actually being checked
    # rather than the last NOAA station.
    for _, (station_id, station_name) in USGS_stations[
        ["station_code", "nickname"]
    ].iterrows():
        USGSData = download_nwis(
            station_name, station_id, begin_date, end_date, data_code=65
        )

        # no data
        if not isinstance(USGSData, pd.Series):
            continue

        # nans
        if check_nans(USGSData):
            # remove the mean, then scale by 0.3048 (metres per foot; assumes
            # download_nwis returns feet -- TODO confirm)
            USGSWLs[station_name] = (USGSData - USGSData.mean()) * 0.3048

else:

    # SECURITY: pickle.load can execute arbitrary code; only load cache files
    # written by this notebook. The files carry a ".pkl.gz" suffix but are
    # plain (uncompressed) pickles, matching how they are written.
    try:
        fn = obsOutput / f"NOAAWLs-{begin_date}-{end_date}.pkl.gz"
        with open(fn, "rb") as f:
            noaa_wls = pkl.load(f)

        fn = obsOutput / f"NOAAPreds-{begin_date}-{end_date}.pkl.gz"
        with open(fn, "rb") as f:
            noaa_pred = pkl.load(f)

        fn = obsOutput / f"USGSWLs-{begin_date}-{end_date}.pkl.gz"
        with open(fn, "rb") as f:
            USGSWLs = pkl.load(f)

    except FileNotFoundError:
        # `fn` is the file that was being opened when the error occurred
        print(f"Obsevations not available at:\n\t{fn}")
        raise

# Error analysis

In [None]:
# Per-station result columns. They are (re)initialised here and appended to by
# both the NOAA loop below and the USGS loop in the next cell, so re-run this
# cell before re-running the USGS cell to avoid duplicated rows.
number = []
nrmse_tide_col = []
nrmse_col = []
rmse_col = []
r2_col = []
xs = []
ys = []
stations = []

## NOAA
for station_name in noaa_wls.keys():

    # model output at this station, selected once and reused below
    # (station names are selected as bytes, hence .encode())
    station_his = his_data.sel(station_name=station_name.encode())
    model_wl = station_his["waterlevel"].to_series().resample("1H").mean()
    obs_wl = noaa_wls[station_name]
    # .get(): a station without stored predictions yields a NaN nrmse_tide
    # instead of aborting the whole loop with a KeyError
    pred_wl = noaa_pred.get(station_name)

    # regularize: keep only timestamps present in both series
    tCommon = obs_wl.index.intersection(model_wl.index)
    model_wl = model_wl.loc[tCommon]
    obs_wl = obs_wl.loc[tCommon]

    # demean
    model_wl -= model_wl.mean()
    obs_wl -= obs_wl.mean()

    # D3D results: store plain floats rather than 0-d DataArrays
    xs.append(float(station_his["station_x_coordinate"]))
    ys.append(float(station_his["station_y_coordinate"]))
    stations.append(station_name)

    # stats
    error = (model_wl - obs_wl).dropna()
    rmse = np.sqrt(np.mean(error ** 2))
    if pred_wl is None:
        nrmse_tide = np.nan
    else:
        # normalised by the range of the predicted series over its full
        # record (not restricted to tCommon)
        nrmse_tide = rmse / (pred_wl.max() - pred_wl.min())
    nrmse = rmse / (obs_wl.max() - obs_wl.min())
    r2 = metrics.r2(model_wl, obs_wl)

    # store
    rmse_col.append(rmse)
    nrmse_col.append(nrmse)
    nrmse_tide_col.append(nrmse_tide)
    r2_col.append(r2)
    number.append(len(error))

    # figures: one-to-one plot per station, closed after saving so figures do
    # not accumulate in memory
    fig, ax = plt.subplots(figsize=(7, 7))
    ax = one2one(obs_wl, model_wl, quantity_str="water level [m, MSL]", ax=ax)
    ax.set_title(station_name.replace("_", " "))
    fn = analysis_output / f"{station_name}_noaa_verified_water_level_one2one.png"
    fig.savefig(fn, bbox_inches="tight")
    plt.close(fig)

In [None]:
## USGS
# Appends one row per USGS station to the result lists created in the NOAA cell.
for _, (station_id, station_name) in USGS_stations[["station_code", "nickname"]].iterrows():

    # no data: stations that were skipped while collecting observations have
    # no entry in USGSWLs, so look them up with .get() instead of indexing
    usgs_obs = USGSWLs.get(station_name)
    if not isinstance(usgs_obs, pd.Series):
        continue

    # data: model output at this station, selected once and reused below
    station_his = his_data.sel(station_name=station_id.encode())
    model_wl = station_his["waterlevel"].to_series().resample("1H").mean()
    # hourly means of the observations, gaps forward-filled
    obs_wl = usgs_obs.resample("1H").mean().ffill()

    # regularize: keep only timestamps present in both series
    tCommon = obs_wl.index.intersection(model_wl.index)
    model_wl = model_wl.loc[tCommon]
    obs_wl = obs_wl.loc[tCommon]

    # demean
    model_wl -= model_wl.mean()
    obs_wl -= obs_wl.mean()

    # D3D results: store plain floats rather than 0-d DataArrays
    xs.append(float(station_his["station_x_coordinate"]))
    ys.append(float(station_his["station_y_coordinate"]))
    stations.append(station_name)

    # stats (no tidal prediction is used for USGS stations -> nrmse_tide is NaN)
    error = (model_wl - obs_wl).dropna()
    rmse = np.sqrt(np.mean(error ** 2))
    nrmse_tide = np.nan
    nrmse = rmse / (obs_wl.max() - obs_wl.min())
    r2 = metrics.r2(model_wl, obs_wl)

    # store
    rmse_col.append(rmse)
    nrmse_col.append(nrmse)
    nrmse_tide_col.append(nrmse_tide)
    r2_col.append(r2)
    number.append(len(error))

    # figures: one-to-one plot per station, closed after saving
    fig, ax = plt.subplots(figsize=(7, 7))
    ax = one2one(obs_wl, model_wl, quantity_str="water level [m, MSL]", ax=ax)
    ax.set_title(station_name.replace("_", " "))
    fn = analysis_output / f"{station_name}_USGS_verified_water_level_one2one.png"
    fig.savefig(fn, bbox_inches="tight")
    plt.close(fig)

In [None]:
# Assemble the per-station statistics table. Columns are built directly from
# the result lists so numeric values keep their own dtype, instead of being
# stacked into a single string array and parsed back.
stats = pd.DataFrame(
    {
        "station": stations,
        "number": number,
        "nrmse": nrmse_col,
        "nrmse_tide": nrmse_tide_col,
        "rmse_cm": rmse_col,
        "r2": r2_col,
        # float() accepts both plain numbers and 0-d xarray DataArrays
        "lon": [float(x) for x in xs],
        "lat": [float(y) for y in ys],
    }
).astype(
    {
        "station": str,
        "number": int,
        "nrmse": float,
        "nrmse_tide": float,
        "rmse_cm": float,
        "r2": float,
        "lon": float,
        "lat": float,
    }
)

# Scale by 100: the normalised errors become percentages; "rmse_cm" is in
# centimetres provided the water levels are in metres.
stats["nrmse"] *= 100
stats["nrmse_tide"] *= 100
stats["rmse_cm"] *= 100

stats_fn = analysis_output / f"{case}_water_level_error_stats.csv"
stats.to_csv(stats_fn, index=False)

In [None]:
# Interactive map of the per-station statistics: one overlay per statistic,
# collected in a HoloMap and saved as a standalone HTML file.

# data
lon = hv.Dimension("lon", label="Longitude [deg]")
lat = hv.Dimension("lat", label="Latitude [deg]")
hv_stats = hv.Table(stats, kdims=[lon, lat])
# statistic columns: everything between the first two columns and the
# trailing lon/lat pair
cols = stats.columns[2:-2].values
clabels = {
    "nrmse": "normalized RMSE [% range]",
    "nrmse_tide": "normalized RMSE [% tidal range]",
    "rmse_cm": "RMSE [cm]",
    "r2": "r-squared [.]",
}

# hover tool
tooltips = [
    ("Station", "@station"),
    ("# obs.", "@number"),
    ("Normalized RMSE [% range]", "@nrmse"),
    ("Normalized RMSE [% tidal range]", "@nrmse_tide"),
    ("RMSE [cm]", "@rmse_cm"),
    ("r-squared", "@r2"),
]
hover = HoverTool(tooltips=tooltips)

# style
psize = 10
cst_lw = 1.25

# Holoviews options
cOpts = opts.LineContours(line_width=cst_lw)
overOpts = opts.Overlay(aspect=6.5 / 3, responsive=True)

# generate holomap
holomap = hv.HoloMap(kdims="Statistic")
for col in cols:

    clabel = clabels[col]  # colorbar text label

    # Upper colour limit. pandas' max() skips NaN, whereas the NumPy max of
    # the table column returns NaN as soon as one station has no value
    # (nrmse_tide is NaN for every USGS station), which breaks the colour range.
    color_max = stats[col].max()

    # options for points
    pOpts = opts.Points(
        size=psize,
        color=col,
        cmap="turbo",
        colorbar=True,
        clabel=clabel,
        tools=[hover],
        clim=(0, color_max),
    )

    # put together
    overlay = (
        gf.coastline(scale="10m").opts(cOpts)
        * gv.Points(hv_stats).opts(pOpts)
        * gts.EsriImagery
    )

    # map
    holomap[col] = overlay.opts(overOpts)

# save output
gv.save(holomap, analysis_output / f"{case}_spatial_statistics.html")

In [None]:
## Store observations
# Cache freshly downloaded observations so a later run can set loadObs = True.
if not loadObs:
    # make sure the cache directory exists before writing into it
    obsOutput.mkdir(parents=True, exist_ok=True)

    # file-name prefix -> object to pickle
    obs_caches = {
        "NOAAWLs": noaa_wls,
        "NOAAPreds": noaa_pred,
        "USGSWLs": USGSWLs,
    }

    for prefix, obs in obs_caches.items():
        # NOTE: despite the ".pkl.gz" suffix these are plain pickles; the
        # loading cell opens them the same way, so the name is kept for
        # compatibility with existing caches.
        fn = obsOutput / f"{prefix}-{begin_date}-{end_date}.pkl.gz"
        with open(fn, "wb") as f:
            pkl.dump(obs, f)