In [None]:
import numpy as np
import netCDF4 as nc
import pandas as pd
import xarray as xr
import matplotlib as mpl
import matplotlib.pyplot as plt
import urllib.request
import json
import os
import cartopy.crs as ccrs
import seaborn as sns
from VVUQ import metrics

from pathlib import Path

In [None]:
# The "seaborn-talk" style sheet was renamed to "seaborn-v0_8-talk" in
# matplotlib 3.6 and the old name was later removed; use whichever name this
# installation actually ships so the notebook runs on old and new versions.
_talk_style = (
    "seaborn-talk" if "seaborn-talk" in plt.style.available else "seaborn-v0_8-talk"
)
plt.style.use(_talk_style)

In [None]:
# plotting function
def one2one(obs, mod, quantity_str="water level [m, NAVD88]", lims=None, ax=None):
    """Scatter modeled against observed values around a 1:1 line.

    Besides the scatter and the dashed 1:1 line, the plot shows a gray band and
    two thin guide lines offset from the 1:1 line by ``sin(pi/4) * obs.std()``.

    Parameters
    ----------
    obs, mod : array-like with ``.min()``, ``.max()`` (and ``.std()`` for obs)
        Observed and modeled values, plotted on x and y respectively.
    quantity_str : str
        Text appended to "observed " / "modeled " in the axis labels.
    lims : sequence of two numbers, optional
        Shared axis limits ``[low, high]``. Defaults to the joint min/max of
        ``obs`` and ``mod``.
    ax : matplotlib Axes, optional
        Axes to draw on. A new 5x5 inch figure is created when omitted.

    Returns
    -------
    (fig, ax) when the figure was created here, otherwise just ``ax``.
    """
    std_shift = np.sin(np.pi / 4) * obs.std()

    # `is None` (not `== None`): an array-valued `lims` would otherwise be
    # compared element-wise and raise on the ambiguous truth value.
    if lims is None:
        lims = [np.min([obs.min(), mod.min()]), np.max([obs.max(), mod.max()])]
    low, high = lims[0], lims[1]

    created_figure = ax is None
    if created_figure:
        fig, ax = plt.subplots(figsize=(5, 5))

    ax.scatter(obs, mod, 15)
    ax.plot(lims, lims, color="k", linestyle="--")

    # guide lines below and above the 1:1 line, clipped to the plot square
    ax.plot([low + std_shift, high], [low, high - std_shift], color="k", lw=0.75)
    ax.plot([low, high - std_shift], [low + std_shift, high], color="k", lw=0.75)
    ax.fill_between(
        lims,
        [low - std_shift, high - std_shift],
        [low + std_shift, high + std_shift],
        alpha=0.2,
        color="gray",
    )
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.set_xlabel(f"observed {quantity_str}")
    ax.set_ylabel(f"modeled {quantity_str}")

    if created_figure:
        return fig, ax
    return ax

In [None]:
# project files
# NOTE: these are absolute, machine-specific paths; adjust them before running
# the notebook elsewhere.
d3d = Path("/mnt/c/Users/rdchlclj/Projects/MR_D3D_model/Delft3D")
data = Path("/mnt/e/MS_River_plume")
project = d3d / "tidal_constituent_boundary_conditions"
output = project / "output"
figures = project / "figures"

# NOAA COOPs
const_data = project / "constituent_data"
noaa_prediction_data = project / "noaa_COOPs_prediction_data"
tidal_stations_fn = Path(
    "/mnt/e/MS_River_plume/ArcPro/MyProject/output data/tidal_constituents_stations.xlsx"
)
# tidal_stations_fn = Path(
#    "E:\\ArcPro\\MyProject\\output data\\tidal_constituents_stations.xlsx"
# )
tidal_stations_all = pd.read_excel(tidal_stations_fn, index_col=[0])

# msl to navd88
# datum_conversion_fn = output / "tidal_stations_msl_to_navd88_vdatum.txt"
# datum_conversion = pd.read_csv(datum_conversion_fn, na_values=[-999999], index_col=[0])

# model boundary condition constituents
# Whitespace-separated table of constituent name, amplitude and phase, read
# relative to the current working directory.
FES_comps = pd.read_csv(
    "example_FES2014_comps.txt",
    sep=r"\s+",  # raw string: "\s" is an invalid escape in a normal literal
    names=["name", "amp", "phase"],
    index_col=[0],
)

In [None]:
# model data
# Open the merged Delft3D history file for the selected case and re-index it
# by station name instead of the positional "stations" dimension.
case = "p03"
case_name = f"{case}_10sig_2018prod"
study = "prod_2018"
model_output = data / f"Delft3d/models/{study}/{case_name}"
# his_fn = output / f"{case_name}_combined_his.nc"
his_fn = model_output / f"{case}_merged_his.nc"
his_data = (
    xr.open_dataset(his_fn)
    .swap_dims({"stations": "station_name"})  # make station_name the dimension
    .drop_vars(["station_id"])
)

In [None]:
# separate non-NOAA tide station output
# Keep only stations whose name does not contain the character "4"; per the
# original note this is meant to filter out NDBC station codes.
# NOTE(review): any NOAA station whose name contains a "4" would be dropped as
# well — confirm this heuristic matches the station list.
all_stations = his_data.coords["station_name"].values
NOAA_stations = ["4" not in str(t) for t in all_stations]  # NDBC station codes
NOAA_stations = all_stations[NOAA_stations]  # boolean mask -> selected names
noaa_his_data = his_data.sel(station_name=NOAA_stations)

In [None]:
# clean up data
# tidal stations to drop
# Stations listed here are removed (by their `name` column) from the full
# station table; every download/analysis cell below iterates `tidal_stations`.
drop_stations = (
    'Pensacola',
    'East Fowl River Bridge',
    'Mobile State Docks',
    'Chickasaw Creek',
    'West Fowl River Bridge',
    'Bayou La Batre Bridge',
    'Grand Bay NERR, Mississippi Sound',
    'Pascagoula NOAA Lab',
    'Sabine Pass North',
    'Texas Point, Sabine Pass'
)
tidal_stations = tidal_stations_all[~tidal_stations_all.name.isin(drop_stations)]

# download datums and constants from NOAA

In [None]:
def get_amp_phase(harcon):
    """Split a NOAA harmonic-constituent response into parallel lists.

    `harcon` is the parsed JSON mapping; its 'HarmonicConstituents' entry is a
    sequence of records that each carry 'name', 'amplitude', 'phase_GMT' and
    'speed'. Returns ``(names, amplitudes, phases_GMT, speeds)`` as four lists
    in the order the records appear.
    """
    records = list(harcon['HarmonicConstituents'])
    names = [record['name'] for record in records]
    amps = [record['amplitude'] for record in records]
    phases_GMT = [record['phase_GMT'] for record in records]
    speeds = [record['speed'] for record in records]
    return names, amps, phases_GMT, speeds

def get_datums(datums):
    """Pick the STND, MTL, MSL and NAVD88 values out of a NOAA datum list.

    Parameters
    ----------
    datums : iterable of mappings
        Each entry has a 'name' and a 'value' key; entries with other names
        are ignored. If a name occurs more than once the last value wins.

    Returns
    -------
    tuple
        ``(STND, MTL, MSL, NAVD88)``. Any datum that is not present in the
        list is returned as ``np.nan`` (previously only a missing NAVD88 was
        tolerated; a missing STND/MTL/MSL raised UnboundLocalError).
    """
    wanted = ('STND', 'MTL', 'MSL', 'NAVD88')
    values = dict.fromkeys(wanted, np.nan)
    for datum in datums:
        if datum['name'] in values:
            values[datum['name']] = datum['value']
    return tuple(values[name] for name in wanted)

In [None]:
# get datums
# For every retained tidal station, download (or reuse the cached copy of) the
# NOAA datum listing and collect STND / MTL / MSL / NAVD88 per station.
station_ids = []
station_names = []
station_codes = []
STND = []
MTL = []
MSL = []
NAVD88 = []
datums = []

for _, (station_id, name) in tidal_stations[['id', 'name']].iterrows():

    # meta data
    station_code = name.replace(' ', '_').replace(',', '')
    station_name = name.replace(' ', '_') + '_datums.json'
    station_ids.append(station_id)
    resource = 'datums'
    # https, consistent with the other NOAA requests in this notebook
    request = f'https://tidesandcurrents.noaa.gov/mdapi/v1.0/webapi/stations/{station_id}/{resource}.json?units=metric'
    station_codes.append(station_code)
    # Drop the ".json" suffix by slicing: str.strip('.json') removes any of the
    # characters ".", "j", "s", "o", "n" from both ends and mangles names.
    station_names.append(station_name[: -len('.json')])
    out_fn = noaa_prediction_data / f'{station_name}'

    # conditionally download (the file on disk acts as the cache)
    if not out_fn.exists():
        txt, http = urllib.request.urlretrieve(request, out_fn)
    else:
        txt = out_fn

    # parse json
    with open(txt, 'r') as f:
        response = json.load(f)

    datums = get_datums(response['datums'])

    STND.append(datums[0])
    MTL.append(datums[1])
    MSL.append(datums[2])
    NAVD88.append(datums[3])

# create DataFrames
datums = pd.DataFrame({'station_id': station_ids, 'MTL': MTL, 'MSL': MSL, 'NAVD88': NAVD88}, index=station_codes)
datums['MSL2NAVD88'] = datums['MSL'] - datums['NAVD88']
datums['MTL_NAVD88'] = datums['MTL'] - datums['NAVD88']

# Compare against the VDatum MSL->NAVD88 table. The table is loaded here (using
# the read call kept as a comment in the project-files cell) so this cell does
# not fail with a NameError on a fresh kernel; the comparison is skipped when
# the file is not available.
datum_conversion_fn = output / "tidal_stations_msl_to_navd88_vdatum.txt"
if datum_conversion_fn.exists():
    datum_conversion = pd.read_csv(datum_conversion_fn, na_values=[-999999], index_col=[0])
    compare_datum_conv = datums.join(datum_conversion, on='station_id', rsuffix='_vdatum').drop(['latitude', 'longitude'], axis=1)
else:
    print(f"skipping VDatum comparison: {datum_conversion_fn} not found")

In [None]:
# test data
# For every retained tidal station, download (or reuse the cached copy of) the
# NOAA harmonic constituents and collect amplitude, phase and speed tables with
# one row per station and one column per constituent.
amplitudes = []
phase_angles = []
speeds = []
station_ids = []
station_names = []
station_codes = []

for _, (station_id, name) in tidal_stations[['id', 'name']].iterrows():

    # meta data
    station_code = name.replace(' ', '_').replace(',', '')
    station_name = name.replace(' ', '_') + '.json'
    station_ids.append(station_id)
    resource = 'harcon'
    request = f'https://tidesandcurrents.noaa.gov/mdapi/v1.0/webapi/stations/{station_id}/{resource}.json?units=metric'
    station_codes.append(station_code)
    # Drop the ".json" suffix by slicing: str.strip('.json') removes any of the
    # characters ".", "j", "s", "o", "n" from both ends (e.g. "..._Pass.json"
    # would become "..._Pa").
    station_names.append(station_name[: -len('.json')])
    out_fn = const_data / f'{station_name}'

    # conditionally download (the file on disk acts as the cache)
    if not out_fn.exists():
        txt, http = urllib.request.urlretrieve(request, out_fn)
    else:
        txt = out_fn

    # parse json
    with open(txt, 'r') as f:
        harcon = json.load(f)
        names, amps, phases, speed = get_amp_phase(harcon)
        tmp_ser = pd.Series(amps, index=names)
        amplitudes.append(tmp_ser)
        tmp_ser2 = pd.Series(phases, index=names)
        phase_angles.append(tmp_ser2)
        tmp_ser3 = pd.Series(speed, index=names)
        speeds.append(tmp_ser3)

# create DataFrames
# NOTE(review): `names` is whatever the last station in the loop returned, so
# the columns follow that station's constituent list — confirm all stations
# report the same constituents.
amplitudes = pd.DataFrame(amplitudes, columns=names, index=station_codes)
phase_angles = pd.DataFrame(phase_angles, columns=names, index=station_codes)
speeds = pd.DataFrame(speeds, columns=names, index=station_codes)
station_lookup = pd.Series(station_ids, index=station_codes)

# Verify results

In [None]:
def harmonic_analysis_model(waterlevel, consts):
    """Run a pytide harmonic analysis on a modeled water-level series.

    Parameters
    ----------
    waterlevel : object exposing ``["time"].values`` and ``.values``
        Water-level series; presumably an xarray DataArray — TODO confirm.
    consts : passed unchanged to ``pytide.WaveTable``.

    Returns
    -------
    w : result of ``WaveTable.harmonic_analysis``
    (h, hp, time) : the input values, the series rebuilt by
        ``tide_from_tide_series`` from ``w``, and the time stamps as
        ``datetime64[s]``.

    NOTE(review): ``pytide`` is not imported in the import cell visible at the
    top of this notebook; calling this function on a fresh kernel would raise
    NameError unless it is imported elsewhere — confirm.
    """
    time = waterlevel["time"].values.astype("datetime64[s]")

    wt = pytide.WaveTable(consts)
    h = waterlevel.values
    # nodal corrections for the analysis period
    f, vu = wt.compute_nodal_modulations(time)
    w = wt.harmonic_analysis(h, f, vu)
    hp = wt.tide_from_tide_series(time, w)
    return w, (h, hp, time)


def get_modulus_angle(w):
    """Return ``(|w|, arg(w))`` for complex input, with the angle in degrees."""
    return np.abs(w), np.angle(w, deg=True)


def get_results(w, amplitude, phase_angle):
    """Tabulate modeled vs. NOAA amplitude and phase per constituent.

    `w` holds the complex harmonic-analysis results; `amplitude` and
    `phase_angle` are the NOAA reference values, and the index of `amplitude`
    becomes the index of the returned table. Model phases are mapped from
    negative angles into the 0-360 degree range before differencing.

    Returns a DataFrame with the columns model_amp, noaa_amp, amp_error,
    model_phase, noaa_phase, phase_error (errors are model minus NOAA).

    NOTE(review): phase_error is a plain difference and is not wrapped, so a
    model phase of 359 against a NOAA phase of 1 yields 358 — confirm intended.
    """
    modulus, angle = get_modulus_angle(w)
    # shift negative angles up by a full turn
    angle = np.where(angle < 0, angle + 360, angle)

    results = pd.DataFrame(
        np.column_stack((modulus, angle)),
        index=amplitude.index,
        columns=["model_amp", "model_phase"],
    )
    results["noaa_amp"] = amplitude
    results["noaa_phase"] = phase_angle
    results["amp_error"] = results["model_amp"] - amplitude
    results["phase_error"] = results["model_phase"] - phase_angle

    ordered = [
        "model_amp",
        "noaa_amp",
        "amp_error",
        "model_phase",
        "noaa_phase",
        "phase_error",
    ]
    return results[ordered]

In [None]:
# get the `top` largest-amplitude constituents in the BCs, plus the extra
# constituents listed in `additional_consts`
additional_consts = ["SA", "SSA"]
top = 13
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
important_consts = pd.concat(
    [
        FES_comps.sort_values(["amp"], ascending=False).iloc[:top],
        FES_comps.loc[additional_consts],
    ]
)
important_consts_order = important_consts.index.values
important_consts_caps = important_consts.index.str.capitalize().values

In [None]:
# NOAA time series data
# Download (or reuse the cached CSV of) hourly NOAA tide predictions on the
# NAVD datum for every retained station; results are keyed by station code.
# begin_date / end_date defined here are reused by the download cells below.
begin_date = "20180101"
end_date = "20181223"
product = "predictions"
interval = "h"
datum = "NAVD"
form = "csv"
time_zone = "gmt"
units = "metric"

noaa_predicted_time_series = {}

for _, (station_id, name) in tidal_stations[["id", "name"]].iterrows():

    # meta data
    station_code = name.replace(" ", "_").replace(",", "")
    request = f"https://tidesandcurrents.noaa.gov/api/datagetter?begin_date={begin_date}&end_date={end_date}&station={station_id}&product={product}&datum={datum}&units={units}&time_zone={time_zone}&application=ERDC&format={form}&interval={interval}"
    out_fn = noaa_prediction_data / f"{station_code}_{product}_ts_{datum}.csv"

    # conditionally download
    # (an existing file is used as-is and never refreshed)
    if not out_fn.exists():
        csv, http = urllib.request.urlretrieve(request, out_fn)
        pass
    else:
        csv = out_fn
        pass

    # header=0 + names=...: the file's own header row is replaced by these names
    noaa_data = pd.read_csv(
        csv, index_col=[0], parse_dates=True, names=["time", "prediction"], header=0
    )
    noaa_predicted_time_series[station_code] = noaa_data

In [None]:
# NOAA time series
# Same download as the NAVD prediction cell, but on the MSL datum.
# Relies on begin_date / end_date defined in an earlier cell.
product = "predictions"
interval = "h"
datum = "MSL"
form = "csv"
time_zone = "gmt"
units = "metric"

noaa_predicted_time_series_MSL = {}

for _, (station_id, name) in tidal_stations[["id", "name"]].iterrows():

    # meta data
    station_code = name.replace(" ", "_").replace(",", "")
    request = f"https://tidesandcurrents.noaa.gov/api/datagetter?begin_date={begin_date}&end_date={end_date}&station={station_id}&product={product}&datum={datum}&units={units}&time_zone={time_zone}&application=ERDC&format={form}&interval={interval}"
    out_fn = noaa_prediction_data / f"{station_code}_{product}_ts_{datum}.csv"

    # conditionally download
    # (an existing file is used as-is and never refreshed)
    if not out_fn.exists():
        csv, http = urllib.request.urlretrieve(request, out_fn)
        pass
    else:
        csv = out_fn
        pass

    # header=0 + names=...: the file's own header row is replaced by these names
    noaa_data = pd.read_csv(
        csv, index_col=[0], parse_dates=True, names=["time", "prediction"], header=0
    )
    noaa_predicted_time_series_MSL[station_code] = noaa_data

In [None]:
# NOAA observed time series data
# Download (or reuse the cached CSV of) NOAA "hourly_height" observations on
# the NAVD datum for every retained station; results are keyed by station code.
# Relies on begin_date / end_date defined in an earlier cell.
product = "hourly_height"
interval = "h"  # assigned for symmetry; not used in this request URL
datum = "NAVD"
form = "csv"
time_zone = "gmt"
units = "metric"

noaa_measured_time_series = {}

for _, (station_id, name) in tidal_stations[["id", "name"]].iterrows():

    # meta data
    station_code = name.replace(" ", "_").replace(",", "")
    request = f"https://tidesandcurrents.noaa.gov/api/datagetter?begin_date={begin_date}&end_date={end_date}&station={station_id}&product={product}&datum={datum}&units={units}&time_zone={time_zone}&application=ERDC&format={form}"
    out_fn = noaa_prediction_data / f"{station_code}_{product}_ts_{datum}.csv"

    # conditionally download
    # (an existing file is used as-is and never refreshed)
    if not out_fn.exists():
        csv, http = urllib.request.urlretrieve(request, out_fn)
        pass
    else:
        csv = out_fn
        pass

    # only the first two columns (time stamp and water level) are kept
    noaa_data = pd.read_csv(
        csv,
        index_col=[0],
        parse_dates=True,
        names=["time", "water_level"],
        header=0,
        usecols=[0, 1],
    )
    noaa_measured_time_series[station_code] = noaa_data

In [None]:
# NOAA observed time series data
# Same download as the NAVD observation cell, but on the MSL datum.
# Relies on begin_date / end_date defined in an earlier cell.
product = "hourly_height"
interval = "h"  # assigned for symmetry; not used in this request URL
datum = "MSL"
form = "csv"
time_zone = "gmt"
units = "metric"

noaa_measured_time_series_MSL = {}

for _, (station_id, name) in tidal_stations[["id", "name"]].iterrows():

    # meta data
    station_code = name.replace(" ", "_").replace(",", "")
    request = f"https://tidesandcurrents.noaa.gov/api/datagetter?begin_date={begin_date}&end_date={end_date}&station={station_id}&product={product}&datum={datum}&units={units}&time_zone={time_zone}&application=ERDC&format={form}"
    out_fn = noaa_prediction_data / f"{station_code}_{product}_ts_{datum}.csv"

    # conditionally download
    # (an existing file is used as-is and never refreshed)
    if not out_fn.exists():
        csv, http = urllib.request.urlretrieve(request, out_fn)
        pass
    else:
        csv = out_fn
        pass

    # only the first two columns (time stamp and water level) are kept
    noaa_data = pd.read_csv(
        csv,
        index_col=[0],
        parse_dates=True,
        names=["time", "water_level"],
        header=0,
        usecols=[0, 1],
    )
    noaa_measured_time_series_MSL[station_code] = noaa_data

# Error Analysis

## verified data

In [None]:
# Compare modeled water levels with NOAA verified observations (NAVD88).
# For each model station: compute RMSE, normalized RMSE, bias and r2, and save
# a one-to-one scatter plot and a time-series comparison figure.
rc = {"font.size": 16}
nrmse_col = []
rmse_col = []
error_col = []
r2_col = []
xs = []
ys = []
good_stations = []
datum = "NAVD88"
norm = "tide"  # "tide": normalize by predicted tidal range; "water_level": by observed range
aligned_data = {}
stop_wl = -8  # the last 8 stations in the model output are not processed

# t0 = pd.to_datetime(begin_date)
# tf = pd.to_datetime(end_date)
# tf = pd.to_datetime('2018-06-25 9:00')

# drop restart data
# t1 = pd.to_datetime('2018-04-01')
# t2 = pd.to_datetime('2018-05-01')
# drop_times = pd.date_range(t1, t2, freq='H')

t0 = noaa_his_data.time[0].values
t0 = pd.to_datetime("2018-01-15")  # overrides the line above: analysis starts 2018-01-15
tf = noaa_his_data.time[-1].values

with plt.rc_context(rc=rc):
    for station in noaa_his_data.coords["station_name"].values[:stop_wl]:
        waterlevel = (
            noaa_his_data["waterlevel"].loc[t0:tf, station].drop_duplicates(dim="time")
        )
        # station names are decoded from bytes to match the NOAA dict keys
        station = station.decode("ascii")

        # quick fix for error in station name
        if station == "ilots_Station_East_S.W._Pass":
            station = "Pilots_Station_East_S.W._Pass"

        # exclude observation points outside domain
        if np.isnan(waterlevel).all():
            continue
        else:
            pass

        # excludes NOAA which didn't have datum
        # (a series with no truthy water level is treated as "no data")
        noaa_data = noaa_measured_time_series[station].loc[t0:tf]
        if not noaa_data["water_level"].any():
            continue

        # the first assignment is immediately replaced by the time-sliced one
        predicted_noaa_data = noaa_predicted_time_series_MSL[station]
        predicted_noaa_data = noaa_predicted_time_series_MSL[station].loc[t0:tf]

        # D3D results
        xs.append(waterlevel["station_x_coordinate"])
        ys.append(waterlevel["station_y_coordinate"])
        good_stations.append(station)

        # align
        # model values are sampled at the NOAA observation time stamps
        df = pd.DataFrame(columns=["modeled", "observed"], index=noaa_data.index)
        df["modeled"] = waterlevel.loc[noaa_data.index]
        df["observed"] = noaa_data["water_level"]
        aligned_data[station] = df

        # stats
        rmse = np.sqrt(
            np.mean((waterlevel.to_series() - noaa_data["water_level"]) ** 2)
        )

        if norm == "tide":
            nrmse = rmse / (
                predicted_noaa_data["prediction"].max()
                - predicted_noaa_data["prediction"].min()
            )
        elif norm == "water_level":
            nrmse = rmse / (
                noaa_data["water_level"].max() - noaa_data["water_level"].min()
            )

        # bias: mean of (model - observation)
        error = np.mean(waterlevel.to_series() - noaa_data["water_level"])
        error_col.append(error)
        rmse_col.append(rmse)
        nrmse_col.append(nrmse)

        # plots
        obs = aligned_data[station]["observed"]
        mod = aligned_data[station]["modeled"]

        r2 = metrics.r2(mod, obs)
        r2_col.append(r2)

        fig, ax = one2one(obs, mod)
        ax.set_title(station.replace("_", " "))
        fn = figures / f"noaa_verified_scatter_water_level_{station}_{datum}_{case}.png"
        fig.savefig(fn, bbox_inches="tight")
        plt.close(fig)

        fig, ax = plt.subplots(figsize=(16, 9))
        ax.set_title(station)  # replaced by the second set_title call below
        waterlevel.plot(ax=ax)
        noaa_data.plot(ax=ax, lw=1, alpha=0.85)
        ax.legend(["Delft3D-FM", "NOAA verified"])
        ax.set_xlabel("")
        ax.set_title(f"Water level ({station})")
        ax.set_ylabel("water level [m, NAVD88]")
        ax.set_xlim(left=noaa_data.index[0], right=noaa_data.index[-1])
        fn = (
            figures
            / f"noaa_verified_comparison_water_level_{station}_{datum}_{case}.png"
        )
        fig.savefig(fn, bbox_inches="tight", dpi=300)
        plt.close(fig)  # free the figure; one is created per station

In [None]:
# Collect the per-station statistics gathered by the verification loop into one
# table (one row per station) and write it to CSV.
# NOTE(review): np.stack over lists of mixed type presumably coerces every value
# to a string first; .astype() below then converts the numeric columns back to
# float — confirm no precision is lost.
verified_stats = pd.DataFrame(
    np.stack([good_stations, nrmse_col, rmse_col, error_col, r2_col, xs, ys], axis=1),
    columns=["station", "nrmse", "rmse_m", "bias", "r2", "lon", "lat"],
).astype(
    {
        "station": str,
        "nrmse": float,
        "rmse_m": float,
        "bias": float,
        "r2": float,
        "lon": float,
        "lat": float,
    }
)
verified_stats_fn = output / f"verified_stats_{case}.csv"
verified_stats.to_csv(verified_stats_fn, index=False)

In [None]:
# Normalized RMSE plot
# Map of the stations colored by normalized RMSE in percent (color scale fixed
# to 0-30), with each station labeled by the first word of its name.
extent = [-93.5, -87, 28, 31]
fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent(extent, ccrs.PlateCarree())
ax.coastlines(resolution="10m", color="black", linewidth=1)
im = ax.scatter(
    verified_stats.lon,
    verified_stats.lat,
    s=80,
    c=100 * verified_stats.nrmse,
    transform=ccrs.PlateCarree(),
    zorder=10,
    cmap="jet",
    vmin=0,
    vmax=30
)
cbar = plt.colorbar(im)
if norm == "tide":
    cbar.set_label("normalized rmse [% tidal range]")
else:
    cbar.set_label("normalized rmse [%]")

# row unpacking relies on the column order station, nrmse, rmse_m, bias, r2, lon, lat
for _, (station, _, _, _, _, lon, lat) in verified_stats.iterrows():
    ax.text(
        lon - 0.05,
        lat - 0.05,
        station.split("_")[0],
        horizontalalignment="right",
        transform=ccrs.PlateCarree(),
        zorder=101,
    )
    pass

ax.set_title("Normalized RMSE of NOAA verified observations")
fn = figures / f"nrmse_norm-{norm}_spatial_distribution_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# error bar plot
# Bar chart of normalized RMSE per station for the verified-data comparison.
rc = {"font.size": 12}
with plt.rc_context(rc=rc):
    fig, ax = plt.subplots(figsize=(16, 9))
    verified_stats.plot.bar(ax=ax, x="station", y="nrmse")
    ax.set_xlabel("")
    ax.set_ylabel("normalized RMSE [.]")


# title and save happen outside the rc_context block
ax.set_title("Normalized RMSE for tidal prediction")
fn = figures / f"nrmse_bar_plot_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# RMSE plot
# Map of the stations colored by RMSE in meters (color scale starts at 0; the
# upper limit is chosen by matplotlib), labeled by the first word of each name.
extent = [-93.5, -87, 28, 31]
fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent(extent, ccrs.PlateCarree())
ax.coastlines(resolution="10m", color="black", linewidth=1)
im = ax.scatter(
    verified_stats.lon,
    verified_stats.lat,
    s=80,
    c=verified_stats.rmse_m,
    transform=ccrs.PlateCarree(),
    zorder=100,
    cmap="jet",
    vmin=0,
)
cbar = plt.colorbar(im)
cbar.set_label("rmse [m]")

# row unpacking relies on the column order station, nrmse, rmse_m, bias, r2, lon, lat
for _, (station, _, _, _, _, lon, lat) in verified_stats.iterrows():
    ax.text(
        lon - 0.05,
        lat - 0.05,
        station.split("_")[0],
        horizontalalignment="right",
        transform=ccrs.PlateCarree(),
        zorder=101,
    )
    pass

ax.set_title("RMSE of tidal prediction")
fn = figures / f"rmse_spatial_distribution_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# error bar plot
# Bar chart of RMSE [m] per station for the verified-data comparison.
rc = {"font.size": 12}
with plt.rc_context(rc=rc):
    fig, ax = plt.subplots(figsize=(16, 9))
    verified_stats.plot.bar(ax=ax, x="station", y="rmse_m")
    ax.set_xlabel("")
    ax.set_ylabel("RMSE [m]")


# title and save happen outside the rc_context block
ax.set_title("RMSE for tidal prediction")
fn = figures / f"rmse_bar_plot_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# error bar plot bias
# Bar chart of bias [m] (mean of model minus observation) per station.
rc = {"font.size": 12}
stat = "bias"  # column of verified_stats to plot; also used in the file name
with plt.rc_context(rc=rc):
    fig, ax = plt.subplots(figsize=(16, 9))
    verified_stats.plot.bar(ax=ax, x="station", y=f"{stat}")
    ax.set_xlabel("")
    ax.set_ylabel("bias [m]")


# title and save happen outside the rc_context block
ax.set_title("Bias for tidal prediction")
fn = figures / f"{stat}_bar_plot_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# bias plot
# Map of the stations colored by bias in meters, labeled by the first word of
# each station name.
extent = [-93.5, -87, 28, 31]

# Bias is signed. With vmin=0 every negative bias was clipped to the lowest
# color, so the color limits are made symmetric about zero instead. Fall back
# to matplotlib's autoscaling when no usable limit exists.
bias_lim = verified_stats.bias.abs().max()
if not (np.isfinite(bias_lim) and bias_lim > 0):
    bias_lim = None

fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent(extent, ccrs.PlateCarree())
ax.coastlines(resolution="10m", color="black", linewidth=1)
im = ax.scatter(
    verified_stats.lon,
    verified_stats.lat,
    s=80,
    c=verified_stats.bias,
    transform=ccrs.PlateCarree(),
    zorder=100,
    cmap="jet",
    vmin=None if bias_lim is None else -bias_lim,
    vmax=bias_lim,
)
cbar = plt.colorbar(im)
cbar.set_label("bias [m]")

# row unpacking relies on the column order station, nrmse, rmse_m, bias, r2, lon, lat
for _, (station, _, _, _, _, lon, lat) in verified_stats.iterrows():
    ax.text(
        lon - 0.05,
        lat - 0.05,
        station.split("_")[0],
        horizontalalignment="right",
        transform=ccrs.PlateCarree(),
        zorder=101,
    )

ax.set_title("Bias relative to verified data")
fn = figures / f"bias_spatial_distribution_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

## verified LMSL datum

In [None]:
# Compare modeled water levels with NOAA verified observations after removing
# the mean of each series ("LMSL"). For each model station: compute RMSE,
# normalized RMSE, bias and r2, and save a scatter and a time-series figure.
rc = {"font.size": 16}
nrmse_col = []
rmse_col = []
error_col = []
r2_col = []
xs = []
ys = []
good_stations = []
datum = "LMSL"
norm = "tide"  # "tide": normalize by predicted tidal range; "water_level": by observed range
aligned_data = {}
stop_wl = -8  # the last 8 stations in the model output are not processed

t0 = noaa_his_data.time[0].values
t0 = pd.to_datetime("2018-01-15")  # overrides the line above: analysis starts 2018-01-15
tf = noaa_his_data.time[-1].values

with plt.rc_context(rc=rc):
    for station in noaa_his_data.coords["station_name"].values[:stop_wl]:
        waterlevel = (
            noaa_his_data["waterlevel"].loc[t0:tf, station].drop_duplicates(dim="time")
        )
        # station names are decoded from bytes to match the NOAA dict keys
        station = station.decode("ascii")

        # quick fix for error in station name
        if station == "ilots_Station_East_S.W._Pass":
            station = "Pilots_Station_East_S.W._Pass"

        # exclude observation points outside domain
        if np.isnan(waterlevel).all():
            continue
        else:
            pass

        # excludes NOAA which didn't have datum
        # (a series with no truthy water level is treated as "no data")
        noaa_data = noaa_measured_time_series_MSL[station].loc[t0:tf]
        if not noaa_data["water_level"].any():
            continue

        predicted_noaa_data = noaa_predicted_time_series_MSL[station]
        predicted_noaa_data = noaa_predicted_time_series_MSL[station].loc[t0:tf]

        # manual shift to local MSL: remove the model's own mean
        eps = waterlevel.mean()
        waterlevel = waterlevel - eps

        # manual shift to local MSL: remove the observations' own mean
        eps2 = noaa_data.mean()
        noaa_data = noaa_data - eps2

        # D3D results
        xs.append(waterlevel["station_x_coordinate"])
        ys.append(waterlevel["station_y_coordinate"])
        good_stations.append(station)

        # align
        # model values are sampled at the NOAA observation time stamps
        df = pd.DataFrame(columns=["modeled", "observed"], index=noaa_data.index)
        df["modeled"] = waterlevel.loc[noaa_data.index]
        df["observed"] = noaa_data["water_level"]
        aligned_data[station] = df

        # stats
        rmse = np.sqrt(
            np.mean((waterlevel.to_series() - noaa_data["water_level"]) ** 2)
        )

        if norm == "tide":
            nrmse = rmse / (
                predicted_noaa_data["prediction"].max()
                - predicted_noaa_data["prediction"].min()
            )
        elif norm == "water_level":
            nrmse = rmse / (
                noaa_data["water_level"].max() - noaa_data["water_level"].min()
            )

        # bias: mean of (model - observation)
        error = np.mean(waterlevel.to_series() - noaa_data["water_level"])
        error_col.append(error)
        rmse_col.append(rmse)
        nrmse_col.append(nrmse)

        # plots
        obs = aligned_data[station]["observed"]
        mod = aligned_data[station]["modeled"]

        r2 = metrics.r2(mod, obs)
        r2_col.append(r2)

        fig, ax = one2one(obs, mod, quantity_str="water level [m, LMSL]")
        ax.set_title(station.replace("_", " "))
        fn = figures / f"noaa_verified_scatter_water_level_{station}_{datum}_{case}.png"
        fig.savefig(fn, bbox_inches="tight")
        plt.close(fig)

        fig, ax = plt.subplots(figsize=(16, 9))
        ax.set_title(station)
        waterlevel.plot(ax=ax)
        noaa_data.plot(ax=ax, lw=1, alpha=0.85)
        ax.legend(["Delft3D-FM", "NOAA verified"])
        ax.set_xlabel("")
        ax.set_title(f"Water level ({station})")
        # both series were shifted to their own mean above, so the axis is
        # LMSL (matching the scatter plot label), not NAVD88
        ax.set_ylabel("water level [m, LMSL]")
        ax.set_xlim(left=noaa_data.index[0], right=noaa_data.index[-1])
        fn = (
            figures
            / f"noaa_verified_comparison_water_level_{station}_{datum}_{case}.png"
        )
        fig.savefig(fn, bbox_inches="tight", dpi=300)
        plt.close(fig)  # free the figure; one is created per station

In [None]:
# Collect the per-station LMSL statistics gathered by the loop above into one
# table (one row per station) and write it to CSV.
# NOTE(review): np.stack over lists of mixed type presumably coerces every value
# to a string first; .astype() below then converts the numeric columns back to
# float — confirm no precision is lost.
verified_LMSL_stats = pd.DataFrame(
    np.stack([good_stations, nrmse_col, rmse_col, error_col, r2_col, xs, ys], axis=1),
    columns=["station", "nrmse", "rmse_m", "bias", "r2", "lon", "lat"],
).astype(
    {
        "station": str,
        "nrmse": float,
        "rmse_m": float,
        "bias": float,
        "r2": float,
        "lon": float,
        "lat": float,
    }
)
# NOTE(review): the file name spells "LSML" while everything else uses "LMSL";
# left unchanged because other tooling may read this file name — confirm.
verified_LMSL_stats_fn = output / f"verified_LSML_stats_{case}.csv"
verified_LMSL_stats.to_csv(verified_LMSL_stats_fn, index=False)

In [None]:
# Normalized RMSE plot
# Map of the stations colored by LMSL normalized RMSE in percent (color scale
# fixed to 0-25), with each station labeled by the first word of its name.
extent = [-93.5, -87, 28, 31]
fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent(extent, ccrs.PlateCarree())
ax.coastlines(resolution="10m", color="black", linewidth=1)
im = ax.scatter(
    verified_LMSL_stats.lon,
    verified_LMSL_stats.lat,
    s=80,
    c=100 * verified_LMSL_stats.nrmse,
    transform=ccrs.PlateCarree(),
    zorder=10,
    cmap="jet",
    vmin=0,
    vmax=25
)
cbar = plt.colorbar(im)
if norm == "tide":
    cbar.set_label("normalized rmse [% tidal range]")
else:
    cbar.set_label("normalized rmse [%]")

# row unpacking relies on the column order station, nrmse, rmse_m, bias, r2, lon, lat
for _, (station, _, _, _, _, lon, lat) in verified_LMSL_stats.iterrows():
    ax.text(
        lon - 0.05,
        lat - 0.05,
        station.split("_")[0],
        horizontalalignment="right",
        transform=ccrs.PlateCarree(),
        zorder=101,
    )
    pass

ax.set_title("Normalized RMSE of NOAA verified observations")
fn = figures / f"nrmse_norm-{norm}_spatial_distribution_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# RMSE plot
# Map of the stations colored by LMSL RMSE in meters (color scale fixed to
# 0-0.15), with each station labeled by the first word of its name.
extent = [-93.5, -87, 28, 31]
fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent(extent, ccrs.PlateCarree())
ax.coastlines(resolution="10m", color="black", linewidth=1)
im = ax.scatter(
    verified_LMSL_stats.lon,
    verified_LMSL_stats.lat,
    s=80,
    c=verified_LMSL_stats.rmse_m,
    transform=ccrs.PlateCarree(),
    zorder=100,
    cmap="jet",
    vmin=0,
    vmax=0.15
)
cbar = plt.colorbar(im)
cbar.set_label("rmse [m]")

# row unpacking relies on the column order station, nrmse, rmse_m, bias, r2, lon, lat
for _, (station, _, _, _, _, lon, lat) in verified_LMSL_stats.iterrows():
    ax.text(
        lon - 0.05,
        lat - 0.05,
        station.split("_")[0],
        horizontalalignment="right",
        transform=ccrs.PlateCarree(),
        zorder=101,
    )

# this figure shows RMSE; the title previously repeated the bias map's title
ax.set_title("RMSE relative to verified data")
fn = figures / f"rmse_spatial_distribution_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# bias plot
# Map of the stations colored by LMSL bias in meters, labeled by the first
# word of each station name.
extent = [-93.5, -87, 28, 31]

# Bias is signed. With vmin=0 every negative bias was clipped to the lowest
# color, so the color limits are made symmetric about zero instead. Fall back
# to matplotlib's autoscaling when no usable limit exists.
bias_lim = verified_LMSL_stats.bias.abs().max()
if not (np.isfinite(bias_lim) and bias_lim > 0):
    bias_lim = None

fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent(extent, ccrs.PlateCarree())
ax.coastlines(resolution="10m", color="black", linewidth=1)
im = ax.scatter(
    verified_LMSL_stats.lon,
    verified_LMSL_stats.lat,
    s=80,
    c=verified_LMSL_stats.bias,
    transform=ccrs.PlateCarree(),
    zorder=100,
    cmap="jet",
    vmin=None if bias_lim is None else -bias_lim,
    vmax=bias_lim,
)
cbar = plt.colorbar(im)
cbar.set_label("bias [m]")

# row unpacking relies on the column order station, nrmse, rmse_m, bias, r2, lon, lat
for _, (station, _, _, _, _, lon, lat) in verified_LMSL_stats.iterrows():
    ax.text(
        lon - 0.05,
        lat - 0.05,
        station.split("_")[0],
        horizontalalignment="right",
        transform=ccrs.PlateCarree(),
        zorder=101,
    )

ax.set_title("Bias relative to verified data")
fn = figures / f"bias_spatial_distribution_{datum}_{case}_verified.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

## predicted LMSL datum

In [None]:
# NOAA time series with shift to LMSL
# Compare modeled water levels with NOAA tide predictions after removing the
# mean of each series. For each model station: compute RMSE, normalized RMSE
# and bias, and save a time-series comparison figure.
# Uses t0 / tf as set by the preceding analysis cells.
rc = {"font.size": 16}
nrmse_col = []
rmse_col = []
error_col = []
xs = []
ys = []
good_stations = []
datum = "LMSL"

with plt.rc_context(rc=rc):
    for station in noaa_his_data.coords["station_name"].values:
        waterlevel = noaa_his_data["waterlevel"].loc[t0:tf, station]
        # station names are decoded from bytes to match the NOAA dict keys
        station = station.decode("ascii")

        # quick fix for error in station name
        if station == "ilots_Station_East_S.W._Pass":
            station = "Pilots_Station_East_S.W._Pass"

        # exclude observation points outside domain
        if np.isnan(waterlevel).all():
            continue
        else:
            pass

        # excludes NOAA which didn't have datum
        # (a series with no truthy prediction is treated as "no data")
        predicted_noaa_data = noaa_predicted_time_series_MSL[station]
        if not predicted_noaa_data["prediction"].any():
            print(station)
            continue

        # restrict both series to the analysis window / prediction time stamps
        predicted_noaa_data = noaa_predicted_time_series_MSL[station].loc[t0:tf]
        waterlevel = waterlevel.loc[predicted_noaa_data.index]

        # drop instabilites introduced by restart
        # predicted_noaa_data = predicted_noaa_data.drop(labels=drop_times)
        # waterlevel = waterlevel.drop_sel(time=drop_times)

        # manual shift to local MSL: remove the model's own mean
        eps = waterlevel.mean()
        waterlevel = waterlevel - eps

        # manual shift to local MSL: remove the predictions' own mean
        eps2 = predicted_noaa_data.mean()
        predicted_noaa_data = predicted_noaa_data - eps2

        # D3D results
        xs.append(waterlevel["station_x_coordinate"])
        ys.append(waterlevel["station_y_coordinate"])
        good_stations.append(station)

        # stats
        rmse = np.sqrt(
            np.mean((waterlevel.to_series() - predicted_noaa_data["prediction"]) ** 2)
        )
        # normalized by the predicted tidal range
        nrmse = rmse / (
            predicted_noaa_data["prediction"].max()
            - predicted_noaa_data["prediction"].min()
        )
        # bias: mean of (model - prediction)
        error = np.mean(waterlevel.to_series() - predicted_noaa_data["prediction"])
        error_col.append(error)
        rmse_col.append(rmse)
        nrmse_col.append(nrmse)

        # plot
        fig, ax = plt.subplots(figsize=(16, 9))
        ax.set_title(station)
        waterlevel.plot(ax=ax, color="b")
        predicted_noaa_data.plot(ax=ax, color="k", lw=1)
        ax.legend(["Delft3D-FM", "NOAA prediction"])
        ax.set_xlabel("")
        ax.set_title(f"Water level ({station})")
        # both series were shifted to their own mean above, so the axis is
        # LMSL, not NAVD88
        ax.set_ylabel("water level [m, LMSL]")
        ax.set_xlim(
            left=predicted_noaa_data.index[0], right=predicted_noaa_data.index[-1]
        )
        fn = (
            figures
            / f"noaa_prediction_comparison_water_level_{station}_{datum}_{case}.png"
        )
        fig.savefig(fn, bbox_inches="tight", dpi=300)
        plt.close(fig)  # free the figure; one is created per station

In [None]:
# Collect the per-station prediction statistics gathered by the loop above
# into one table (one row per station, no r2 column) and write it to CSV.
# NOTE(review): np.stack over lists of mixed type presumably coerces every value
# to a string first; .astype() below then converts the numeric columns back to
# float — confirm no precision is lost.
LMSL_stats = pd.DataFrame(
    np.stack([good_stations, nrmse_col, rmse_col, error_col, xs, ys], axis=1),
    columns=["station", "nrmse", "rmse_m", "bias", "lon", "lat"],
).astype(
    {
        "station": str,
        "nrmse": float,
        "rmse_m": float,
        "bias": float,
        "lon": float,
        "lat": float,
    }
)
LMSL_stats_fn = output / f"stats_LMSL_{case}.csv"
LMSL_stats.to_csv(LMSL_stats_fn, index=False)

In [None]:
# Normalized RMSE plot
# Map of the stations colored by normalized RMSE (percent of tidal range) for
# the prediction comparison; color scale starts at 0, upper limit automatic.
extent = [-93.5, -87, 28, 31]
fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent(extent, ccrs.PlateCarree())
ax.coastlines(resolution="10m", color="black", linewidth=1)
im = ax.scatter(
    LMSL_stats.lon,
    LMSL_stats.lat,
    s=80,
    c=100 * LMSL_stats.nrmse,
    transform=ccrs.PlateCarree(),
    zorder=10,
    cmap="jet",
    vmin=0,
)
cbar = plt.colorbar(im)
cbar.set_label("normalized rmse [% tidal range]")

# row unpacking relies on the column order station, nrmse, rmse_m, bias, lon, lat
for _, (station, _, _, _, lon, lat) in LMSL_stats.iterrows():
    ax.text(
        lon - 0.05,
        lat - 0.05,
        station.split("_")[0],
        horizontalalignment="right",
        transform=ccrs.PlateCarree(),
        zorder=101,
    )
    pass

ax.set_title("Normalized RMSE of tidal prediction")
fn = figures / f"nrmse_spatial_distribution_{datum}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# Bar chart of normalized RMSE per station (LMSL statistics).
rc = {"font.size": 12}
with plt.rc_context(rc=rc):
    fig, ax = plt.subplots(figsize=(16, 9))
    LMSL_stats.plot.bar(x="station", y="nrmse", ax=ax)
    ax.set(xlabel="", ylabel="normalized RMSE [.]")

# The title and the export are issued after the rc context has been left.
ax.set_title("Normalized RMSE for tidal prediction")
fn = figures / f"nrmse_bar_plot_{datum}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# Map of RMSE: one marker per station, coloured by rmse_m.
extent = [-93.5, -87, 28, 31]
plate_carree = ccrs.PlateCarree()

fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=plate_carree)
ax.set_extent(extent, plate_carree)
ax.coastlines(resolution="10m", color="black", linewidth=1)

marker_kwargs = dict(s=80, transform=plate_carree, zorder=100, cmap="jet", vmin=0)
im = ax.scatter(LMSL_stats.lon, LMSL_stats.lat, c=LMSL_stats.rmse_m, **marker_kwargs)
cbar = plt.colorbar(im)
cbar.set_label("rmse [m]")

# Annotate every marker with the first "_"-separated token of its station
# name, placed slightly below-left of the point.
label_kwargs = dict(horizontalalignment="right", transform=plate_carree, zorder=101)
for station, lon, lat in zip(LMSL_stats.station, LMSL_stats.lon, LMSL_stats.lat):
    ax.text(lon - 0.05, lat - 0.05, station.split("_")[0], **label_kwargs)

ax.set_title("RMSE of tidal prediction")
fn = figures / f"rmse_spatial_distribution_{datum}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# Bar chart of RMSE per station (LMSL statistics).
rc = {"font.size": 12}
with plt.rc_context(rc=rc):
    fig, ax = plt.subplots(figsize=(16, 9))
    LMSL_stats.plot.bar(x="station", y="rmse_m", ax=ax)
    ax.set(xlabel="", ylabel="RMSE [m]")

# The title and the export are issued after the rc context has been left.
ax.set_title("RMSE for tidal prediction")
fn = figures / f"rmse_bar_plot_{datum}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

## NAVD88 datum predicted data

In [None]:
# Compare the modeled water level with the NOAA prediction at every station:
# accumulate rmse / nrmse / bias plus station coordinates, and save one
# time-series figure per station.
rc = {"font.size": 16}
nrmse_col, rmse_col, error_col = [], [], []
xs, ys, good_stations = [], [], []

with plt.rc_context(rc=rc):
    for station in noaa_his_data.coords["station_name"].values:
        # The raw coordinate value indexes the model output; the decoded
        # string is used for everything else.
        waterlevel = noaa_his_data["waterlevel"].loc[t0:tf, station]
        station = station.decode("ascii")

        # quick fix for error in station name
        if station == "ilots_Station_East_S.W._Pass":
            station = "Pilots_Station_East_S.W._Pass"

        # exclude observation points outside domain (all-NaN series)
        if np.isnan(waterlevel).all():
            continue

        # excludes NOAA stations which didn't have datum
        predicted_noaa_data = noaa_predicted_time_series[station]
        if not predicted_noaa_data["prediction"].any():
            continue

        # Restrict the prediction to the analysis window and sample the
        # model at the prediction time stamps.
        predicted_noaa_data = predicted_noaa_data.loc[t0:tf]
        waterlevel = waterlevel.loc[predicted_noaa_data.index]

        # D3D results
        xs.append(waterlevel["station_x_coordinate"])
        ys.append(waterlevel["station_y_coordinate"])
        good_stations.append(station)

        # stats: nrmse is rmse divided by the range of the prediction
        prediction = predicted_noaa_data["prediction"]
        residual = waterlevel.to_series() - prediction
        rmse = np.sqrt(np.mean(residual ** 2))
        nrmse = rmse / (prediction.max() - prediction.min())
        error = np.mean(residual)
        error_col.append(error)
        rmse_col.append(rmse)
        nrmse_col.append(nrmse)

        # plot
        fig, ax = plt.subplots(figsize=(16, 9))
        ax.set_title(station)
        waterlevel.plot(ax=ax, color="b")
        predicted_noaa_data.plot(ax=ax, color="k", lw=1)
        ax.legend(["Delft3D-FM", "NOAA prediction"])
        ax.set_xlabel("")
        ax.set_title(f"Water level ({station})")
        ax.set_ylabel("water level [m, NAVD88]")
        ax.set_xlim(
            left=predicted_noaa_data.index[0], right=predicted_noaa_data.index[-1]
        )
        figure_name = (
            f"noaa_prediction_comparison_water_level_{station}_{datum}_{case}.png"
        )
        fn = figures / figure_name
        fig.savefig(fn, bbox_inches="tight", dpi=300)
        plt.close(fig)

In [None]:
# Collect the per-station statistics of the prediction comparison into one
# table and write it to CSV.
predicted_columns = ["station", "nrmse", "rmse_m", "bias", "lon", "lat"]
predicted_dtypes = {
    name: (str if name == "station" else float) for name in predicted_columns
}
# The stacked array has a single dtype for all columns, so the numeric
# columns are cast back to float once the frame exists.
predicted_table = np.stack(
    [good_stations, nrmse_col, rmse_col, error_col, xs, ys], axis=1
)
predicted_stats = pd.DataFrame(predicted_table, columns=predicted_columns).astype(
    predicted_dtypes
)
predicted_stats_fn = output / f"predicted_rmse_{case}.csv"
predicted_stats.to_csv(predicted_stats_fn, index=False)

In [None]:
# Map of RMSE for the prediction comparison; also sets `datum`, which is
# used in the output file names.
datum = "NAVD88"
extent = [-93.5, -87, 28, 31]
plate_carree = ccrs.PlateCarree()

fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=plate_carree)
ax.set_extent(extent, plate_carree)
ax.coastlines(resolution="10m", color="black", linewidth=1)

marker_kwargs = dict(s=80, transform=plate_carree, zorder=100, cmap="jet", vmin=0)
im = ax.scatter(
    predicted_stats.lon, predicted_stats.lat, c=predicted_stats.rmse_m, **marker_kwargs
)
cbar = plt.colorbar(im)
cbar.set_label("rmse [m]")

# Annotate every marker with the first "_"-separated token of its station
# name, placed slightly below-left of the point.
label_kwargs = dict(horizontalalignment="right", transform=plate_carree, zorder=101)
for station, lon, lat in zip(
    predicted_stats.station, predicted_stats.lon, predicted_stats.lat
):
    ax.text(lon - 0.05, lat - 0.05, station.split("_")[0], **label_kwargs)

ax.set_title("RMSE of tidal prediction")
fn = figures / f"rmse_spatial_distribution_{datum}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# Bar chart of RMSE per station (prediction comparison).
rc = {"font.size": 12}
with plt.rc_context(rc=rc):
    fig, ax = plt.subplots(figsize=(16, 9))
    predicted_stats.plot.bar(x="station", y="rmse_m", ax=ax)
    ax.set(xlabel="", ylabel="RMSE [m]")

# The title and the export are issued after the rc context has been left.
ax.set_title("RMSE for tidal prediction")
fn = figures / f"rmse_bar_plot_{datum}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# Map of normalized RMSE for the prediction comparison; the colour scale is
# fixed to the 0-17.5 range.
extent = [-93.5, -87, 28, 31]
plate_carree = ccrs.PlateCarree()

fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=plate_carree)
ax.set_extent(extent, plate_carree)
ax.coastlines(resolution="10m", color="black", linewidth=1)

marker_kwargs = dict(
    s=80, transform=plate_carree, zorder=10, cmap="jet", vmin=0, vmax=17.5
)
im = ax.scatter(
    predicted_stats.lon,
    predicted_stats.lat,
    c=100 * predicted_stats.nrmse,
    **marker_kwargs,
)
cbar = plt.colorbar(im)
cbar.set_label("normalized rmse [% tidal range]")

# Annotate every marker with the first "_"-separated token of its station
# name, placed slightly below-left of the point.
label_kwargs = dict(horizontalalignment="right", transform=plate_carree, zorder=101)
for station, lon, lat in zip(
    predicted_stats.station, predicted_stats.lon, predicted_stats.lat
):
    ax.text(lon - 0.05, lat - 0.05, station.split("_")[0], **label_kwargs)

ax.set_title("Normalized RMSE of tidal prediction")
fn = figures / f"nrmse_spatial_distribution_{datum}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# Bar chart of normalized RMSE per station (prediction comparison).
rc = {"font.size": 12}
with plt.rc_context(rc=rc):
    fig, ax = plt.subplots(figsize=(16, 9))
    predicted_stats.plot.bar(x="station", y="nrmse", ax=ax)
    ax.set(xlabel="", ylabel="normalized RMSE [.]")
    # optional fixed y-range, currently disabled:
    # ax.set_ylim([0, 0.175])

# The title and the export are issued after the rc context has been left.
ax.set_title("Normalized RMSE for tidal prediction")
fn = figures / f"nrmse_bar_plot_{datum}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# Map of the amplitude error of a single constituent (Sa).
# NOTE(review): marker positions come from LMSL_stats while the colours come
# from consts_data; this presumably relies on both having the same station
# order -- confirm.
const = "Sa"
extent = [-93.5, -87, 28, 31]
plate_carree = ccrs.PlateCarree()

fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=plate_carree)
ax.set_extent(extent, plate_carree)
ax.coastlines(resolution="10m", color="black", linewidth=1)

marker_kwargs = dict(
    s=80, transform=plate_carree, zorder=100, cmap="jet", vmin=-0.15, vmax=0.0
)
amp_error = consts_data["amp_error"].sel(consts=const)
im = ax.scatter(LMSL_stats.lon, LMSL_stats.lat, c=amp_error, **marker_kwargs)
cbar = plt.colorbar(im)
cbar.set_label("abs. error [m]")

# Annotate every marker with the first "_"-separated token of its station
# name, placed slightly below-left of the point.
label_kwargs = dict(horizontalalignment="right", transform=plate_carree, zorder=101)
for station, lon, lat in zip(LMSL_stats.station, LMSL_stats.lon, LMSL_stats.lat):
    ax.text(lon - 0.05, lat - 0.05, station.split("_")[0], **label_kwargs)

ax.set_title(f"Absolute error in {const} amplitude")
fn = figures / f"amp_error_spatial_distribution_{const}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

In [None]:
# Map of the amplitude error of a single constituent (Ssa).
# NOTE(review): marker positions come from LMSL_stats while the colours come
# from consts_data; this presumably relies on both having the same station
# order -- confirm.
const = "Ssa"
extent = [-93.5, -87, 28, 31]
plate_carree = ccrs.PlateCarree()

fig = plt.figure(figsize=(16, 6.5))
ax = plt.axes(projection=plate_carree)
ax.set_extent(extent, plate_carree)
ax.coastlines(resolution="10m", color="black", linewidth=1)

marker_kwargs = dict(
    s=80, transform=plate_carree, zorder=100, cmap="jet", vmin=-0.1, vmax=0.0
)
amp_error = consts_data["amp_error"].sel(consts=const)
im = ax.scatter(LMSL_stats.lon, LMSL_stats.lat, c=amp_error, **marker_kwargs)
cbar = plt.colorbar(im)
cbar.set_label("abs. error [m]")

# Annotate every marker with the first "_"-separated token of its station
# name, placed slightly below-left of the point.
label_kwargs = dict(horizontalalignment="right", transform=plate_carree, zorder=101)
for station, lon, lat in zip(LMSL_stats.station, LMSL_stats.lon, LMSL_stats.lat):
    ax.text(lon - 0.05, lat - 0.05, station.split("_")[0], **label_kwargs)

ax.set_title(f"Absolute error in {const} amplitude")
fn = figures / f"amp_error_spatial_distribution_{const}_{case}.png"
fig.savefig(fn, bbox_inches="tight", dpi=300)

# Sa and Ssa correction

In [None]:
# Amplitude corrections for the Sa and Ssa constituents: the mean amplitude
# error over all stations except the ones listed in the respective drop
# list. The printed correction is the negated mean error.
Sa_drop_stations = [
    "Pilottown",
    "New_Canal_Station",
    "I-10_Bonnet_Carre_Floodway",
    "Coast_Guard_Sector_Mobile",
]
Sa_corr = (
    consts_data["amp_error"]
    .sel(consts=["Sa"])
    .drop_sel(station_name=Sa_drop_stations)
    .mean()
)
print(f"Sa amplitude correction:\t{-Sa_corr.data:.7f}")

Ssa_drop_stations = [
    "Pilottown",
    "New_Canal_Station",
    "I-10_Bonnet_Carre_Floodway",
    "Coast_Guard_Sector_Mobile",
]
# Use the Ssa-specific drop list here (it used to reference the Sa list, so
# edits to Ssa_drop_stations would have been silently ignored).
Ssa_corr = (
    consts_data["amp_error"]
    .sel(consts=["Ssa"])
    .drop_sel(station_name=Ssa_drop_stations)
    .mean()
)
print(f"Ssa amplitude correction:\t{-Ssa_corr.data:.7f}")

## scratch plots

In [None]:
# Scratch analysis: add a manually computed SA + SSA signal to the modeled
# water level, compare the result with the NOAA prediction, and save one
# figure per station.
rc = {"font.size": 16}
nrmse_col, rmse_col, error_col = [], [], []
xs, ys, good_stations = [], [], []

t0 = pd.to_datetime(begin_date)
tf = pd.to_datetime(end_date)

# Angular speeds in rad/hour (model_hrs below is expressed in hours).
SA_omega = 2*np.pi/(365*24)
SSA_omega = 2*np.pi/(365/2*24)

wt = pytide.WaveTable(important_consts_caps)


with plt.rc_context(rc=rc):
    for station in his_data.coords["station_name"].values:
        waterlevel = his_data["waterlevel"].loc[t0:tf, station]
        # hours elapsed since the first sample of the selected window
        model_hrs = (pd.to_timedelta(waterlevel.time - waterlevel.time[0]).total_seconds() / (60*60)).values
        station = station.decode("ascii")
        time = waterlevel["time"].values.astype("datetime64[s]")

        # exclude observation points outside domain (all-NaN series)
        if np.isnan(waterlevel).all():
            continue

        # manually compute SA and SSA
        # nodal factors are one for SA and SSA, so they are ignored
        #_, vu = wt.compute_nodal_modulations(time)
        #A_vu = vu[13, :]
        #SA_vu = vu[14, :]
        station_amps = amplitudes.loc[station]
        station_phases = phase_angles.loc[station]

        SA_amp = station_amps["SA"]
        SA_phase = (np.pi/180)*station_phases["SA"]

        SSA_amp = station_amps["SSA"]
        SSA_phase = (np.pi/180)*station_phases["SSA"]

        SA = SA_amp*np.cos(SA_omega*model_hrs - SA_phase)
        SSA = SSA_amp*np.cos(SSA_omega*model_hrs - SSA_phase)

        solar_annual = xr.DataArray(SA + SSA, coords=waterlevel.coords, dims=["time"])

        # add solar correction
        corrected_waterlevel = waterlevel + solar_annual

        # excludes NOAA stations which didn't have datum
        predicted_noaa_data = noaa_predicted_time_series[station]
        if not predicted_noaa_data["prediction"].any():
            continue

        # D3D results
        xs.append(corrected_waterlevel["station_x_coordinate"])
        ys.append(corrected_waterlevel["station_y_coordinate"])
        good_stations.append(station)

        # stats: nrmse is rmse divided by the range of the prediction
        prediction = predicted_noaa_data["prediction"]
        residual = corrected_waterlevel.to_series() - prediction
        rmse = np.sqrt(np.mean(residual**2))
        nrmse = rmse / (prediction.max() - prediction.min())
        error = np.mean(residual)
        error_col.append(error)
        rmse_col.append(rmse)
        nrmse_col.append(nrmse)

        # plot
        fig, ax = plt.subplots(figsize=(16, 9))
        ax.set_title(station)
        corrected_waterlevel.plot(ax=ax, color="b")
        solar_annual.plot(ax=ax, color="r", lw=4, zorder=100)
        predicted_noaa_data.plot(ax=ax, color="k", lw=1)
        ax.legend(["Delft3D-FM w/ solar", "The solar const.", "NOAA prediction"])
        ax.set_xlabel("")
        ax.set_title(f"Water level ({station})")
        ax.set_ylabel("water level [m, NAVD88]")
        ax.set_xlim(left=predicted_noaa_data.index[0], right=predicted_noaa_data.index[-1])
        figure_name = (
            f"noaa_prediction_comparison_water_level_{station}_{datum}_{case}_w_solar.png"
        )
        fn = figures / figure_name
        fig.savefig(fn, bbox_inches="tight")
        plt.close(fig)

In [None]:
# Statistics of the solar-corrected comparison, one row per station.
# The stacked array has a single dtype for all columns, so the numeric
# columns are cast back to float after the frame is built.
alt_stats = pd.DataFrame(
    np.stack([good_stations, nrmse_col, rmse_col, error_col, xs, ys], axis=1),
    columns=["station", "nrmse", "rmse_m", "bias", "lon", "lat"],
).astype(
    {
        "station": str,
        "nrmse": float,
        "rmse_m": float,
        "bias": float,
        "lon": float,
        "lat": float,
    }
)
stats_fn = output / f'location_stats_w_solar_{case}.csv'
# Write the table built in this cell; the cell previously exported an
# unrelated `stats` object that it never defined.
alt_stats.to_csv(stats_fn, index=False)

In [None]:
# Scratch analysis: rebuild the NOAA prediction by summing one cosine per
# constituent and plot it against the NOAA prediction, one figure per
# station.
rc = {"font.size": 16}
nrmse_col, rmse_col, error_col = [], [], []
xs, ys, good_stations = [], [], []

t0 = pd.to_datetime(begin_date)
tf = pd.to_datetime(end_date)

SA_omega = 2*np.pi/(365*24)
SSA_omega = 2*np.pi/(365/2*24)

wt = pytide.WaveTable(important_consts_caps)


with plt.rc_context(rc=rc):
    for station in his_data.coords["station_name"].values:
        waterlevel = his_data["waterlevel"].loc[t0:tf, station]
        # hours elapsed since the first sample of the selected window
        model_hrs = (pd.to_timedelta(waterlevel.time - waterlevel.time[0]).total_seconds() / (60*60)).values
        station = station.decode("ascii")
        time = waterlevel["time"].values.astype("datetime64[s]")

        # exclude observation points outside domain (all-NaN series)
        if np.isnan(waterlevel).all():
            continue

        # excludes NOAA stations which didn't have datum
        predicted_noaa_data = noaa_predicted_time_series[station]
        if not predicted_noaa_data["prediction"].any():
            continue

        # manual noaa signal: speeds and phase angles are converted from
        # degrees to radians with the pi/180 factor
        station_amps = amplitudes.loc[station]
        station_speeds = speeds.loc[station]
        station_phases = phase_angles.loc[station]
        manual_noaa = np.zeros_like(model_hrs)
        for con in amplitudes.columns:
            manual_noaa += station_amps[con] * np.cos((np.pi/180)*station_speeds[con]*model_hrs -(np.pi/180)*station_phases[con])

        manual_noaa = xr.DataArray(manual_noaa, coords=waterlevel.coords, dims=["time"])

        # plot
        fig, ax = plt.subplots(figsize=(16, 9))
        ax.set_title(station)
        manual_noaa.plot(ax=ax, color="b")
        predicted_noaa_data.plot(ax=ax, color="k", lw=1)
        ax.legend(["manual NOAA prediction", "NOAA prediction"])
        ax.set_xlabel("")
        ax.set_title(f"Water level ({station})")
        ax.set_ylabel("water level [m, NAVD88]")
        ax.set_xlim(left=predicted_noaa_data.index[0], right=predicted_noaa_data.index[-1])
        fn = figures / f"manual_noaa_prediction_comparison_water_level_{station}.png"
        fig.savefig(fn, bbox_inches="tight")
        plt.close(fig)

In [None]:
# Run the harmonic analysis on the modeled water level of every station,
# compare it with the reference amplitudes and phase angles, and save one
# amplitude-error and one phase-error bar chart per station. The result
# tables are kept in `all_results`, keyed by station name.
rc = {"font.size": 16}
all_results = {}

with plt.rc_context(rc=rc):
    for station in noaa_his_data.coords["station_name"].values:
        waterlevel = noaa_his_data["waterlevel"].loc[:, station]
        station = station.decode("ascii")

        # quick fix for error in station name
        if station == "ilots_Station_East_S.W._Pass":
            station = "Pilots_Station_East_S.W._Pass"

        # skip stations whose series is entirely NaN
        if np.isnan(waterlevel).all():
            continue

        w, (h, hp, time) = harmonic_analysis_model(waterlevel, important_consts_caps)
        reference_amps = amplitudes[important_consts_order].loc[station]
        reference_phases = phase_angles[important_consts_order].loc[station]
        results = get_results(w, reference_amps, reference_phases)

        # amplitude error per constituent
        fig, ax = plt.subplots(figsize=(12, 10))
        results["amp_error"].plot.bar(ax=ax, zorder=100)
        ax.set_ylabel("amplitude error [m]")
        ax.set_title(station)
        ax.set_ylim([-0.15, 0.15])
        ax.grid(axis="y")
        fn = figures / f"amp_error_per_const_{station}_{case}.png"
        fig.savefig(fn, bbox_inches="tight")
        plt.close(fig)

        # phase error per constituent
        fig, ax = plt.subplots(figsize=(12, 10))
        results["phase_error"].plot.bar(ax=ax)
        ax.set_ylabel("phase error [deg]")
        ax.set_title(station)
        fn = figures / f"phase_error_per_const_{station}_{case}.png"
        fig.savefig(fn, bbox_inches="tight")
        plt.close(fig)

        all_results[station] = results

# create DataSet: one (station_name x consts) variable per column of the
# per-station result tables collected in `all_results`.
station_names = list(all_results.keys())
variables = all_results[station_names[0]].columns
data = {}
for var in variables:
    tmp = pd.DataFrame(index=station_names, columns=important_consts_caps)
    for station_name in station_names:
        # Assign through a single .loc call. The chained form
        # tmp.loc[row][cols] = ... writes to an intermediate object and is
        # not guaranteed to update tmp (it never does under copy-on-write).
        tmp.loc[station_name, important_consts_caps] = all_results[station_name][var]
    data[var] = ({"station_name": station_names, "consts": important_consts_caps}, tmp)
consts_data = xr.Dataset(
    data, coords={"station_name": station_names, "consts": important_consts_caps}
)

# nice figures

In [None]:
# Three stacked time-series panels (model vs. NOAA verified observations)
# for the western stations. Also sets the analysis window (t0, tf) and
# `datum`, which are used in the output file name.
t0 = pd.to_datetime("2018-01-15")
tf = pd.to_datetime("2018-11-23")
datum = "LMSL"
fig, axes = plt.subplots(nrows=3, sharex=True, figsize=(10, 9))
stations = [b"Calcasieu_Pass", b"Freshwater_Canal_Locks", b"LAWMA_Amerada_Pass"]
station_keys = noaa_his_data.sel(station_name=stations).coords["station_name"].values
for i, station in enumerate(station_keys):
    ax = axes[i]
    waterlevel = (
        noaa_his_data["waterlevel"].loc[t0:tf, station].drop_duplicates(dim="time")
    )
    station = station.decode("ascii")

    # sample the model at the observation time stamps
    noaa_data = noaa_measured_time_series_MSL[station].loc[t0:tf]
    waterlevel = waterlevel.loc[noaa_data.index]

    # manual shift to local MSL: remove each series' own mean
    eps = waterlevel.mean()
    waterlevel = waterlevel - eps
    eps2 = noaa_data.mean()
    noaa_data = noaa_data - eps2

    ax.set_title(station)
    waterlevel.plot(ax=ax, lw=0.9, zorder=100)
    noaa_data.plot(ax=ax, lw=1.2)

    # Only the top panel keeps a legend and only the middle panel carries
    # the y-label; x-labels are blanked on every panel.
    ax.set_xlabel("")
    if i == 0:
        ax.legend(["Model", "NOAA verified obs."])
    else:
        ax.get_legend().remove()
    ax.set_ylabel("water level [m, LMSL]" if i == 1 else "")
    ax.set_title(station.replace("_", " "))
    ax.set_xlim(left=noaa_data.index[0], right=noaa_data.index[-1])

plt.subplots_adjust(hspace=0.22)
fig.savefig(
    figures / f"{case}_joint_water_level_west_side_{datum}.png", bbox_inches="tight"
)

In [None]:
# Three stacked one-to-one scatter panels (observed vs. modeled) for the
# western stations, annotated with r^2 and rmse taken from
# verified_LMSL_stats.
fig, axes = plt.subplots(nrows=3, sharex=True, sharey=True, figsize=(4, 9))
stations = [b"Calcasieu_Pass", b"Freshwater_Canal_Locks", b"LAWMA_Amerada_Pass"]
station_keys = noaa_his_data.sel(station_name=stations).coords["station_name"].values
for i, station in enumerate(station_keys):
    ax = axes[i]
    waterlevel = (
        noaa_his_data["waterlevel"].loc[t0:tf, station].drop_duplicates(dim="time")
    )
    station = station.decode("ascii")

    # No mean shift is applied in this cell. The unused offset computations
    # were removed: one of them read `noaa_data` before it was assigned in
    # this loop, i.e. it depended on a value left over from another cell.
    noaa_data = noaa_measured_time_series[station].loc[t0:tf]
    stats = verified_LMSL_stats.where(verified_LMSL_stats.station == station).dropna()

    r2 = stats["r2"].values[0]
    rmse_m = stats["rmse_m"].values[0]

    # align model and observations on the observation time stamps
    df = pd.DataFrame(columns=["modeled", "observed"], index=noaa_data.index)
    df["modeled"] = waterlevel.loc[noaa_data.index]
    df["observed"] = noaa_data["water_level"]
    aligned_data[station] = df

    ax = one2one(
        df["observed"],
        df["modeled"],
        lims=[-1.25, 1.25],
        ax=ax,
        quantity_str="water level [m, LMSL]",
    )
    ax.yaxis.tick_right()
    ax.yaxis.set_label_position("right")

    # statistics in the upper-left corner (axes coordinates)
    ax.text(
        0.1,
        0.9,
        f"$r^2=${r2:0.2f}",
        horizontalalignment="left",
        verticalalignment="center",
        transform=ax.transAxes,
        fontsize=12,
    )

    ax.text(
        0.1,
        0.8,
        f"rmse={100*rmse_m:0.2f} cm",
        horizontalalignment="left",
        verticalalignment="center",
        transform=ax.transAxes,
        fontsize=12,
    )

    # x-label only on the bottom panel, y-label only on the middle panel
    if i == 0:
        ax.set_xlabel("")
        ax.set_ylabel("")
    if i == 1:
        ax.set_xlabel("")
    if i == 2:
        ax.set_ylabel("")

plt.subplots_adjust(hspace=0.18)
fig.savefig(
    figures / f"{case}_joint_scatter_west_side_{datum}.png", bbox_inches="tight"
)

In [None]:
# Three stacked time-series panels (model vs. NOAA verified observations)
# for the eastern stations.
fig, axes = plt.subplots(nrows=3, sharex=True, figsize=(10, 9))
stations = [b"Dauphin_Island", b"Dog_River_Bridge", b"Bay_Waveland_Yacht_Club"]
station_keys = noaa_his_data.sel(station_name=stations).coords["station_name"].values
for i, station in enumerate(station_keys):
    ax = axes[i]
    waterlevel = (
        noaa_his_data["waterlevel"].loc[t0:tf, station].drop_duplicates(dim="time")
    )
    station = station.decode("ascii")

    # sample the model at the observation time stamps
    noaa_data = noaa_measured_time_series_MSL[station].loc[t0:tf]
    waterlevel = waterlevel.loc[noaa_data.index]

    # manual shift to local MSL: remove each series' own mean
    eps = waterlevel.mean()
    waterlevel = waterlevel - eps
    eps2 = noaa_data.mean()
    noaa_data = noaa_data - eps2

    ax.set_title(station)
    waterlevel.plot(ax=ax, lw=0.9, zorder=100)
    noaa_data.plot(ax=ax, lw=1.2)

    # Only the top panel keeps a legend and only the middle panel carries
    # the y-label; x-labels are blanked on every panel.
    ax.set_xlabel("")
    if i == 0:
        ax.legend(["Model", "NOAA verified obs."])
    else:
        ax.get_legend().remove()
    # Both series were shifted to their own mean above, so the axis is
    # labelled LMSL (it previously said NAVD88).
    ax.set_ylabel("water level [m, LMSL]" if i == 1 else "")
    ax.set_title(station.replace("_", " "))
    # ax.set_xlim(left=noaa_data.index[0], right=noaa_data.index[-1])
    ax.set_xlim(t0, tf)

plt.subplots_adjust(hspace=0.22)
fig.savefig(
    figures / f"{case}_joint_water_level_east_side_{datum}.png", bbox_inches="tight"
)

In [None]:
# Three stacked one-to-one scatter panels (observed vs. modeled) for the
# eastern stations, annotated with r^2 and rmse taken from
# verified_LMSL_stats.
fig, axes = plt.subplots(nrows=3, sharex=True, sharey=True, figsize=(4, 9))
stations = [b"Dauphin_Island", b"Dog_River_Bridge", b"Bay_Waveland_Yacht_Club"]
station_keys = noaa_his_data.sel(station_name=stations).coords["station_name"].values
for i, station in enumerate(station_keys):
    ax = axes[i]
    waterlevel = noaa_his_data["waterlevel"].loc[t0:tf, station].drop_duplicates(dim="time")
    station = station.decode("ascii")

    noaa_data = noaa_measured_time_series_MSL[station].loc[t0:tf]
    stats = verified_LMSL_stats.where(verified_LMSL_stats.station == station).dropna()

    r2 = stats["r2"].values[0]
    rmse_m = stats["rmse_m"].values[0]

    # manual shift to local MSL: remove each series' own mean
    eps = waterlevel.mean()
    waterlevel = waterlevel - eps
    eps2 = noaa_data.mean()
    noaa_data = noaa_data - eps2

    # align model and observations on the observation time stamps
    df = pd.DataFrame(columns=["modeled", "observed"], index=noaa_data.index)
    df["modeled"] = waterlevel.loc[noaa_data.index]
    df["observed"] = noaa_data["water_level"]
    aligned_data[station] = df

    # The data are mean-shifted, so pass the LMSL label explicitly instead
    # of falling back to one2one's NAVD88 default.
    ax = one2one(
        df["observed"],
        df["modeled"],
        lims=[-1.25, 1.25],
        ax=ax,
        quantity_str="water level [m, LMSL]",
    )
    ax.yaxis.tick_right()
    ax.yaxis.set_label_position("right")

    # statistics in the upper-left corner (axes coordinates)
    ax.text(
        0.1,
        0.9,
        f"$r^2=${r2:0.2f}",
        horizontalalignment="left",
        verticalalignment="center",
        transform=ax.transAxes,
        fontsize=12,
    )

    ax.text(
        0.1,
        0.8,
        f"rmse={100*rmse_m:0.2f} cm",
        horizontalalignment="left",
        verticalalignment="center",
        transform=ax.transAxes,
        fontsize=12,
    )

    # x-label only on the bottom panel, y-label only on the middle panel
    if i == 0:
        ax.set_xlabel("")
        ax.set_ylabel("")
    if i == 1:
        ax.set_xlabel("")
    if i == 2:
        ax.set_ylabel("")

plt.subplots_adjust(hspace=0.18)
fig.savefig(figures / f"{case}_joint_scatter_east_side.png", bbox_inches="tight")

In [None]:
# Three stacked time-series panels (model vs. NOAA verified observations)
# for the inland stations.
fig, axes = plt.subplots(nrows=3, sharex=True, figsize=(10, 9))
stations = [b"Shell_Beach", b"New_Canal_Station", b"I-10_Bonnet_Carre_Floodway"]
station_keys = noaa_his_data.sel(station_name=stations).coords["station_name"].values
for i, station in enumerate(station_keys):
    ax = axes[i]
    waterlevel = (
        noaa_his_data["waterlevel"].loc[t0:tf, station]
    )
    station = station.decode("ascii")

    noaa_data = noaa_measured_time_series_MSL[station].loc[t0:tf]

    # manual shift to local MSL: remove each series' own mean
    eps = waterlevel.mean()
    waterlevel = waterlevel - eps
    eps2 = noaa_data.mean()
    noaa_data = noaa_data - eps2

    ax.set_title(station)
    waterlevel.plot(ax=ax, lw=0.9, zorder=100)
    noaa_data.plot(ax=ax, lw=1.2)

    # Only the top panel keeps a legend and only the middle panel carries
    # the y-label; x-labels are blanked on every panel.
    ax.set_xlabel("")
    if i == 0:
        ax.legend(["Model", "NOAA verified obs."])
    else:
        ax.get_legend().remove()
    # Both series were shifted to their own mean above, so the axis is
    # labelled LMSL (it previously said NAVD88).
    ax.set_ylabel("water level [m, LMSL]" if i == 1 else "")
    ax.set_title(station.replace("_", " "))
    # ax.set_xlim(left=noaa_data.index[0], right=noaa_data.index[-1])
    ax.set_xlim(t0, tf)

plt.subplots_adjust(hspace=0.22)
fig.savefig(
    figures / f"{case}_joint_water_level_inland_side_{datum}.png", bbox_inches="tight"
)

In [None]:
# Three stacked one-to-one scatter panels (observed vs. modeled) for the
# inland stations, annotated with r^2 and rmse taken from
# verified_LMSL_stats.
fig, axes = plt.subplots(nrows=3, sharex=True, sharey=True, figsize=(4, 9))
# Define the station list here so the cell does not depend on whichever
# cell last assigned `stations`.
stations = [b"Shell_Beach", b"New_Canal_Station", b"I-10_Bonnet_Carre_Floodway"]
station_keys = noaa_his_data.sel(station_name=stations).coords["station_name"].values
for i, station in enumerate(station_keys):
    ax = axes[i]
    waterlevel = noaa_his_data["waterlevel"].loc[t0:tf, station].drop_duplicates(dim="time")
    station = station.decode("ascii")

    noaa_data = noaa_measured_time_series_MSL[station].loc[t0:tf]
    stats = verified_LMSL_stats.where(verified_LMSL_stats.station == station).dropna()

    r2 = stats["r2"].values[0]
    rmse_m = stats["rmse_m"].values[0]

    # manual shift to local MSL: remove each series' own mean
    eps = waterlevel.mean()
    waterlevel = waterlevel - eps
    eps2 = noaa_data.mean()
    noaa_data = noaa_data - eps2

    # align model and observations on the observation time stamps
    df = pd.DataFrame(columns=["modeled", "observed"], index=noaa_data.index)
    df["modeled"] = waterlevel.loc[noaa_data.index]
    df["observed"] = noaa_data["water_level"]
    aligned_data[station] = df

    # The data are mean-shifted, so pass the LMSL label explicitly instead
    # of falling back to one2one's NAVD88 default.
    ax = one2one(
        df["observed"],
        df["modeled"],
        lims=[-1.25, 1.25],
        ax=ax,
        quantity_str="water level [m, LMSL]",
    )
    ax.yaxis.tick_right()
    ax.yaxis.set_label_position("right")

    # statistics in the upper-left corner (axes coordinates)
    ax.text(
        0.1,
        0.9,
        f"$r^2=${r2:0.2f}",
        horizontalalignment="left",
        verticalalignment="center",
        transform=ax.transAxes,
        fontsize=12,
    )

    ax.text(
        0.1,
        0.8,
        f"rmse={100*rmse_m:0.2f} cm",
        horizontalalignment="left",
        verticalalignment="center",
        transform=ax.transAxes,
        fontsize=12,
    )

    # x-label only on the bottom panel, y-label only on the middle panel
    if i == 0:
        ax.set_xlabel("")
        ax.set_ylabel("")
    if i == 1:
        ax.set_xlabel("")
    if i == 2:
        ax.set_ylabel("")

plt.subplots_adjust(hspace=0.18)
fig.savefig(figures / f"{case}_joint_scatter_inland_side.png", bbox_inches="tight")