In [None]:
import gc
import os
import urllib.request
from pathlib import Path

import geopandas as gpd
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr
from scipy import signal

In [None]:
# Interactive plotting stack: HoloViews rendered through the Bokeh backend.
import holoviews as hv
from holoviews import opts

# Select Bokeh as the HoloViews plotting extension.
hv.extension("bokeh")

from bokeh.io import curdoc, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.themes import built_in_themes

# Send Bokeh output to the notebook and set the theme on the current document.
# NOTE(review): opts, ColumnDataSource, figure, show and built_in_themes are not
# used in the cells visible here — confirm whether later cells need them.
output_notebook()
curdoc().theme = "light_minimal"

In [None]:
# plots
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and the
# old names were removed later, so use whichever spelling this install provides.
_talk_style = (
    "seaborn-v0_8-talk"
    if "seaborn-v0_8-talk" in plt.style.available
    else "seaborn-talk"
)
plt.style.use(_talk_style)

In [None]:
# Directory layout used by the rest of the notebook.
# NOTE(review): `d3d` is an absolute, machine-specific path; consider making it
# configurable (e.g. via an environment variable) before sharing the notebook.
d3d = Path("/mnt/c/Users/rdchlclj/Projects/MR_D3D_model/Delft3D")
project = Path.cwd()
output = project / "output"
# NOTE: `input` shadows the Python builtin; the name is kept because later
# cells refer to it.
input = project / "input"
output_data = output / "data"
figures = project / "figures"

# Later cells download files into `output_data` and save images into `figures`;
# create both up front so those writes do not fail on a fresh checkout.
for _dir in (output_data, figures):
    _dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Observation stations
begin_date = "2018-01-01"
end_date = "2018-11-23"
model_input = d3d / "MS_River_plume.dsproj_data/regional/input"

# Whitespace-separated station file: lon, lat, quoted station id.
# The separator is a raw string so "\s" is a regex class, not an invalid escape.
station_list = pd.read_csv(
    model_input / "salinity_stations_obs.xyn",
    sep=r"\s+",
    header=None,
    names=["lon", "lat", "station_id"],
)

# Strip the single quotes that surround each id in the file.
station_list["station_id"] = station_list["station_id"].str.strip("'")
# Index the table by the UTF-8 encoded id; later cells look rows up with the
# (bytes) station_name values of the model history datasets.
station_list["station_code"] = station_list["station_id"].str.encode("utf-8")
station_list = station_list.set_index("station_code")
sal_stations = list(station_list.index)

# Attach the human-readable nickname of each station from the spreadsheet.
spread_sheet = pd.read_excel(
    input / "USGS_salinity_stations_2018.xls", dtype={"station_id": str}
)
spread_sheet["station_code"] = spread_sheet["station_id"].str.encode("utf-8")
nicknames = spread_sheet.set_index("station_code")[["nickname"]]
station_list = station_list.join(nicknames)

In [None]:
# Calibrated production run (p03): station history output
model = "p03"
model_output = Path("/mnt/g/MR_D3D_Model/Delft3D/models/prod_2018/p03_10sig_2018prod")
his_fn = model_output / f"{model}_merged_his.nc"
variables = ["waterlevel", "bedlevel", "waterdepth", "salinity"]

# Keep only the variables of interest, index by station name instead of the
# positional "stations" dimension, then restrict to the salinity stations.
calib_his = xr.open_dataset(his_fn)[variables]
calib_his = calib_his.swap_dims({"stations": "station_name"})
calib_his = calib_his.sel(station_name=sal_stations)

In [None]:
# Uncalibrated run (f07): station history output
model = "f07"
model_output = Path("/mnt/g/MR_D3D_Model/Delft3D/models/f07_10sig_davg_sal_rst/output")
his_fn = model_output / f"{model}_10sig_davg_sal_rst_his.nc"
variables = ["waterlevel", "bedlevel", "waterdepth", "salinity"]

# NOTE(review): the last five salinity stations are excluded here — presumably
# they are absent from the f07 output; confirm.
uncalib_his = xr.open_dataset(his_fn)[variables]
uncalib_his = uncalib_his.swap_dims({"stations": "station_name"})
uncalib_his = uncalib_his.sel(station_name=sal_stations[:-5])

# functions

In [None]:
def apply_butterworth(discharge, buff=20, dts=25, N=5):
    """low-pass a discharge record with a Butterworth filter to remove tides

    The filter is run forward and backward (scipy.signal.filtfilt) over every
    column of the input.

    input:
    discharge = discharge dataframe (one column; the result is relabelled with
                a single column name)
    buff      = hours added to the 24.8412 h lunar day to set the cutoff period
    dts       = sampling interval in minutes
    N         = filter order

    returns:
    filtered dataframe with the column "discharge_cms_Butterworth_filtered"

    """
    # cutoff: (lunar day + buffer) in hours, converted to seconds, then to Hz
    cutoff_hz = 1 / (((24.8412 + buff) * 60 * 60))
    # sampling frequency in Hz from the sampling interval in minutes
    sample_hz = 1 / (dts * 60)

    numer, denom = signal.butter(N, cutoff_hz, btype="lowpass", fs=sample_hz)

    def _filter_column(column):
        return signal.filtfilt(numer, denom, column)

    result = discharge.apply(_filter_column)
    result.columns = ["discharge_cms_Butterworth_filtered"]

    return result


def apply_godin(discharge):
    """apply Godin filter to remove tidal influence from data

    The record is averaged onto an hourly grid, gaps are filled by time-based
    linear interpolation, and three centred running means of 24, 24 and 25
    hours are applied in succession (the Godin 24-24-25 cascade).

    input:
    discharge = discharge dataframe with a datetime index and one column
                (the result is relabelled with a single column name)

    returns:
    filtered dataframe on the hourly grid with the column
    "discharge_cms_Godin_filtered"; rows near both ends of the record, where a
    full window is not available, are NaN

    """
    # hourly means, with gaps filled in time before smoothing
    hourly = discharge.resample("1h").mean().interpolate(method="time")

    # Godin filter (USGS standard): running means of 24 h, 24 h and 25 h.
    # NOTE(review): with center=True an even (24 h) window cannot be exactly
    # centred on a sample — confirm whether the resulting sub-hour offset
    # matters for this analysis.
    godin = hourly
    for window_hours in (24, 24, 25):
        godin = godin.rolling(window=window_hours, center=True).mean()
    godin.columns = ["discharge_cms_Godin_filtered"]

    return godin


def download_nwis_data(
    site_name, site_no, begin_date, end_date, data_code=60, skiprows=28
):
    """download a time series from https://nwis.waterdata.usgs.gov as a dataframe

    The raw RDB text is saved to `output_data`, parsed, converted from
    America/Chicago local time to naive UTC, and written back out as a
    tab-separated CSV named "{site_name}_{begin_date}.csv".

    inputs:
    site_name = user specified name for site (used in the output file names)
    site_no = USGS site number code
    begin_date = first day in timeseries (YYYY-MM-dd)
    end_date = last day in timeseries (YYYY-MM-dd)
    data_code = USGS parameter code, zero-padded to five digits in the request
                (default=60; the notebook passes 480 for salinity)
    skiprows = number of header rows to skip (default=28)

    return = dataframe indexed by UTC datetime with one column named
             "salinity_ppt" (the name is fixed regardless of data_code), or
             None when the downloaded text cannot be parsed or its index cannot
             be converted to UTC; in both cases the raw file is deleted
    """

    # output file and request
    output_data.mkdir(parents=True, exist_ok=True)  # urlretrieve needs the folder
    out_fn = output_data / f"{site_name}_{site_no}_{begin_date}_{end_date}.txt"
    request = f"https://nwis.waterdata.usgs.gov/usa/nwis/uv/?cb_{data_code:05d}=on&format=rdb&site_no={site_no}&period=&begin_date={begin_date}&end_date={end_date}"

    # get data (the response headers are not needed)
    txt, _ = urllib.request.urlretrieve(request, out_fn)

    # Pandas
    try:
        data = pd.read_csv(
            txt,
            sep=r"\s+",
            skiprows=skiprows,
            usecols=[2, 3, 5],
            parse_dates={"datetime_CST": [0, 1]},
            header=0,
            index_col=0,
            names=["date", "time", "salinity_ppt"],
            na_values=["Eqp", "***", "--", "Dis", "Dry", "Ice", "Mnt", "Ssn", "ZFl"],
            dtype={"salinity_ppt": float},
        )
    except Exception as err:
        # Exception (not a bare except) so Ctrl-C / kernel interrupts still propagate
        print("Problem with parsing text ", err)
        os.remove(txt)
        return None

    try:
        data.index = (
            data.index.tz_localize("America/Chicago", ambiguous=True)
            .tz_convert("UTC")
            .tz_localize(None)
        )
    except AttributeError:
        # raised when the index was not parsed into datetimes
        print("Problem converting datetime to UTC. Check data")
        os.remove(txt)
        return None

    data.to_csv(
        output_data / f"{site_name}_{begin_date}.csv",
        sep="\t",
        header=["val"],
        index_label=["datetime_UTC"],
    )
    return data

def download_nwis_site_info(
    site_name, site_no, skiprows=34
):
    """download site information from https://waterdata.usgs.gov as a dataframe

    The raw RDB text is saved to `output_data`, the site name, gage altitude
    and altitude datum columns are parsed, and the gage altitude is converted
    from feet to metres.

    inputs:
    site_name = user specified name for site (used in the output file names)
    site_no = USGS site number code
    skiprows = number of header rows to skip (default=34)

    return = dataframe with columns "site_name", "gage_alt_ft", "alt_datum"
             and "gage_alt_m", or None when the downloaded text cannot be
             parsed (the raw file is deleted in that case)
    """

    # output file and request
    output_data.mkdir(parents=True, exist_ok=True)  # urlretrieve needs the folder
    out_fn = output_data / f"{site_name}_{site_no}_site_information.txt"
    request = f"https://waterdata.usgs.gov/nwis/site/?format=rdb&site_no={site_no}"

    # get data (the response headers are not needed)
    txt, _ = urllib.request.urlretrieve(request, out_fn)

    # Pandas
    try:
        data = pd.read_csv(
            txt,
            sep="\t",
            skiprows=skiprows,
            usecols=[2, 9, 10],
            header=0,
            names=["site_name", "gage_alt_ft", "alt_datum"],
            na_values=["Eqp", "***", "--", "Dis", "Dry", "Ice", "Mnt", "Ssn", "ZFl"],
        )
    except Exception as err:
        # Exception (not a bare except) so Ctrl-C / kernel interrupts still propagate
        print("Problem with parsing text ", err)
        os.remove(txt)
        return None

    data["gage_alt_m"] = data["gage_alt_ft"] * 0.3048  # feet -> metres

    # Written next to the raw download under its own name. The previous name,
    # "{site_name}_{begin_date}.csv", relied on a notebook-level global and
    # followed the naming pattern of the cached observation files.
    data.to_csv(
        output_data / f"{site_name}_{site_no}_site_information.csv",
        sep="\t",
    )
    return data

In [None]:
model = "p03"
# This cell evaluates the calibrated p03 run. Reference its dataset explicitly
# instead of the global `his`, which is only assigned in a later cell.
model_his = calib_his
data_code = 480
bird_foot_stations = [
    "Cow Bayou",
    "Bay Gardene",
    "Empire Waterway",
    "Snake Island",
    "Stone Island",
    "Crooked Bayou",
]

# Stations drawn in the multi-station figure; they are skipped in this loop.
plot_stations = [
    "Graveline Bayou",
    "Grand Pass",
    "Gulfport",
    "East Ship Island",
    "Biloxi Bay",
]
skip_nicknames = set(plot_stations)

t0 = pd.to_datetime("2018-09-01")
tf = pd.to_datetime("2018-10-01")

cycle_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
c = cycle_colors[2]  # observations
c2 = cycle_colors[0]  # model
for station in model_his.station_name.values:

    station_info = station_list.loc[station]
    station_id = station_info["station_id"]
    station_no = station_info.name.decode()
    nickname = station_info.nickname

    if station_id[:4] == "CRMS":
        continue

    if nickname in skip_nicknames:
        continue

    # observations: use the cached CSV when present, otherwise download
    obs_fn = output_data / f"{station_id}_{begin_date}.csv"

    if not obs_fn.exists():
        print("Data missing. Fetching from internet!")
        obs = download_nwis_data(
            station_id, station_no, begin_date, end_date, data_code=data_code
        )
    else:
        print("Data exists!")
        obs = pd.read_csv(obs_fn, sep="\t", index_col=[0], dtype={"val": float})

    # download_nwis_data returns None when the download could not be parsed
    if obs is None:
        continue

    # get vertical position
    station_details = download_nwis_site_info(nickname, station_no)
    if station_details is None:
        print(f"No site information for {nickname}; skipping")
        continue
    gage_z = station_details["gage_alt_m"].values

    # load model output
    mod = model_his.sel(station_name=station)

    # get mean of center sigma layers to index entire series
    mean_z = mod.zcoordinate_c.mean(dim="time").values
    gage_idx = np.argmin(np.abs(mean_z - gage_z))

    fig, ax = plt.subplots(figsize=(14, 6))
    sal = mod["salinity"].isel(laydim=gage_idx)

    t = mod.time.values
    ax.plot(t, sal, label="computed salinity", linewidth=1.75, zorder=100, color=c2)

    obs_t = pd.to_datetime(obs.index)
    obs_val = obs.values
    ax.plot(
        obs_t, obs_val, lw=1.25, color=c, label="observed salinity",
    )

    ax.set_ylabel("Salinity (ppt)")
    ax.set_title(f"{nickname}")
    ax.legend()
    ax.grid()
    ylim = ax.get_ylim()
    ax.set_ylim([0, ylim[1]])
    # show the period covered by the observations
    ax.set_xlim(obs_t.min(), obs_t.max())

    fn = figures / f"{nickname}_{model}_salinity_comparison.png"
    fig.savefig(fn, bbox_inches="tight")

In [None]:
model = "f07"
# This cell evaluates the uncalibrated f07 run. The loop already iterates over
# `uncalib_his`, so select model output from the same dataset rather than from
# the global `his`, which may be bound to a different run.
model_his = uncalib_his
data_code = 480
bird_foot_stations = [
    "Cow Bayou",
    "Bay Gardene",
    "Empire Waterway",
    "Snake Island",
    "Stone Island",
    "Crooked Bayou",
]

cycle_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
c = cycle_colors[2]  # observations
c2 = cycle_colors[0]  # model
for station in model_his.station_name.values:

    station_info = station_list.loc[station]
    station_id = station_info["station_id"]
    station_no = station_info.name.decode()
    nickname = station_info.nickname

    if station_id[:4] == "CRMS":
        continue

    # observations: use the cached CSV when present, otherwise download
    obs_fn = output_data / f"{station_id}_{begin_date}.csv"

    if not obs_fn.exists():
        print("Data missing. Fetching from internet!")
        obs = download_nwis_data(
            station_id, station_no, begin_date, end_date, data_code=data_code
        )
    else:
        print("Data exists!")
        obs = pd.read_csv(obs_fn, sep="\t", index_col=[0], dtype={"val": float})

    # download_nwis_data returns None when the download could not be parsed
    if obs is None:
        continue

    # get vertical position
    station_details = download_nwis_site_info(nickname, station_no)
    if station_details is None:
        print(f"No site information for {nickname}; skipping")
        continue
    gage_z = station_details["gage_alt_m"].values

    # load model output
    mod = model_his.sel(station_name=station)

    # get mean of center sigma layers to index entire series
    mean_z = mod.zcoordinate_c.mean(dim="time").values
    gage_idx = np.argmin(np.abs(mean_z - gage_z))

    fig, ax = plt.subplots(figsize=(14, 6))
    sal = mod["salinity"].isel(laydim=gage_idx)

    t = mod.time.values
    ax.plot(t, sal, label="computed salinity", linewidth=1.75, zorder=100, color=c2)

    obs_t = pd.to_datetime(obs.index)
    obs_val = obs.values
    ax.plot(
        obs_t, obs_val, lw=1.25, color=c, label="observed salinity",
    )

    ax.set_ylabel("Salinity (ppt)")
    ax.set_title(f"{nickname}")
    ax.legend()
    ax.grid()
    ylim = ax.get_ylim()
    ax.set_ylim([0, ylim[1]])
    # show the period covered by the observations
    ax.set_xlim(obs_t.min(), obs_t.max())

    fn = figures / f"{nickname}_{model}_salinity_comparison.png"
    fig.savefig(fn, bbox_inches="tight")

In [None]:
# Run shown in the multi-station figure.
# To plot the calibrated run instead, set model = "p03" and his = calib_his.
model = "f07"
his = uncalib_his

# One panel per station, matched on the station nickname.
plot_stations = [
    "Graveline Bayou",
    "Grand Pass",
    "Gulfport",
    "East Ship Island",
    "Biloxi Bay",
]
wanted_nicknames = set(plot_stations)

t0 = pd.to_datetime("2018-09-01")
tf = pd.to_datetime("2018-10-01")

cycle_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
c = cycle_colors[2]  # observations
c2 = cycle_colors[0]  # model

fig, axes = plt.subplots(
    figsize=(14, 14), nrows=len(plot_stations), sharex=True, sharey=True
)

i = 0
for station in his.station_name.values:

    station_info = station_list.loc[station]
    station_id = station_info["station_id"]
    station_no = station_info.name.decode()
    nickname = station_info.nickname

    if nickname not in wanted_nicknames:
        continue

    ax = axes[i]
    i += 1

    # panel decoration (applied even when no series is drawn for the station)
    ax.set_ylabel("Salinity (psu)")

    if nickname == "Gulfport" or nickname == "East Ship Island":
        ypos = 0.1
    else:
        ypos = 0.8

    ax.text(
        x=0.025,
        y=ypos,
        s=f"{nickname}",
        transform=ax.transAxes,
        fontsize=14,
        bbox={"facecolor": "w", "edgecolor": "b"},
    )

    ax.set_xlim(t0, tf)
    xticks = ax.get_xticks()
    ax.set_xticks(xticks[:-1])

    ax.tick_params(which="both", axis="both", direction="in")

    ax.grid(alpha=0.5, lw=0.5)
    ax.set_ylim(0, 35)

    # This station is left as an empty panel for the f07 run; check before
    # touching the observation cache or requesting site information.
    if (model == "f07") and (nickname == "Graveline Bayou"):
        continue

    obs_fn = output_data / f"{station_id}_{begin_date}.csv"
    obs = pd.read_csv(obs_fn, sep="\t", index_col=[0], dtype={"val": float})

    # get vertical position
    station_details = download_nwis_site_info(nickname, station_no)
    if station_details is None:
        print(f"No site information for {nickname}; panel left empty")
        continue
    gage_z = station_details["gage_alt_m"].values

    # load model output
    mod = his.sel(station_name=station)

    # get mean of center sigma layers to index entire series
    mean_z = mod.zcoordinate_c.mean(dim="time").values
    gage_idx = np.argmin(np.abs(mean_z - gage_z))

    sal = mod["salinity"].isel(laydim=gage_idx)

    t = mod.time.values
    ax.plot(t, sal, label="computed salinity", linewidth=1.75, zorder=100, color=c2)

    obs_t = pd.to_datetime(obs.index)
    obs_val = obs.values
    ax.plot(
        obs_t, obs_val, lw=1.25, color=c, label="observed salinity",
    )

# Put the legend on the first panel that actually has plotted series; the top
# panel can be empty when its station was skipped above.
legend_ax = next((a for a in axes if a.get_legend_handles_labels()[0]), None)
if legend_ax is not None:
    legend_ax.legend()
plt.subplots_adjust(hspace=0.1)

fn = figures / f"prod_{model}_salinity_comparison_multistation.png"
fig.savefig(fn, bbox_inches="tight", dpi=500)

fn = figures / f"prod_{model}_salinity_comparison_multistation.svg"
fig.savefig(fn, bbox_inches="tight")