In [None]:
import urllib.request
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import signal

In [None]:
from bokeh.io import curdoc, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.themes import built_in_themes

# Send Bokeh output to the notebook's output cells.
output_notebook()
# Use Bokeh's built-in "light_minimal" theme for the current document.
curdoc().theme = "light_minimal"

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names. Prefer the new name when it is
# available and fall back to the legacy one on older installations.
talk_style = (
    "seaborn-v0_8-talk"
    if "seaborn-v0_8-talk" in plt.style.available
    else "seaborn-talk"
)
plt.style.use(talk_style)

In [None]:
# Directory layout, relative to the folder the notebook is started from.
project = Path.cwd()
output = project / "output"
site_info_output = output / "site_info"  # cached USGS site description files
discharge_data = output / "discharge_data"  # raw and converted NWIS downloads
delft3d_input = output / "delft3d_input"  # exported discharge time series
input_data = project / "input"  # files the notebook only reads
figures = project / "figures"

# Later cells write into these folders without creating them, so make sure
# they exist on a fresh checkout. `input_data` is left alone on purpose: it
# has to be supplied by the user.
for _folder in (site_info_output, discharge_data, delft3d_input, figures):
    _folder.mkdir(parents=True, exist_ok=True)

In [None]:
# some constants
# cubic feet per second -> cubic metres per second (1 ft^3 = 0.0283168 m^3)
cfs2cms = 0.0283168

# Functions

In [None]:
def apply_butterworth(discharge, buff=20, dts=25, N=5):
    """low-pass the discharge record with a forward-backward Butterworth filter

    input:
    discharge = discharge dataframe (a single column is expected, because the
                result is relabelled with one column name)
    buff      = hours added to the 24.8412 h lunar day to set the cut-off period
    dts       = sampling interval in minutes
    N         = filter order

    returns:
    filtered dataframe with the column 'discharge_cms_Butterworth_filtered'

    """
    # cut-off: one cycle per (lunar day + buff), expressed in Hz
    cutoff_period = (24.8412 + buff) * 60 * 60
    cutoff_hz = 1 / cutoff_period

    # sampling frequency in Hz from the sampling interval in minutes
    sample_hz = 1 / (dts * 60)

    numerator, denominator = signal.butter(
        N, cutoff_hz, btype="lowpass", fs=sample_hz
    )

    def _forward_backward(column):
        # filtfilt runs the filter in both directions over the column
        return signal.filtfilt(numerator, denominator, column)

    smoothed = discharge.apply(_forward_backward)
    smoothed.columns = ["discharge_cms_Butterworth_filtered"]

    return smoothed

def apply_godin(discharge, windows=(24, 25, 25)):
    """apply a Godin-type filter to remove tidal influence from data

    The record is averaged to hourly values, gaps are filled by time
    interpolation, and centred running means are then applied one after the
    other. Each running mean leaves NaN where its window is incomplete, so the
    first and last hours of the result are NaN.

    input:
    discharge = discharge dataframe with a DatetimeIndex (a single column is
                expected, because the result is relabelled with one name)
    windows   = lengths, in hours, of the successive centred running means

    NOTE(review): the Godin filter is normally defined as 24 h, 24 h and 25 h
    running means. The default keeps the 24/25/25 sequence this notebook has
    always used; pass windows=(24, 24, 25) for the textbook filter.

    returns:
    filtered dataframe with the column 'discharge_cms_Godin_filtered'

    """
    # "1h" is understood by old and new pandas; the upper-case "H" alias is
    # deprecated since pandas 2.2.
    godin = discharge.resample("1h").mean().interpolate(method="time")

    for window in windows:
        godin = godin.rolling(window=window, center=True).mean()

    godin.columns = ["discharge_cms_Godin_filtered"]

    return godin

def combine_discharges(discharges):
    """combine discharge data after interpolating to a common index

    Every frame is stripped of duplicated index entries (the first occurrence
    is kept), re-indexed onto the union of all indexes, gap-filled by
    interpolation and finally summed. Frames are added column by column, so
    they must share their column names. The input frames are not modified.

    input = list of discharge dataframes

    returns combined discharge dataframe

    raises ValueError if no dataframe is supplied
    """
    dedups = [discharge[~discharge.index.duplicated()] for discharge in discharges]
    if not dedups:
        raise ValueError("combine_discharges needs at least one discharge dataframe")

    # union of de-duplicated indexes is itself free of duplicates
    new_index = dedups[0].index
    for dedup in dedups[1:]:
        new_index = new_index.union(dedup.index)

    # The union of differently sampled records is irregular. "linear" would
    # treat every row as equally spaced, so weight by the real time gaps
    # whenever the index carries timestamps.
    method = "time" if isinstance(new_index, pd.DatetimeIndex) else "linear"

    discharge_combined = None
    for dedup in dedups:
        regular = dedup.reindex(new_index).interpolate(method=method)
        if discharge_combined is None:
            discharge_combined = regular
        else:
            discharge_combined = discharge_combined + regular

    return discharge_combined

def download_discharge_nwis(
    site_name, site_no, begin_date, end_date, data_code=60, skiprows=28
):
    """download data from https://nwis.waterdata.usgs.gov (``uv`` service) and output it as a dataframe

    The tab-delimited response is stored in ``discharge_data``, parsed,
    multiplied by the ft3/s -> m3/s factor, shifted from America/Chicago local
    time to naive UTC and written to ``discharge_data/<site_name>_<begin_date>.csv``.

    inputs:
    site_name = user specified name for site
    site_no = USGS site number code
    begin_date = first day in timeseries (YYYY-MM-dd)
    end_date = last day in timeseries (YYYY-MM-dd)
    data_code = code placed, zero padded to five digits, in the ``cb_<code>=on``
                query field (default=60; presumably the NWIS discharge parameter)
    skiprows = number of header rows to skip (default=28)

    return = discharge (pandas DataFrame with a ``discharge`` column and a
             ``datetime_UTC`` index)

    raises: download, parsing and conversion errors are re-raised; parsing and
            conversion errors are preceded by a printed hint
    """

    # some constants
    cfs2cms = 0.0283168

    # output file and request
    out_fn = discharge_data / f"{site_name}_{site_no}_{begin_date}_{end_date}.txt"
    request = f"https://nwis.waterdata.usgs.gov/usa/nwis/uv/?cb_{data_code:05d}=on&format=rdb&site_no={site_no}&period=&begin_date={begin_date}&end_date={end_date}"

    # get data (urlretrieve does not create missing folders)
    out_fn.parent.mkdir(parents=True, exist_ok=True)
    txt, _headers = urllib.request.urlretrieve(request, out_fn)

    # Pandas
    try:
        discharge = pd.read_csv(
            txt,
            sep=r"\s+",
            skiprows=skiprows,
            usecols=[2, 3, 5],
            parse_dates={"datetime_CST": [0, 1]},
            header=0,
            index_col=0,
            names=["date", "time", "discharge"],
            dtype={"discharge": float},
        )
    except Exception:
        print("Problem with parsing text ")
        raise

    try:
        discharge.discharge = discharge.discharge * cfs2cms
    except TypeError:
        print("There is a problem with DataFrame structure/dtypes")
        raise

    try:
        # NOTE(review): ambiguous=True labels both passes through the repeated
        # autumn hour as daylight-saving time, which yields duplicated UTC
        # stamps. Column 4 of the download is skipped here and presumably holds
        # the time-zone code that would resolve this -- confirm against a file.
        discharge.index = (
            discharge.index.tz_localize("America/Chicago", ambiguous=True)
            .tz_convert("UTC")
            .tz_localize(None)
        )
        discharge.index = discharge.index.rename("datetime_UTC")
    except AttributeError:
        print("Problem converting datetime to UTC. Check data")
        raise

    discharge.to_csv(
        discharge_data / f"{site_name}_{begin_date}.csv",
        sep="\t",
        header=["discharge_m3s"],
        index_label=["datetime_UTC"],
    )
    return discharge


def download_mean_discharge_nwis(
    site_name, site_no, begin_date, end_date, data_code=60, skiprows=28
):
    """download data from https://nwis.waterdata.usgs.gov (``dv`` service) and output it as a dataframe

    The tab-delimited response is stored in ``discharge_data``, parsed,
    multiplied by the ft3/s -> m3/s factor, shifted from America/Chicago local
    time to naive UTC and written to ``discharge_data/<site_name>_<begin_date>.csv``.

    inputs:
    site_name = user specified name for site
    site_no = USGS site number code
    begin_date = first day in timeseries (YYYY-MM-dd)
    end_date = last day in timeseries (YYYY-MM-dd)
    data_code = code placed, zero padded to five digits, in the ``cb_<code>=on``
                query field (default=60; presumably the NWIS discharge parameter)
    skiprows = number of header rows to skip (default=28)

    return = discharge (pandas DataFrame with a ``discharge`` column and a
             ``datetime_UTC`` index)

    raises: download, parsing and conversion errors are re-raised; parsing and
            conversion errors are preceded by a printed hint
    """

    # some constants
    cfs2cms = 0.0283168

    # output file and request
    out_fn = discharge_data / f"{site_name}_{site_no}_{begin_date}_{end_date}.txt"
    request = f"https://nwis.waterdata.usgs.gov/nwis/dv?cb_{data_code:05d}=on&format=rdb&referred_module=sw&site_no={site_no}&period=&begin_date={begin_date}&end_date={end_date}"

    # get data (urlretrieve does not create missing folders)
    out_fn.parent.mkdir(parents=True, exist_ok=True)
    txt, _headers = urllib.request.urlretrieve(request, out_fn)

    # Pandas
    try:
        discharge = pd.read_csv(
            txt,
            sep=r"\s+",
            skiprows=skiprows,
            usecols=[2, 3],
            parse_dates={"datetime_CST": [0]},
            header=0,
            index_col=0,
            names=["date", "discharge"],
        )
    except Exception:
        print("Problem with parsing text data")
        raise

    try:
        # no dtype is forced while reading, so a non-numeric entry surfaces here
        discharge = discharge * cfs2cms
    except TypeError:
        print("There is a problem with DataFrame structure/dtypes")
        raise

    try:
        # the parsed column holds dates only, so each value is stamped at local
        # midnight before being shifted to UTC
        discharge.index = (
            discharge.index.tz_localize("America/Chicago", ambiguous=True)
            .tz_convert("UTC")
            .tz_localize(None)
        )
        discharge.index = discharge.index.rename("datetime_UTC")
    except AttributeError:
        print("Problem converting datetime to UTC. Check data")
        raise

    discharge.to_csv(
        discharge_data / f"{site_name}_{begin_date}.csv",
        sep="\t",
        header=["discharge_m3s"],
        index_label=["datetime_UTC"],
    )
    return discharge

def extra_info(site_info, site_no, data, method):
    """record mean, peak and derivation method of a discharge record in the site table

    input:
    site_info = site table, modified in place; the row is looked up with
                int(site_no)
    site_no   = USGS site number (string or int)
    data      = discharge as a single-column dataframe or a series
    method    = free-text label describing how the record was derived

    raises ValueError if data holds more than one column
    """
    # reduce to plain floats so that scalars, not length-1 arrays, are stored
    mean_values = np.asarray(data.mean(), dtype=float).ravel()
    peak_values = np.asarray(data.max(), dtype=float).ravel()
    if mean_values.size != 1 or peak_values.size != 1:
        raise ValueError("extra_info expects exactly one discharge column")

    row = int(site_no)
    site_info.loc[row, "mean"] = float(mean_values[0])
    site_info.loc[row, "peak"] = float(peak_values[0])
    site_info.loc[row, "method"] = method

# Bonnet Carre Spillway

In [None]:
# calculate Bonnet Carre Spillway (BCS) discharge
# opening data from MVD
data_fn = input_data / "bonnet_carre_2016-2019.csv"
opening = pd.read_csv(data_fn, parse_dates=True, index_col=0, usecols=[0, 2])

# days in model
not_open_idx = pd.date_range(start="2014-12-01", end="2020-01-02", freq="D")
not_open = pd.DataFrame(
    data=np.zeros(len(not_open_idx)),
    index=not_open_idx,
    columns=["discharge_cms"],
)

# flow assigned to every day without a larger recorded value
leakage = 5350 * cfs2cms

# Adding the all-zero frame stretches the opening record onto every model day
# (days without a record become NaN).
# NOTE(review): this only lines up if the CSV column is also called
# "discharge_cms" -- confirm against the input file.
discharge = opening + not_open
discharge = discharge.where(discharge > leakage, leakage)
discharge = discharge.where(~discharge.isna(), leakage)

# convert time stamp to UTC
discharge.index = (
    discharge.index.tz_localize("America/Chicago", ambiguous=True)
    .tz_convert("UTC")
    .tz_localize(None)
)
discharge.to_csv(
    delft3d_input / "Bonnet_Carre_2015-2020_converted.csv",
    sep="\t",
    header=["discharge_cms"],
    index_label=["datetime_UTC"],
)

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3; the title had a mis-encoded "é")
p = figure(
    title="Bonnet Carré Spillway",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(discharge.index, discharge.discharge_cms, line_width=2)
show(p)

# Site descriptions

In [None]:
# potential sites to be used
# key = short site name used in file names,
# value = (USGS site number, label stored in the "bc_source" column)
sites = {
    "jourdan_river": ("02481660", "Jourdan River"),  # St. Louis Bay
    "tuxachanie_creek": (
        "02480500",
        "Biloxi River",
    ),  # Biloxi Bay (need to extrapolate from old stats)
    "tchoutacabouffa_river": ("02480345", "Biloxi Bay"),  # Biloxi Bay
    "pascagoula_river": ("02479310", "Pascagoula River"),  # Pascagoula River
    "escatawpa_river_at_I-10": ("0248018020", "Pascagoula River"),  # Pascagoula River
    "escatawpa_river_nr_agricola": ("02479560", "Pascagoula River"),  # Pascagoula River
    "atchafalaya_at_morgan_city": ("07381600", "Atchafalaya River"),  # Atchafalaya
    "wax_lake_outlet_at_calumet": ("07381590", "Wax Lake Outlet"),  # Wax Lake
    "mississippi_at_belle_chase": ("07374525", "Belle Chasse"),  # MS River
    "new_canal_river_east_sorrento": (
        "073802284",
        "Blind River",
    ),  # Used for Blind River
    "amite_river_at_port_vincent": ("07380120", "Amite River"),  # Used for Amite
    "tickfaw_river_at_holden": ("07376000", "Tickfaw River"),  # Used for Tickfaw
    "natalbany_river_at_baptist": ("07376500", "Tickfaw River"),  # Used for Tickfaw
    "tangipahoa_river_at_robert": (
        "07375500",
        "Tangipahoa River",
    ),  # Used for Tangipahoa
    "tchefuncte_near_folsom": ("07375000", "Tchefuncte River"),  # Used for Tchefuncte
    "bogue_chitto_river": ("02492000", "Pearl River"),  # Used for Pearl
    "pearl_at_bogalusa": ("02489500", "Pearl River"),  # Used for Pearl
    "pearl_at_walkiah": ("02492110", "Pearl River"),  # Used for Pearl
    "pearl_at_nstl_station": ("02492620", "Pearl River"),  # Used for Pearl
    "pearl_at_cxs_railroad": ("301141089320300", "Pearl River"),  # Used for Pearl
    "west_hobolochitto": ("02492360", "Pearl River"),  # Used for Pearl
    "east_hobolochitto": ("02492343", "Pearl River"),  # Used for Pearl
    "wolf_river": ("02481510", "Wolf River"),  # St. Louis Bay
    "biloxi_river": ("02481000", "Biloxi River"),  # Biloxi Bay
    "red_creek_at_vestry": ("02479300", "Pascagoula River"),  # Pascagoula River
    "alabama_river": ("02428400", "Head of Mobile Bay"),  # Head of Mobile Bay
    "mobile_river_at_bucks": ("02470629", "Mobile River"),  # Head of Mobile Bay
    "tensaw_at_mount_vernon": ("02471019", "Tensaw River"),  # Head of Mobile Bay
    "tombigbee": ("02469761", "Head of Mobile Bay"),  # Head of Mobile Bay
    "chickasaw_creek": ("02471001", "Mobile River"),  # Head of Mobile Bay
    "fowl_river": ("02471078", "Fowl River"),  # Mouth of Mobile Bay
    "fish_river": ("02378500", "Magnolia River"),  # Mouth of Mobile Bay
    "magnolia_river": ("02378300", "Magnolia River"),  # Mouth of Mobile Bay
}

skiprows = 30
site_infos = []

# make sure the cache folder exists before anything is downloaded into it
site_info_output.mkdir(parents=True, exist_ok=True)

for site_name, (site_no, bc_source) in sites.items():
    out_fn = site_info_output / f"{site_name}_{site_no}_site_info.txt"
    request = f"https://waterservices.usgs.gov/nwis/site/?format=rdb&sites={site_no}"

    # download once; later runs read the cached copy
    if not out_fn.exists():
        try:
            urllib.request.urlretrieve(request, out_fn)
        except OSError:
            # urllib's URLError/HTTPError derive from OSError. Remove a partial
            # file so that a failed download is not mistaken for a cached one.
            if out_fn.exists():
                out_fn.unlink()
            print(f"{site_name} has no data!!!")
            continue

    site_info = pd.read_csv(
        out_fn,
        sep="\t",
        skiprows=skiprows,
        usecols=[1, 2, 4, 5],
        header=0,
        index_col=0,
        names=["site_id", "long_name", "lat", "lon"],
        dtype={"site_id": str},
    )
    # Every later cell addresses rows with `int(site_no)`. Whether `dtype` is
    # applied to the index column depends on the pandas version, so pin the
    # index to integers explicitly.
    site_info.index = site_info.index.astype("int64")

    site_info["site_name"] = site_name
    site_info["bc_source"] = bc_source
    site_info["used"] = False
    # float placeholders: these columns are filled with float statistics later
    site_info["mean"] = 0.0
    site_info["peak"] = 0.0
    site_info["method"] = ""
    site_infos.append(site_info)

site_info = pd.concat(site_infos)
site_info = gpd.GeoDataFrame(
    site_info,
    geometry=gpd.points_from_xy(site_info.lon, site_info.lat),
    crs="EPSG:4326",
)

# Download, filter, visualize and write

In [None]:
# period requested in the downloads below
begin_date = "2014-12-01"
end_date = "2020-01-02"

# the same bounds as Timestamps, used to slice downloaded records
t0 = pd.to_datetime(begin_date)
tf = pd.to_datetime(end_date)

## Atchafalaya River

In [None]:
site_name = "atchafalaya_at_morgan_city"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_atchafalaya = download_discharge_nwis(
    site_name, site_no, begin_date, end_date, skiprows=30
)

# The record is cut in three: the parts before `tstop` and after `tstart` are
# Godin filtered, the part in between is kept as recorded.
tstop = pd.to_datetime("2019-7-13 08:00")
tstart = pd.to_datetime("2019-7-14 06:00")

q_atchafalaya_p1 = q_atchafalaya.loc[t0:tstop]
q_atchafalaya_p2 = q_atchafalaya.loc[tstop:tstart]
q_atchafalaya_p3 = q_atchafalaya.loc[tstart:tf]

# give the filtered pieces the raw column name so all three stack into one
# column, then restore the filtered name on the stacked result
rename = {"discharge_cms_Godin_filtered": "discharge"}
rename1 = {"discharge": "discharge_cms_Godin_filtered"}
q_atchafalaya_godin_p1 = apply_godin(q_atchafalaya_p1).rename(rename, axis=1)
q_atchafalaya_godin_p3 = apply_godin(q_atchafalaya_p3).rename(rename, axis=1)

q_atchafalaya_godin = pd.concat(
    [q_atchafalaya_godin_p1, q_atchafalaya_p2, q_atchafalaya_godin_p3]
).rename(rename1, axis=1)

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="Atchafalaya River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(q_atchafalaya.index, q_atchafalaya.discharge, line_color="orange")
p.line(
    q_atchafalaya_godin.index,
    q_atchafalaya_godin.discharge_cms_Godin_filtered,
    line_color="blue",
    line_width=2,
)
show(p)

extra_info(site_info, site_no, q_atchafalaya_godin, "direct")

print(site_info.loc[int(site_no)])

# write (dropna removes the NaN rows the filter leaves at the ends of each piece)
q_atchafalaya_godin.dropna().to_csv(
    delft3d_input / f"{site_name}_{begin_date}_godin.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Wax Lake Outlet

In [None]:
# download
site_name = "wax_lake_outlet_at_calumet"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_wax_lake = download_discharge_nwis(site_name, site_no, begin_date, end_date)

# The record is cut in three: the parts before `tstop` and after `tstart` are
# Godin filtered, the part in between is kept as recorded.
tstop = pd.to_datetime("2019-7-13 08:00")
tstart = pd.to_datetime("2019-7-14 12:00")

q_wax_lake_p1 = q_wax_lake.loc[t0:tstop]
q_wax_lake_p2 = q_wax_lake.loc[tstop:tstart]
q_wax_lake_p3 = q_wax_lake.loc[tstart:tf]

# give the filtered pieces the raw column name so all three stack into one
# column, then restore the filtered name on the stacked result
rename = {"discharge_cms_Godin_filtered": "discharge"}
rename1 = {"discharge": "discharge_cms_Godin_filtered"}
q_wax_lake_godin_p1 = apply_godin(q_wax_lake_p1).rename(rename, axis=1)
q_wax_lake_godin_p3 = apply_godin(q_wax_lake_p3).rename(rename, axis=1)

q_wax_lake_godin = pd.concat(
    [q_wax_lake_godin_p1, q_wax_lake_p2, q_wax_lake_godin_p3]
).rename(rename1, axis=1)

extra_info(site_info, site_no, q_wax_lake_godin, "direct")
print(site_info.loc[int(site_no)])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="wax_lake River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(q_wax_lake.index, q_wax_lake.discharge, line_color="orange")
p.line(
    q_wax_lake_godin.index,
    q_wax_lake_godin.discharge_cms_Godin_filtered,
    line_color="blue",
    line_width=2,
)
show(p)

# write (dropna removes the NaN rows the filter leaves at the ends of each piece)
q_wax_lake_godin.dropna().to_csv(
    delft3d_input / f"{site_name}_{begin_date}_godin.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Belle Chasse

In [None]:
# download and tidally filter Belle Chasse data
site_name = "mississippi_at_belle_chase"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_mississippi = download_discharge_nwis(
    site_name, site_no, begin_date, end_date, skiprows=30
)
q_mississippi_godin = apply_godin(q_mississippi)

extra_info(site_info, site_no, q_mississippi_godin, "direct")
print(site_info.loc[int(site_no)])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="Mississippi River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(q_mississippi.index, q_mississippi.discharge, line_color="orange")
p.line(
    q_mississippi_godin.index,
    q_mississippi_godin.discharge_cms_Godin_filtered,
    line_color="blue",
    line_width=2,
)
show(p)

# write (dropna removes the NaN rows the filter leaves at both ends)
q_mississippi_godin.dropna().to_csv(
    delft3d_input / f"{site_name}_{begin_date}_godin.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Blind River (New canal river)

In [None]:
# download and tidally filter
site_name = "new_canal_river_east_sorrento"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_new_canal = download_discharge_nwis(site_name, site_no, begin_date, end_date)
q_new_canal_godin = apply_godin(q_new_canal)

# Mean filtered discharge per calendar day. The key month*100 + day sorts in
# calendar order and lets any date look up its own day, leap year or not.
calendar_key = q_new_canal_godin.index.month * 100 + q_new_canal_godin.index.day
q_new_canal_calendar = q_new_canal_godin.groupby(calendar_key).mean()
q_new_canal_means = q_new_canal_calendar.values

# Days from 2014-01-01 up to the day before the record starts get the mean of
# their calendar day. Looking values up by date, rather than pasting the year
# of means positionally, keeps leap year 2016 aligned after Feb 28 and complete
# through Dec 31. A calendar day with no mean (e.g. Feb 29) repeats the
# previous day's value.
fill_days = pd.date_range(
    "2014-01-01", q_new_canal.index[0] - pd.to_timedelta("1 day"), freq="D"
)
q_means = q_new_canal_calendar.reindex(fill_days.month * 100 + fill_days.day).ffill()
# the means are grouped from the UTC-indexed record, so these are UTC days
q_means.index = fill_days.rename("datetime_UTC")

# create historical/recorded hybrid record
q_new_canal_godin = pd.concat([q_means, q_new_canal_godin])

extra_info(site_info, site_no, q_new_canal_godin, "direct/historical")
print(site_info.loc[int(site_no)])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="New_canal River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(
    q_means.index,
    q_means.discharge_cms_Godin_filtered,
    line_color="green",
    line_dash="dashed",
)
p.line(q_new_canal.index, q_new_canal.discharge, line_color="orange")
p.line(
    q_new_canal_godin.index,
    q_new_canal_godin.discharge_cms_Godin_filtered,
    line_color="blue",
    line_width=2,
)
show(p)

q_new_canal_godin.dropna().to_csv(
    delft3d_input / f"blind_river-{site_name}_{begin_date}_godin.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Amite River

In [None]:
# download and tidally filter
site_name = 'amite_river_at_port_vincent'
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_amite = download_discharge_nwis(site_name, site_no, begin_date, end_date)
q_amite_godin = apply_godin(q_amite)

extra_info(site_info, site_no, q_amite_godin, "direct")
print(site_info.loc[int(site_no)])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="amite River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(q_amite.index, q_amite.discharge, line_color="orange")
p.line(
    q_amite_godin.index,
    q_amite_godin.discharge_cms_Godin_filtered,
    line_color="blue",
    line_width=2,
)
show(p)

# write (dropna removes the NaN rows the filter leaves at both ends)
q_amite_godin.dropna().to_csv(
    delft3d_input / f'{site_name}_{begin_date}_godin.csv',
    sep='\t',
    header=['discharge_m3s'],
    index_label=['datetime_UTC']
)

## The Tickfaw and Natalbany Rivers (Tickfaw source)

In [None]:
# The Tickfaw doesn't need to be tidally filtered
site_name = "tickfaw_river_at_holden"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True
q_tickfaw = download_discharge_nwis(site_name, site_no, begin_date, end_date)
extra_info(site_info, site_no, q_tickfaw, "direct")
print(site_info.loc[int(site_no)])

# The Natalbany River doesn't need to be tidally filtered
site_name2 = "natalbany_river_at_baptist"
site_no = sites[site_name2][0]
site_info.loc[int(site_no), ["used"]] = True
# Pass the Natalbany name: the download function names its files after the
# site name, so the Tickfaw name here would overwrite the Tickfaw csv with
# Natalbany data.
q_natalbany = download_discharge_nwis(site_name2, site_no, begin_date, end_date)
extra_info(site_info, site_no, q_natalbany, "direct")
print(site_info.loc[int(site_no)])

# there are duplicates in the Tickfaw data (combine_discharges drops them)
q_tickfaw_natalbany = combine_discharges([q_tickfaw, q_natalbany])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="Tickfaw River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(
    q_tickfaw.index, q_tickfaw.discharge, line_color="orange", legend_label="Tickfaw"
)
p.line(
    q_natalbany.index,
    q_natalbany.discharge,
    line_color="green",
    legend_label="Natalbany",
)
p.line(
    q_tickfaw_natalbany.index,
    q_tickfaw_natalbany.discharge,
    line_color="blue",
    line_dash="dashed",
    legend_label="Combined",
)
show(p)

# first entry is a nan
q_tickfaw_natalbany.iloc[1:].to_csv(
    delft3d_input / f"Tickfaw_River_{site_name}_and_{site_name2}_{begin_date}.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Tangipahoa River

In [None]:
# download
# no need to filter: the unfiltered record is what gets exported, the Godin
# curve is only drawn for comparison
site_name = "tangipahoa_river_at_robert"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_tangipahoa = download_discharge_nwis(site_name, site_no, begin_date, end_date)
q_tangipahoa_godin = apply_godin(q_tangipahoa)

# summarise the series that is actually written out, as the other unfiltered
# sites do
extra_info(site_info, site_no, q_tangipahoa, "direct")
print(site_info.loc[int(site_no)])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="tangipahoa River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(
    q_tangipahoa.index,
    q_tangipahoa.discharge,
    line_color="orange",
    legend_label="Tangipahoa",
)
p.line(
    q_tangipahoa_godin.index,
    q_tangipahoa_godin.discharge_cms_Godin_filtered,
    line_color="blue",
    line_width=2,
    legend_label="Filtered",
)
show(p)

q_tangipahoa.dropna().to_csv(
    delft3d_input / f"{site_name}_{begin_date}.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## The Tchefuncte River

In [None]:
# download (used unfiltered)
site_name = "tchefuncte_near_folsom"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_tchefuncte = download_discharge_nwis(site_name, site_no, begin_date, end_date)

extra_info(site_info, site_no, q_tchefuncte, "direct")
print(site_info.loc[int(site_no)])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="tchefuncte River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(
    q_tchefuncte.index,
    q_tchefuncte.discharge,
    line_color="orange",
    legend_label="Tchefuncte",
)
show(p)

q_tchefuncte.dropna().to_csv(
    delft3d_input / f"{site_name}_{begin_date}.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Jourdan River

In [None]:
# download the historical record
site_name = "jourdan_river"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

# The mean over calendar days is taken from 2000-10-10 to 2004-09-30 after being Godin filtered
q_jourdan = download_discharge_nwis(
    site_name, site_no, "2000-10-10", "2004-09-30", skiprows=31
)
# NOTE(review): values <= 0 are discarded before filtering. If they are real
# upstream (flood-tide) flow, dropping them biases the filtered mean high --
# confirm this is intended.
q_jourdan = q_jourdan.where(q_jourdan.discharge > 0, np.nan).dropna()
q_jourdan_godin = apply_godin(q_jourdan)

# mean filtered discharge for every day of the year
q_jourdan_godin["dayofyear"] = q_jourdan_godin.index.dayofyear
q_mean = q_jourdan_godin.groupby(by=["dayofyear"]).mean().dropna()

# Give every day of the model period the mean of its own day-of-year. Looking
# the value up by date replaces the hand-assembled pieces, which stamped the
# December block with 2015 instead of 2014 (duplicating December 2015 and
# leaving December 2014 empty) and filled 1-2 January 2020 with the means of
# the last two days of the year.
model_days = pd.date_range(begin_date, end_date, freq="1D")
q_jourdan_godin_mean = pd.Series(
    q_mean["discharge_cms_Godin_filtered"].reindex(model_days.dayofyear).values,
    index=model_days,
)

site_info.loc[int(site_no), "mean"] = q_jourdan_godin_mean.mean()
site_info.loc[int(site_no), "peak"] = q_jourdan_godin_mean.max()
site_info.loc[int(site_no), "method"] = "historical"
print(site_info.loc[int(site_no)])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="Jourdan River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(
    q_jourdan_godin_mean.index,
    q_jourdan_godin_mean.values,
    line_color="orange",
    legend_label="Jourdan",
)
show(p)

q_jourdan_godin_mean.dropna().to_csv(
    delft3d_input / f"{site_name}_{begin_date}-{end_date}_daily_means_godin.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Wolf River

In [None]:
# download (used unfiltered)
site_name = "wolf_river"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_wolf_river = download_discharge_nwis(site_name, site_no, begin_date, end_date)

extra_info(site_info, site_no, q_wolf_river, "direct")
print(site_info.loc[int(site_no)])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title=site_name,
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(
    q_wolf_river.index,
    q_wolf_river.discharge,
    line_color="orange",
    legend_label=site_name,
)
show(p)

q_wolf_river.dropna().to_csv(
    delft3d_input / f"{site_name}_{begin_date}-{end_date}.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Biloxi

In [None]:
# download
# no filtering needed
site_name = "biloxi_river"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_biloxi_river = download_discharge_nwis(site_name, site_no, begin_date, end_date)

extra_info(site_info, site_no, q_biloxi_river, "direct")
print(site_info.loc[int(site_no)])

# add historical daily mean from Tuxachanie creek
site_name2 = "tuxachanie_creek"
site_no2 = sites[site_name2][0]
site_info.loc[int(site_no2), ["used"]] = True
q_tuxachanie = pd.read_csv(
    input_data / f"{site_name2}_historical_mean_daily_cfs.txt",
    sep=r"\s+",
    skiprows=46,
    usecols=[4, 5, 13],
    names=["month", "day", "discharge"],
)

q_tuxachanie.discharge = q_tuxachanie.discharge * cfs2cms
site_info.loc[int(site_no2), "mean"] = q_tuxachanie["discharge"].mean()
site_info.loc[int(site_no2), "peak"] = q_tuxachanie["discharge"].max()
site_info.loc[int(site_no2), "method"] = "historical"
print(site_info.loc[int(site_no2)])

# Stamp the table of daily means onto each model year. The second entry is the
# number of leading table rows to use; None means the whole table, and 2020
# only needs its first two days.
# NOTE(review): the series starts on 2015-01-01 although begin_date is
# 2014-12-01, so December 2014 is dropped from the combined export -- confirm
# this is intended.
year_spans = [
    (2015, None),
    (2016, None),
    (2017, None),
    (2018, None),
    (2019, None),
    (2020, 2),
]
yearly = []
for year, n_rows in year_spans:
    table = q_tuxachanie.iloc[:n_rows]
    timestamps = [
        f"{year}-{month}-{day}" for month, day in zip(table.month, table.day)
    ]
    yearly.append(
        pd.DataFrame(
            data=table.discharge.values,
            index=pd.to_datetime(timestamps),
            columns=["discharge"],
            dtype=float,
        )
    )

q_tuxachanie = pd.concat(yearly)

q_biloxi_tuxachanie = combine_discharges([q_biloxi_river, q_tuxachanie])

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title=f"{site_name} {site_name2}",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(
    q_biloxi_river.index,
    q_biloxi_river.discharge,
    line_color="orange",
    legend_label="biloxi_river",
)
p.line(
    q_tuxachanie.index,
    q_tuxachanie.discharge,
    line_color="green",
    legend_label="Tuxachanie",
)
p.line(
    q_biloxi_tuxachanie.index,
    q_biloxi_tuxachanie.discharge,
    line_color="blue",
    line_dash="dashed",
    legend_label="Combined",
)
show(p)

# write out data
q_biloxi_tuxachanie.dropna().to_csv(
    delft3d_input
    / f"Biloxi_Bay_{site_name}_and_{site_name2}_{begin_date}-{end_date}.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## The Pearl River

In [None]:
# four gauges are downloaded unfiltered and summed into one Pearl River record
site_name = "bogue_chitto_river"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True
q_bogue_chitto = download_discharge_nwis(site_name, site_no, begin_date, end_date)

extra_info(site_info, site_no, q_bogue_chitto, "direct")
print(site_info.loc[int(site_no)])

site_name2 = "pearl_at_bogalusa"
site_no2 = sites[site_name2][0]
site_info.loc[int(site_no2), ["used"]] = True
q_pearl_river = download_discharge_nwis(site_name2, site_no2, begin_date, end_date)

extra_info(site_info, site_no2, q_pearl_river, "direct")
print(site_info.loc[int(site_no2)])

site_name3 = "west_hobolochitto"
site_no3 = sites[site_name3][0]
site_info.loc[int(site_no3), ["used"]] = True
q_west_hobo = download_discharge_nwis(site_name3, site_no3, begin_date, end_date)

extra_info(site_info, site_no3, q_west_hobo, "direct")
print(site_info.loc[int(site_no3)])

site_name4 = "east_hobolochitto"
site_no4 = sites[site_name4][0]
site_info.loc[int(site_no4), ["used"]] = True
q_east_hobo = download_discharge_nwis(site_name4, site_no4, begin_date, end_date)

extra_info(site_info, site_no4, q_east_hobo, "direct")
print(site_info.loc[int(site_no4)])

q_combined_pearl_river = combine_discharges(
    [q_bogue_chitto, q_pearl_river, q_west_hobo, q_east_hobo]
)

# visualize (`height`/`width` replace `plot_height`/`plot_width`, which were
# removed in Bokeh 3)
p = figure(
    title="Pearl River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    height=350,
    width=800,
)
p.line(
    q_bogue_chitto.index, q_bogue_chitto.discharge, line_color="orange", legend_label=site_name
)
p.line(
    q_pearl_river.index,
    q_pearl_river.discharge,
    line_color="green",
    legend_label=site_name2,
)
p.line(
    q_west_hobo.index,
    q_west_hobo.discharge,
    line_color="red",
    legend_label=site_name3,
)

p.line(
    q_east_hobo.index,
    q_east_hobo.discharge,
    line_color="blue",
    legend_label=site_name4,
)

p.line(
    q_combined_pearl_river.index,
    q_combined_pearl_river.discharge,
    line_color="black",
    line_dash="dashed",
    legend_label="Combined",
)
show(p)

q_combined_pearl_river.dropna().to_csv(
    delft3d_input / f"Pearl_River_combined_stations_{begin_date}-{end_date}.csv",
    sep="\t",
    header=["discharge_m3s"],
    index_label=["datetime_UTC"],
)

## Pascagoula River

In [None]:
# Red Creek is a tributary of the Pascagoula.
# The Pascagoula River has data between 1993 - 2009, so daily means will be used initially.
# Escatawpa at I-10 has discharge data from 2001-08-26 to 2004-09-30.
# NOTE(review): the ratio window below ends on 2003-09-30, not 2004-09-30 — confirm which is intended.
# Compare the I-10 observations (larger catchment) to Escatawpa at Agricola and
# scale the Agricola data by the mean of their ratio.

begin_date_for_ratio = "2001-08-26"
end_date_for_ratio = "2003-09-30"

# I-10 record, aggregated to daily means.
q_escatawpa_I10_for_ratio = download_discharge_nwis(
    "escatawpa_at_I-10",
    "0248018020",
    begin_date_for_ratio,
    end_date_for_ratio,
    skiprows=29,
)
q_escatawpa_I10_for_ratio = q_escatawpa_I10_for_ratio.groupby(
    pd.Grouper(freq="1D")
).mean()

# Agricola record over the same window, put on the same daily grid.
q_escatawpa_agricola_for_ratio = download_mean_discharge_nwis(
    "escatawpa_at_agricola", "02479560", begin_date_for_ratio, end_date_for_ratio
)
q_escatawpa_agricola_for_ratio = q_escatawpa_agricola_for_ratio.groupby(
    pd.Grouper(freq="1D")
).mean()

# Mean of the day-by-day I-10 / Agricola ratio.
ratio = q_escatawpa_I10_for_ratio.div(q_escatawpa_agricola_for_ratio).mean()

# Download Agricola for the simulation period and scale it up with that ratio.
site_name = "escatawpa_river_nr_agricola"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True
q_escatawpa = download_mean_discharge_nwis(site_name, site_no, begin_date, end_date)
q_escatawpa_scaled = q_escatawpa.mul(ratio)

extra_info(site_info, site_no, q_escatawpa_scaled, "historical")
print(site_info.loc[int(site_no)])

# add historical daily mean for Pascagoula River
site_name2 = "pascagoula_river"
site_no2 = sites[site_name2][0]
site_info.loc[int(site_no2), ["used"]] = True

# Whitespace-separated table; only the month, day and discharge columns are kept.
q_pascagoula = pd.read_csv(
    input_data / f"{site_name2}_historical_mean_daily.txt",
    sep=r"\s+",  # raw string: "\s" is an invalid escape sequence in a plain literal
    skiprows=46,
    usecols=[4, 5, 13],
    names=["month", "day", "discharge"],
)

# convert with the cfs2cms constant (ft3/s -> m3/s)
q_pascagoula.discharge = q_pascagoula.discharge * cfs2cms

site_info.loc[int(site_no2), "mean"] = q_pascagoula.discharge.mean()
site_info.loc[int(site_no2), "peak"] = q_pascagoula.discharge.max()
site_info.loc[int(site_no2), "method"] = "historical"
print(site_info.loc[int(site_no2)])

# Repeat the (month, day) table once per year to build a continuous time series.
# Each entry is (year, number of leading rows to use); None means the whole table.
# NOTE(review): 2020 only uses the first two rows — presumably just enough to
# reach end_date; confirm.
# NOTE(review): if the table contains a 29 Feb row, the non-leap years will fail
# to parse in pd.to_datetime — verify against the input file.
yearly_frames = []
for _year, _n_rows in [
    (2015, None),
    (2016, None),
    (2017, None),
    (2018, None),
    (2019, None),
    (2020, 2),
]:
    _rows = q_pascagoula.iloc[:_n_rows]  # iloc[:None] selects every row
    timestamps = [
        f"{_year}-{month}-{day}" for month, day in zip(_rows.month, _rows.day)
    ]
    yearly_frames.append(
        pd.DataFrame(
            data=_rows.discharge.values,
            index=pd.to_datetime(timestamps),
            columns=["discharge"],
            dtype=float,
        )
    )

# From here on q_pascagoula is the datetime-indexed series, not the raw table.
q_pascagoula = pd.concat(yearly_frames)

# combine the scaled Escatawpa series with the Pascagoula daily-mean series
q_escatawpa_pascagoula = combine_discharges([q_escatawpa_scaled, q_pascagoula])

# visualize: both inputs as solid lines, the combined series dashed
p = figure(
    title="Pascagoula River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    plot_width=800,
    plot_height=350,
)
for _q, _color, _label in (
    (q_pascagoula, "orange", "pascagoula"),
    (q_escatawpa_scaled, "green", "Escatawpa"),
):
    p.line(_q.index, _q.discharge, line_color=_color, legend_label=_label)
p.line(
    q_escatawpa_pascagoula.index,
    q_escatawpa_pascagoula.discharge,
    legend_label="Combined",
    line_color="blue",
    line_dash="dashed",
)
show(p)

# write out data (tab-separated, NaN rows dropped)
q_escatawpa_pascagoula.dropna().to_csv(
    delft3d_input
    / f"Pascagoula_{site_name}_and_{site_name2}_{begin_date}-{end_date}.csv",
    index_label=["datetime_UTC"],
    header=["discharge_m3s"],
    sep="\t",
)

## Mobile Bay

### Head of the Bay

In [None]:
# From inspection of the NHD for Alabama it appears that the Tombigbee and Alabama Rivers
# are tributaries of the Mobile River.
# The Mobile River then splits into two branches, both of which are gaged (Mobile River and Tensaw).
# The Chickasaw joins with the Mobile River downstream of the gage.
# Combine the Chickasaw with the Mobile for output; keep the Tensaw separate.

# Mobile River and Tensaw have tidally filtered data available.
# NOTE(review): data_code is only passed to the Tensaw download below; the Mobile
# download passes 60 in the same position and is Godin-filtered locally instead —
# presumably both values are USGS parameter codes; confirm.
data_code = 72137
site_name = "mobile_river_at_bucks"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_mobile = download_discharge_nwis(
    site_name, site_no, begin_date, end_date, 60, skiprows=30
)

# apply_godin names its output column "discharge_cms_Godin_filtered"; rename it back
# to "discharge" so the filtered frame can be used like the other discharge frames.
rename = {"discharge_cms_Godin_filtered": "discharge"}
q_mobile_godin = apply_godin(q_mobile).rename(rename, axis=1)

extra_info(site_info, site_no, q_mobile_godin, "direct")
print(site_info.loc[int(site_no)])

# Tensaw: downloaded with data_code, used as-is (no local filtering).
site_name2 = "tensaw_at_mount_vernon"
site_no2 = sites[site_name2][0]
site_info.loc[int(site_no2), ["used"]] = True
q_tensaw = download_discharge_nwis(
    site_name2, site_no2, begin_date, end_date, data_code, 29
)

extra_info(site_info, site_no2, q_tensaw, "direct")
print(site_info.loc[int(site_no2)])

# Chickasaw Creek: downloaded through the mean-discharge helper.
site_name3 = "chickasaw_creek"
site_no3 = sites[site_name3][0]
site_info.loc[int(site_no3), ["used"]] = True
q_chickasaw = download_mean_discharge_nwis(site_name3, site_no3, begin_date, end_date)

extra_info(site_info, site_no3, q_chickasaw, "direct")
print(site_info.loc[int(site_no3)])

# combine the Godin-filtered Mobile series with the Chickasaw
# NOTE(review): nothing in this cell removes duplicates explicitly; if that is
# needed it presumably happens inside combine_discharges — confirm.
q_mobile_chickasaw = combine_discharges([q_mobile_godin, q_chickasaw])

## Visualize
# Raw and Godin-filtered Mobile, Chickasaw, Tensaw, and the Mobile+Chickasaw
# combination (dashed) on one figure.
p = figure(
    title="Mobile/Tensaw River",  # title must name the rivers plotted in this cell
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    plot_height=350,
    plot_width=800,
)
for _q, _color, _label in (
    (q_mobile, "orange", "mobile"),
    (q_mobile_godin, "red", "mobile (filtered)"),
    (q_chickasaw, "black", "Chickasaw"),
    (q_tensaw, "green", "Tensaw"),
):
    p.line(_q.index, _q.discharge, line_color=_color, legend_label=_label)
p.line(
    q_mobile_chickasaw.index,
    q_mobile_chickasaw.discharge,
    line_color="blue",
    line_dash="dashed",
    legend_label="Combined",
)
show(p)

# write out data: Mobile combined with Chickasaw, and Tensaw on its own.
# Both files are tab-separated with NaN rows dropped.
for _frame, _filename in (
    (q_mobile_chickasaw, f"Mobile_{begin_date}_combined_w_Chickasaw_Creek.csv"),
    (q_tensaw, f"tensaw_at_mount_vernon_{begin_date}.csv"),
):
    _frame.dropna().to_csv(
        delft3d_input / _filename,
        index_label=["datetime_UTC"],
        header=["discharge_m3s"],
        sep="\t",
    )

### Mouth of the Bay

#### Fowl River

In [None]:
# Fowl River: the record is used as downloaded (doesn't need to be filtered)
site_name = "fowl_river"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_fowl_river = download_discharge_nwis(site_name, site_no, begin_date, end_date)

extra_info(site_info, site_no, q_fowl_river, "direct")
print(site_info.loc[int(site_no)])

# visualize
p = figure(
    title=site_name,
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    plot_width=800,
    plot_height=350,
)
p.line(
    x=q_fowl_river.index,
    y=q_fowl_river.discharge,
    legend_label=site_name,
    line_color="orange",
)
show(p)

# write out data (tab-separated, NaN rows dropped)
q_fowl_river.dropna().to_csv(
    delft3d_input / f"{site_name}_{begin_date}.csv",
    index_label=["datetime_UTC"],
    header=["discharge_m3s"],
    sep="\t",
)

#### Magnolia River and Fish River

In [None]:
# Fish River and Magnolia River: both records are used as downloaded
# (doesn't need to be filtered)
site_name = "fish_river"
site_no = sites[site_name][0]
site_info.loc[int(site_no), ["used"]] = True

q_fish_river = download_discharge_nwis(site_name, site_no, begin_date, end_date)

extra_info(site_info, site_no, q_fish_river, "direct")
print(site_info.loc[int(site_no)])

site_name2 = "magnolia_river"
site_no2 = sites[site_name2][0]
site_info.loc[int(site_no2), ["used"]] = True

q_magnolia_river = download_discharge_nwis(site_name2, site_no2, begin_date, end_date)

extra_info(site_info, site_no2, q_magnolia_river, "direct")
print(site_info.loc[int(site_no2)])

# combine the two records
q_magnolia_fish_rivers = combine_discharges([q_magnolia_river, q_fish_river])

## Visualize: each river plus the combined series
p = figure(
    title="Magnolia River",
    x_axis_type="datetime",
    y_axis_label="discharge [m3/s]",
    plot_width=800,
    plot_height=350,
)
for _q, _color, _label in (
    (q_fish_river, "green", "fish"),
    (q_magnolia_river, "red", "magnolia"),
    (q_magnolia_fish_rivers, "blue", "Combined"),
):
    p.line(_q.index, _q.discharge, line_color=_color, legend_label=_label)
show(p)


# write out data (tab-separated, NaN rows dropped)
q_magnolia_fish_rivers.dropna().to_csv(
    delft3d_input / f"combined_magnolia_fish_rivers_{begin_date}.csv",
    index_label=["datetime_UTC"],
    header=["discharge_m3s"],
    sep="\t",
)

# Updated site info

In [None]:
# CSV listing only the gauges flagged as used, without the geometry column.
used_site_list_fn = site_info_output / "used_usgs_gages.csv"
site_info.drop(columns="geometry").loc[site_info.used == True].to_csv(
    used_site_list_fn
)

# Shapefile with every gauge, geometry included.
site_info_fn = site_info_output / "usgs_gages.shp"
site_info.to_file(site_info_fn)

# Show the used gauges as the cell output.
site_info.loc[site_info.used == True]

# Nice visualization

In [None]:
# visualize nice: observations vs. Godin-filtered series,
# full record in panel "a" and a zoomed window in panel "b"
xp = 0.025
fig, (ax, ax2) = plt.subplots(nrows=2, figsize=(12, 10))

# Settings shared by both panels.
for _axis, _tag in ((ax, "a"), (ax2, "b")):
    q_mississippi.plot(ax=_axis)
    q_mississippi_godin.plot(ax=_axis)
    _axis.set_ylabel("discharge [$m^3/s$]")
    _axis.set_xlabel("")
    _axis.grid(alpha=0.25, color="gray", zorder=100)
    _axis.text(x=xp, y=0.9, s=_tag, transform=_axis.transAxes, fontsize=20)

# Only the top panel carries a legend.
ax.legend(["USGS observations", "filtered (Godin)"], loc="lower right")

# Bottom panel: zoom on the first half of September 2018 and drop its legend.
ax2.set_xlim("2018-09-01", "2018-09-14")
ax2.set_ylim(4500, 11000)
ax2.get_legend().remove()

fig.savefig(figures / "belle_chasse_2018.png", dpi=500, bbox_inches="tight")

In [None]:
# visualize nice: combined Pearl River input together with its four contributing gauges
fig, ax = plt.subplots(figsize=(12, 5))

# Plot order must match the legend entries below.
for _q in (
    q_combined_pearl_river,
    q_pearl_river,
    q_bogue_chitto,
    q_west_hobo,
    q_east_hobo,
):
    _q.plot(ax=ax)

ax.set_title("Pearl River")
ax.set_ylabel("discharge [$m^3/s$]")
ax.legend(
    [
        "Discharge input",
        "Pearl River",
        "Bogue Chitto",
        "West Hobolochitto",
        "East Hobolochitto",
    ],
    ncol=2,
)
fig.savefig(figures / "pearl_river_2018.png", bbox_inches="tight")

In [None]:
# visualize nice: Mobile and Tensaw discharge on a single axis
fig, ax = plt.subplots(figsize=(12, 5))

# Plot order must match the legend entries below.
for _q in (q_mobile, q_tensaw):
    _q.plot(ax=ax)

ax.set_title("Mobile/Tensaw River")
ax.set_ylabel("discharge [$m^3/s$]")
ax.legend(["Mobile River", "Tensaw River"])
fig.savefig(figures / "mobile_river_2018.png", bbox_inches="tight")

# output table of river sources

In [None]:
# Table of the river sources actually used, written as CSV.
f = output / "used_gauges_extra_info.csv"
# Select used gauges with the same explicit mask as the site-info export cell.
# The previous `~site_info.used == False` parsed as `(~site_info.used) == False`,
# which only means "used" for a strict bool column (on object dtype `~True` is -2
# and the mask selects nothing).
site_info[site_info.used == True][
    ["long_name", "bc_source", "mean", "peak", "method", "lat", "lon"]
].to_csv(f, index_label="station_id")