## Calculate POT (peaks over threshold)

Function of script:
* Open the prepared water level timeseries from the previous step
* Apply `stn_thresh` from metadata.csv to calculate peaks over threshold (POT)
* Format and save as NetCDF

Context in workflow:
* Must be run after water level timeseries preparation

In [None]:
import os

import pandas as pd
from pyextremes import get_extremes
import xarray as xr
from pathlib import Path
%matplotlib inline

# Reference period; recorded as the "ref_period" attribute on each output file.
base_start, base_end = 1995, 2014

# NOTE(review): these four settings are not referenced by the cells visible in
# this notebook; presumably they are shared with the water level preparation
# step -- confirm before removing.
missing_value = 999.999
dec = 3
max_missing = 0.3
min_hrs_per_day = 12

# Declustering window in hours; passed to get_extremes as r=f"{decluster}h".
decluster = 25

# Directory layout: the metadata table and processed files live under data/outputs.
DATA = Path("data")
OUTPUTS = DATA.joinpath("outputs")

stnlist_csv_edit = OUTPUTS.joinpath("metadata.csv")
proc_dir = OUTPUTS.joinpath("wl_proc")

# Station metadata table; the processing loop iterates over its rows.
stnlist = pd.read_csv(stnlist_csv_edit, encoding="latin1")

In [None]:
# For every station in the metadata table: read its prepared water level
# series, extract peaks over the station threshold, and save them as NetCDF.
for _, row in stnlist.iterrows():
    # Per-station settings from the metadata table.
    stn_num = str(row['stn_num']).zfill(5)  # zero-padded to 5 characters for file names
    stn_name = row['stn_name']
    stn_thresh = row['stn_thresh']
    sl_mm_per_yr = row['sl_mm_per_yr']
    crop_calendar_nyrs = row['calendar_nyrs']
    wl_path = proc_dir / f"{stn_num}_wl.nc"
    wl_pot_path = proc_dir / f"{stn_num}_wl_pot.nc"

    print(stn_num, stn_name)

    # Read prepared water level timeseries. The context manager closes the
    # file as soon as the values are in memory, so handles do not accumulate
    # while looping over stations.
    with xr.open_dataarray(wl_path) as wl:
        wl_series = wl.dropna(dim='time').to_series()

    # Get POT: exceedances of stn_thresh, declustered with a window of
    # `decluster` hours.
    alldata_POT = get_extremes(ts=wl_series, method="POT", threshold=stn_thresh, r=f'{decluster}h')

    # Format & save netcdf file
    pot1 = alldata_POT.reset_index()
    pot1.columns = ['time', 'wl_pot']
    wl_pot = xr.DataArray(pot1['wl_pot'], dims=['time'], coords={'time': pot1['time']})

    attrs = {
        "wl_stn_name": stn_name,
        "wl_stn_id": stn_num,
        "stn_thresh": stn_thresh,
        "units": "m",
        "standard_name": "sea_surface_height_above_geopotential_datum",
        "geopotential_datum_name": "Canadian Geodetic Vertical Datum of 2013 (CGVD2013)",
        "sl_mm_yr": sl_mm_per_yr,
        "ref_period": [base_start, base_end],
        "label": "Water Level Peaks Over Threshold (POT)",
        # Average number of peaks per year over the station's calendar_nyrs.
        # NOTE(review): assumes calendar_nyrs is non-zero and not missing -- confirm.
        "peaks_per_yr": round(len(wl_pot.time) / crop_calendar_nyrs, 2),
    }

    # attrs is a plain dict, so a direct update is all that is needed.
    wl_pot.attrs.update(attrs)
    wl_pot.to_netcdf(wl_pot_path)

# NOTE(review): stnlist is not modified by the loop above, so this writes the
# metadata table back unchanged -- confirm whether it is still needed.
stnlist.to_csv(stnlist_csv_edit, index=False, encoding='latin1')