In [None]:
import s3fs
from glob import glob
import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
import intake 

import holoviews as hv
import geoviews as gv
from holoviews.streams import Selection1D, Params, Tap

import hvplot.pandas
import hvplot.xarray

import panel as pn
# S3 filesystem handle; anon=False requests a non-anonymous (credentialed) connection.
# In the visible cells it is referenced only by the commented-out zarr loader.
s3 = s3fs.S3FileSystem(anon=False)
# select bokeh as the plotting backend for GeoViews
gv.extension('bokeh')

## Load Data

reference notebook: Streamflow_expl.ipynb @ eis-freshwater/mississippi-river-delta

### LIS Routing Data

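Read the re-chunked LIS routing data. The direct read from `s3://eis-dh-hydro/LIS/rechunk-test4/OL_1km/ROUTING/LIS_HIST.d01.zarr` is kept (commented out) in the next cell for reference; the routing datasets used in this notebook are loaded through the intake catalog (`intake.yml`) in the cells after it.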

In [None]:
#bucket = 'eis-dh-hydro'
#key  = "LIS/rechunk-test4/OL_1km/ROUTING/LIS_HIST.d01"
#routing_data = xr.open_zarr(s3.get_mapper(f'{bucket}/{key}.zarr'))

In [None]:
# Open the intake catalog that describes the routing datasets.
# The path is relative, so it is resolved against the kernel's working directory.
cat_path = 'intake.yml'
delta_cat = intake.open_catalog(cat_path)
# cell output: the names of the catalog entries
list(delta_cat)

In [None]:
# bookkeeping variables that are removed from every routing dataset after loading
drop_vars = ['_history', '_eis_source_path']


def _load_routing(entry) -> xr.Dataset:
    """Read one routing catalog entry with ``read_chunked()`` and strip ``drop_vars``.

    ``Dataset.drop`` is deprecated in xarray; ``Dataset.drop_vars`` is the
    supported call for removing variables by name and, like the old call,
    raises if one of the listed variables is missing.
    """
    return entry.read_chunked().drop_vars(drop_vars)


control_rtg_ds = _load_routing(delta_cat.control_2km_routing)   # control case
nat_rtg_ds = _load_routing(delta_cat.naturalized_2km_routing)   # naturalized case
rw_rtg_ds = _load_routing(delta_cat.realworld_2km_routing)      # realworld case
noslr_rtg_ds = _load_routing(delta_cat.no_slr_2km_routing)      # no sea level case

add lat/lon coordinates to dataset

In [None]:
 def add_latlon_coords(dataset: xr.Dataset)->xr.Dataset:
     """Re-index a dataset on 1-D ``lat``/``lon`` coordinates built from its grid attributes.

     The ``north_south``/``east_west`` dimensions are renamed to ``lat``/``lon``
     and given evenly spaced float32 coordinate values that start at the
     south-west corner and advance by ``DY``/``DX`` per grid cell. The
     dataset's existing ``lat``/``lon`` variables are kept under the names
     ``orig_lat``/``orig_lon``, and their attributes are copied onto the new
     coordinates.

     Parameters
     ----------
     dataset : xr.Dataset
         Must provide the global attributes ``DX``, ``DY``,
         ``SOUTH_WEST_CORNER_LAT`` and ``SOUTH_WEST_CORNER_LON``, the
         dimensions ``east_west``/``north_south`` and variables ``lat``/``lon``.

     Returns
     -------
     xr.Dataset
         The renamed dataset with the new coordinates assigned.
     """
     meta = dataset.attrs

     def _rounded(key):
         # grid attributes are parsed as floats and rounded to 3 decimals
         return round(float(meta[key]), 3)

     # grid spacing along x and y
     step_x = _rounded('DX')
     step_y = _rounded('DY')

     # number of grid cells along each axis
     n_lon = len(dataset['east_west'])
     n_lat = len(dataset['north_south'])

     # south-west (lower-left) corner of the grid
     south = _rounded('SOUTH_WEST_CORNER_LAT')
     west = _rounded('SOUTH_WEST_CORNER_LON')

     # far edges of the grid; excluded from the coordinates by endpoint=False
     north = south + (step_y * n_lat)
     east = west + (step_x * n_lon)

     # one coordinate value per grid cell
     lat_values = np.linspace(south, north, n_lat, dtype=np.float32, endpoint=False)
     lon_values = np.linspace(west, east, n_lon, dtype=np.float32, endpoint=False)

     # keep the metadata of the original lat/lon variables for the new coordinates
     saved_lon_attrs = dataset.lon.attrs
     saved_lat_attrs = dataset.lat.attrs

     # move the original lat/lon variables out of the way (renamed, not dropped)
     dataset = dataset.rename({'lon':'orig_lon', 'lat':'orig_lat'})
     # the grid dimensions take over the lat/lon names
     dataset = dataset.rename({'north_south': 'lat', 'east_west': 'lon'})
     dataset = dataset.assign_coords({'lat': lat_values, 'lon': lon_values})
     dataset.lon.attrs = saved_lon_attrs
     dataset.lat.attrs = saved_lat_attrs

     return dataset

In [None]:
# Re-index every scenario on 1-D lat/lon coordinates.
# Each name is rebound to its converted dataset, so this cell is not idempotent:
# add_latlon_coords() looks up the 'east_west'/'north_south' dimensions, which it
# renames away, so a second run without reloading the datasets is expected to fail.
control_rtg_ds = add_latlon_coords(control_rtg_ds)
nat_rtg_ds = add_latlon_coords(nat_rtg_ds)
rw_rtg_ds = add_latlon_coords(rw_rtg_ds)
noslr_rtg_ds = add_latlon_coords(noslr_rtg_ds)

In [None]:
# scenario name -> routing dataset (a dict, despite the variable name)
routing_list = {
    'control': control_rtg_ds,
    'naturalized': nat_rtg_ds,
    'realworld': rw_rtg_ds,
    'no_sea_level': noslr_rtg_ds,
}
routing_list

In [None]:
# list the scenario names, one per line (iterating a dict yields its keys)
for case in routing_list:
    print(case)

### Streamflow Data

open the header file

**Note: columns 1, 2 contain grid cell indices for 2km LIS output so `get_indices()` should no longer be needed**

In [97]:
# Gage header table: the first five whitespace-separated columns of the header file.
header_file = '../data/mississippi-river-delta/usgs_streamflow/header_mis_002_1_short.txt'
header_df = pd.read_csv(
    header_file,
    usecols=[0, 1, 2, 3, 4],
    sep=r'\s+',  # equivalent to delim_whitespace=True, which pandas has deprecated
    names=['gage_id', 'x', 'y', 'lon', 'lat'],
)

# change 'gage_id' dtype to 'str' and add prefix '0'
# (vectorized; avoids a lambda whose argument shadows the 'x' column name)
header_df['gage_id'] = '0' + header_df['gage_id'].astype(str)

In [98]:
# cell output: the parsed gage header table
header_df

Unnamed: 0,gage_id,x,y,lon,lat
0,7374525,312,67,-89.978,29.857
1,7374000,252,98,-91.192,30.446
2,7289000,265,192,-90.906,32.315
3,7381490,221,124,-91.798,30.983
4,7344370,118,230,-93.859,33.089
5,7367005,205,200,-92.12,32.5


In [99]:
# Per-gage streamflow files: every .txt in the directory whose name ends in four
# digits before the extension (the header file above does not match this pattern).
# Sorted so that the column order produced by gages_to_df() is deterministic.
gage_files = sorted(glob('../data/mississippi-river-delta/usgs_streamflow/*[0-9][0-9][0-9][0-9].txt'))

def gages_to_df(filelist):
    """Combine per-gage streamflow text files into one wide DataFrame.

    Each file is read as whitespace-delimited ``date value`` rows without a
    header line. The value column is named after the file's base name with
    its final extension removed (e.g. ``07289000.txt`` -> ``07289000``).

    Parameters
    ----------
    filelist : iterable of str
        Paths of the gage files to read.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` with one column per file, in ``filelist`` order.
        Dates missing from a file are NaN in that file's column.

    Raises
    ------
    ValueError
        If ``filelist`` is empty.
    """
    from pathlib import Path  # local import keeps this cell self-contained

    filelist = list(filelist)
    if not filelist:
        # pd.concat([]) would also raise ValueError, but with an opaque message
        raise ValueError('gages_to_df() needs at least one gage file; got an empty list')

    dfs = []
    for file in filelist:
        # Path.stem removes only the final extension and copes with any path
        # separator. str.strip('.txt') strips every leading/trailing '.', 't'
        # and 'x' character, which can mangle an id.
        gage_id = Path(file).stem
        df = pd.read_csv(file, names=['date', gage_id], sep=r'\s+',
                         parse_dates=['date'], index_col='date')
        dfs.append(df)
    return pd.concat(dfs, axis=1)

In [100]:
# wide table of gage streamflow: date index, one column per gage file
usgs_streamflow_df = gages_to_df(gage_files)

In [101]:
# cell output: the combined gage table (the rendering is truncated by pandas)
usgs_streamflow_df

Unnamed: 0_level_0,07289000,07344370,07367005,07374000,07374525,07381490
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1997-10-01,,71.075,,,,
1997-10-02,,82.119,,,,
1997-10-03,,95.428,,,,
1997-10-04,,93.162,,,,
1997-10-05,,97.693,,,,
...,...,...,...,...,...,...
2021-03-18,35962.398,753.228,1650.872,25315.262,25909.916,11185.155
2021-03-19,36245.566,838.179,1650.872,25570.113,26108.135,11326.739
2021-03-20,36811.902,869.327,1645.209,25938.232,26391.303,11553.274
2021-03-21,37095.070,826.852,1616.892,26278.035,26702.787,11808.126


Convert the gage streamflow dataframe to an xarray Dataset (one variable per gage, indexed by `date`).

In [102]:
# DataFrame.to_xarray() returns an xarray Dataset: each gage column becomes a
# variable and the DataFrame's 'date' index becomes the 'date' dimension.
xa_gage_data = usgs_streamflow_df.to_xarray()
xa_gage_data

## Map & Time Series

display gage stations on map <br>
display time series of LIS routing and gage streamflow

In [103]:
# gage locations
def points():
    """Build the gage-location point layer from the module-level ``header_df``.

    Returns
    -------
    gv.Points
        Points keyed on the ``lon``/``lat`` columns, carrying ``gage_id`` as
        the value dimension.
    """
    return gv.Points(header_df, kdims=['lon', 'lat'], vdims=['gage_id'])

In [104]:
# point layer with the gage locations
tmp = points()
# red markers with tap and hover tools; the nonselection_* alphas style the
# points that are not part of the current selection
pts_opts = gv.opts.Points( color='red', size=10, tools=['tap', 'hover'], nonselection_fill_alpha=0.2, nonselection_line_alpha=0.6)
# styled layer wrapped with hv.util.Dynamic; the Selection1D stream further
# down uses `dpts` as its source
dpts = hv.util.Dynamic(tmp.opts(pts_opts)).opts(width=600, height=600)

In [105]:
# names of every 3-dimensional variable in the control dataset; they populate
# the variable selector widget defined further down
var_names = [ str(k) for k,v in control_rtg_ds.variables.items() if v.ndim == 3 ]
#var_names = [ str(k) for k,v in routing_ds.variables.items() if v.ndim == 3 ]

# Restrict the routing time series to this window for performance.
# TODO: revisit this window once the re-chunked data is in use.
test_time = slice('2019-01-01', '2020-01-01')


# support functions to extract gage & routing data
def get_indices(dset, lon, lat):
    """Convert a lon/lat position into integer grid-cell offsets.

    The offsets are counted from the dataset's south-west corner in steps of
    ``DX``/``DY``. All four attributes are parsed as floats and rounded to
    3 decimals before use, and the division is a floor division.

    Parameters
    ----------
    dset : object with an ``attrs`` mapping
        Must provide ``SOUTH_WEST_CORNER_LON``, ``SOUTH_WEST_CORNER_LAT``,
        ``DX`` and ``DY``.
    lon, lat : float
        Position to locate on the grid.

    Returns
    -------
    dict
        ``{'lon': ix, 'lat': iy}`` with integer offsets.
    """
    def _rounded(key):
        return round(float(dset.attrs[key]), 3)

    west, south = _rounded('SOUTH_WEST_CORNER_LON'), _rounded('SOUTH_WEST_CORNER_LAT')
    step_x, step_y = _rounded('DX'), _rounded('DY')

    return {
        'lon': int((lon - west) // step_x),
        'lat': int((lat - south) // step_y),
    }

def get_gage_data(gid):
    """Return the streamflow record of gage ``gid`` with ``date`` renamed to ``time``.

    The record is looked up in the module-level ``xa_gage_data``; the rename
    gives it the same dimension name as the routing data's ``time`` axis.
    """
    gage_record = xa_gage_data[gid]
    return gage_record.rename({'date': 'time'})


def get_routing_data(gid, vname):
    """Collect variable ``vname`` at a gage's grid cell from every routing scenario.

    Parameters
    ----------
    gid : str
        Gage id, matched against ``header_df['gage_id']``.
    vname : str
        Name of the routing variable to extract.

    Returns
    -------
    xr.Dataset
        One data variable per key of ``routing_list``, each restricted to
        ``test_time`` and carrying ``attrs['vname'] = vname``.
    """
    gage_rows = header_df.loc[header_df['gage_id'] == gid]
    # NOTE(review): these are pandas Series (one entry per matching header row),
    # not scalars, and are handed to isel() as-is.
    cell_x = gage_rows['x']
    cell_y = gage_rows['y']

    combined = xr.Dataset()
    for scenario, scenario_ds in routing_list.items():
        # the header's grid-cell indices are used positionally, so no
        # lon/lat -> index conversion via get_indices() is needed here
        selected = scenario_ds[vname].sel(time=test_time).isel(lon=cell_x, lat=cell_y)
        selected.attrs['vname'] = vname
        combined = combined.assign({scenario: selected})

    return combined
    
    
def get_aligned_data(gage_id, vname=None):
    """Return routing data and gage data for one gage, aligned with ``xr.align``.

    Parameters
    ----------
    gage_id : str
        Gage whose data is requested.
    vname : str, optional
        Routing variable to extract. When omitted, the module-level
        ``_null_routing_data`` (defined in a later cell, so it must exist by
        the time this is called) is used in place of real routing data.

    Returns
    -------
    tuple
        ``(routing, gage)`` as returned by ``xr.align``.
    """
    if vname is None:
        routing = _null_routing_data
    else:
        routing = get_routing_data(gage_id, vname)
    gage = get_gage_data(gage_id)
    return xr.align(routing, gage)
    
    
def gage_data_graph(index):
    """Plot callback for the gage time series.

    ``index`` holds the positions (rows of ``header_df``) of the selected
    points. Only the first one is plotted; an empty selection shows the
    ``_null_gage_data`` placeholder instead.
    """
    if index:
        gid = header_df.iloc[index[0]]['gage_id']
        # called without a variable name, so only the gage half of the pair is used
        _, gage_series = get_aligned_data(gid)
        return gage_series.hvplot(title=f"Gage ID {gid}")

    return _null_gage_data.hvplot(title="No Gage").opts(ylabel='')
    
def routing_data_graph(index, vname):
    """Plot callback for the routing time series of variable ``vname``.

    ``index`` holds the positions (rows of ``header_df``) of the selected
    points. Only the first one is plotted; an empty selection shows the
    ``_null_routing_data`` placeholder instead.
    """
    if index:
        gid = header_df.iloc[index[0]]['gage_id']
        routing, _ = get_aligned_data(gid, vname)
        return routing.hvplot(title=vname)

    return _null_routing_data.hvplot(title="No Var")
    


define dashboard widgets

In [110]:
# widget to select variable of LIS routing
#var_names = [ str(k) for k,v in routing_data.variables.items() if v.ndim == 3 ]
# NOTE(review): the options leave out the last two entries of var_names; the
# reason is not visible in this notebook -- confirm which variables are excluded.
var_select = pn.widgets.Select(options=var_names[:-2], value='Streamflow_tavg', name="LIS Variable List")
# stream that republishes the widget's `value` under the name `vname`, the
# keyword that routing_data_graph() expects
var_stream = Params( var_select, ['value'], rename={ 'value': 'vname' } )

# zero-filled placeholders shaped like gage 07289000's data; the graph
# callbacks plot them while no gage is selected
_null_gage_data = xr.zeros_like(xa_gage_data['07289000'])
_null_routing_data = xr.zeros_like(get_routing_data('07289000', 'Streamflow_tavg'))

In [111]:
#pts_opts = gv.opts.Points( color='red', size=10, tools=['tap', 'hover'], nonselection_fill_alpha=0.2, nonselection_line_alpha=0.6)
#dpoints = hv.util.Dynamic(points(header_df).opts(pts_opts)).opts(width=600, height=400)
# satellite-imagery base map
tiles = gv.tile_sources.EsriImagery()

# select gage location through click on map
select_stream = Selection1D(source=dpts)

# NOTE(review): same expression as the _null_gage_data assignment in the widget
# cell above; this cell already needs that cell's var_stream, so the
# re-definition looks redundant.
_null_gage_data = xr.zeros_like(xa_gage_data['07289000'])
# both graphs re-render when the selection changes; the routing graph also
# follows the variable selector
gage_graph = hv.DynamicMap(gage_data_graph, streams=[select_stream])
rout_graph = hv.DynamicMap(routing_data_graph, streams=[select_stream, var_stream])

# layout: map on the left; selector and the two time-series graphs stacked on the right
pn.Row(dpts*tiles, pn.Column(var_select,
                             gage_graph,
                             rout_graph))