# mwb_flow prep example 2 for pulling volume weighted gridmet data
This example shows code from metdata_update.py that has been updated to include the GRIDtools.grid_area_weighted_volume function. Here, data inputs for the mwb_flow model are retrieved from GridMET: temperature is averaged over the watershed area, and precipitation is converted to an area-weighted volume.
Important note: this example uses the metdata.py code, which is generally the same as the code in the metdata_update.py module. To run this code from a function instead, see prep_Example1.1_PullData.ipynb.

In [1]:
import os
from pathlib import Path
import geopandas as gpd
import xarray as xr

# Block of imports needed for GRIDtools.grid_area_weighted_volume
import rasterio as rio
import pandas as pd
from chmdata.thredds import GridMet, BBox
import numpy as np
from shapely.geometry import Polygon




# Local checkout of the mwb_flow repository. This is a machine-specific
# Windows path; edit it for your own setup.
mwb_flow_dir = r'C:\Users\CND905\Downloaded_Programs\mwb_flow'
# Switch the working directory to the checkout before the `prep` / `config`
# imports that follow; presumably this is what makes those project modules
# importable -- TODO confirm.
os.chdir(mwb_flow_dir)

from prep.datafile import CreateInputFile
from prep.metdata import get_gridmet_at_points
from prep.datafile import check_format


import py3dep
from tqdm import tqdm
from prep.utils import get_gridmet_cells
from config import GRIDMET_PARAMS, VOL_PARAMS
import GRIDtools as gt


Initializing mwb_flow.prep module.


Import a shapefile containing the delineated watershed polygons. The shapefile's attribute table has a column used to index the geometries. In this case, a column of gage station numbers is used, since the numbers will later be used to merge the meteorological data with streamflow data.

In [None]:
# Shapefile holding the example calibration catchments.
exres_pth = Path(r'C:\Users\CND905\Downloaded_Programs\mwb_flow\Examples\data\Lolo_WB_Model_Calibration_Catchments_32611.shp')
# Read and reproject in one step: the file is stored in EPSG:32611
# (WGS84 / UTM zone 11N) but must be in EPSG:4326 for the GridMET retrieval.
exres = gpd.read_file(exres_pth).to_crs(4326)


In [3]:
# --- Inputs for the get_gridmet_at_points() part of the workflow ---
in_geom = exres            # same GeoDataFrame object as exres (not a copy)
gdf_index_col = "gageID"   # column identifying each feature; None -> use the index
start = '2016-01-01'
end = '2016-01-05'
crs = 4326

# --- Input for the grid_area_weighted_volume() part of the workflow ---
geom_id_col = gdf_index_col

# Resolve the identifier column. When no column was supplied, the
# GeoDataFrame index is copied into a new 'ixcol' column; this writes to
# in_geom, and therefore to exres as well.
if gdf_index_col is None:
    in_geom['ixcol'] = in_geom.index
    ixcol = 'ixcol'
else:
    ixcol = gdf_index_col

# One identifier per feature, in row order.
location_ids = in_geom[ixcol].to_list()

# Representative (x, y) coordinate for every feature: the point itself for
# all-Point input, the polygon centroid for all-Polygon input.
# NOTE(review): the cell output echoes the centroid line, which looks like a
# warning -- presumably the geopandas notice that centroids computed in a
# geographic CRS may be inaccurate. Confirm whether that matters here.
geom_types = in_geom.geometry.geom_type
if (geom_types == 'Point').all():
    coords = list(zip(in_geom.geometry.x, in_geom.geometry.y))
elif (geom_types == 'Polygon').all():
    # Compute the centroids once and reuse them for both axes.
    centroids = in_geom.geometry.centroid
    coords = list(zip(centroids.x, centroids.y))
else:
    # Reached for mixed inputs and for uniformly unsupported types alike
    # (anything that is not all 'Point' or all 'Polygon').
    raise ValueError("Unsupported or mixed geometry types were found in the input GeoDataFrame. "
                     "Only all-Point or all-Polygon inputs are supported.")

# Filled with one entry per feature by the per-location loop further down.
loc_lat = []
loc_lon = []
loc_elev = py3dep.elevation_bycoords(coords, crs=crs)  # only 4326 or NAD83 works with py3dep

# Wrap a non-list result so loc_elev is always a list
# (presumably a scalar is returned for a single coordinate -- TODO confirm).
if not isinstance(loc_elev, list):
    loc_elev = [loc_elev]

# Identifier column plus geometry is all that the GridMET cell lookup is given.
loc_gdf = in_geom[[ixcol, 'geometry']]

# Look up the GridMET cells associated with the input features.
print("Retrieving GridMET cells...")
gmt_cells = get_gridmet_cells(loc_gdf)
unq_cells = gmt_cells['cell_id'].unique()
print("{0} unique GridMET cells found for {1} input features.".format(len(unq_cells), len(loc_gdf[ixcol])))

# One centroid per distinct cell.
# NOTE(review): gmt_cntrs[cell] below is a Series lookup keyed by the cell id,
# so it presumably relies on gmt_cells being indexed by cell id -- confirm
# against get_gridmet_cells.
gmt_cntrs = gmt_cells.drop_duplicates(subset='cell_id').centroid

# Accumulators for the parameters that are averaged over the watershed area.
tmmn = []
tmmx = []

# cell id -> list of point time series, in the same order as GRIDMET_PARAMS.
cdsets = {}
print("Fetching GridMET data for unique cells...")
for cell in tqdm(unq_cells, desc='Cells'):
    cell_centre = gmt_cntrs[cell]
    cdsets[cell] = [
        GridMet(param, start=start, end=end, lat=cell_centre.y, lon=cell_centre.x).get_point_timeseries()
        for param in GRIDMET_PARAMS
    ]

# Parameters retrieved to be converted to area-weighted volumes here.
# The result (vol_xds) is merged into the final dataset, so exactly one volume
# parameter is required: more than one is unsupported, and none at all would
# leave vol_xds undefined and fail later with a NameError at the merge.
if len(VOL_PARAMS) > 1:
    raise ValueError("GRIDtools.grid_area_weighted_volume() is only compatible with the precip parameter")
if len(VOL_PARAMS) == 0:
    raise ValueError("VOL_PARAMS is empty; one volume parameter is required to build vol_xds")

# Bounding box of all input features as (minx, miny, maxx, maxy).
bnds = in_geom.total_bounds
for p in VOL_PARAMS:
    # Request a box padded by 0.5 (degrees, given the EPSG:4326 input) on every side.
    # NOTE(review): the argument order is assumed to be
    # BBox(west, east, north, south) -- confirm against chmdata.thredds.BBox.
    gmet = GridMet(variable=p, start=start, end=end,
                   bbox=BBox(bnds[0] - 0.5, bnds[2] + 0.5, bnds[3] + 0.5, bnds[1] - 0.5))

    gmet = gmet.subset_nc(return_array=True)
    # Use the first data variable of the returned dataset as the grid input.
    gmet_input = gmet[list(gmet.data_vars)[0]]
    # Pass the resolved identifier column rather than a hard-coded 'gageID';
    # ixcol equals geom_id_col whenever an index column was supplied and is
    # still a valid column when it was not.
    vol_xds = gt.grid_area_weighted_volume(gmet_input, in_geom, ixcol)

# Positions of the two temperature series inside each per-cell dataset list
# (the lists in cdsets follow the order of GRIDMET_PARAMS). Looked up once
# instead of once per feature.
tmmn_pos = GRIDMET_PARAMS.index('tmmn')
tmmx_pos = GRIDMET_PARAMS.index('tmmx')

# Walk the features in row order: record their coordinates and collect one
# min/max temperature series per feature.
for (lon, lat), loc in zip(coords, location_ids):
    loc_lat.append(lat)
    loc_lon.append(lon)

    # GridMET cells that belong to this feature.
    gmtcell_ids = gmt_cells[gmt_cells[ixcol] == loc]
    cell_ids = gmtcell_ids['cell_id']
    if len(cell_ids) == 0:
        # Fail with a message naming the feature instead of an opaque IndexError.
        raise ValueError("No GridMET cells were found for location {0}.".format(loc))

    if len(cell_ids) > 1:
        # Several cells: stack their series and take the plain (unweighted)
        # mean of all values sharing the same timestamp.
        tmmnm_d = pd.concat([cdsets[cid][tmmn_pos] for cid in cell_ids])
        tmmxm_d = pd.concat([cdsets[cid][tmmx_pos] for cid in cell_ids])

        tmmn.append(tmmnm_d.groupby(tmmnm_d.index).mean())
        tmmx.append(tmmxm_d.groupby(tmmxm_d.index).mean())
    else:
        # Single cell: use its series unchanged.
        dset = cdsets[cell_ids.values[0]]
        tmmn.append(dset[tmmn_pos])
        tmmx.append(dset[tmmx_pos])

# Combine the per-location series into (time x location) tables.
min_temp_table = pd.concat(tmmn, axis=1)
max_temp_table = pd.concat(tmmx, axis=1)

# Data variables: (dimensions, values, attributes).
temperature_vars = {
    "min_temp": (['time', 'location'], min_temp_table,
                 {'standard_name': 'Minimum Temperature', 'units': 'Kelvin'}),
    "max_temp": (['time', 'location'], max_temp_table,
                 {'standard_name': 'Maximum Temperature', 'units': 'Kelvin'}),
}

# Coordinates: per-location metadata plus the shared time axis, which is taken
# from the first location's minimum-temperature series.
location_coords = {
    "lat": (['location'], loc_lat,
            {'standard_name': 'latitude', 'long_name': 'location_latitude',
             'units': 'degrees', 'crs': '4326'}),
    "lon": (['location'], loc_lon,
            {'standard_name': 'longitude', 'long_name': 'location_longitude',
             'units': 'degrees', 'crs': '4326'}),
    "elev": (['location'], loc_elev,
             {'standard_name': 'elevation', 'long_name': 'location_elevation',
              'units': 'meters'}),
    "location": (['location'], location_ids,
                 {'long_name': 'location_identifier', 'cf_role': 'timeseries_id'}),
    "time": tmmn[0].index,
}

mean_xds = xr.Dataset(
    temperature_vars,
    coords=location_coords,
    attrs={"featureType": 'timeSeries'},
)

# Left as a bare expression (not assigned): the merged temperature + volume
# dataset is the value of the cell.
xr.merge([mean_xds, vol_xds])


  coords = list(zip(in_geom.geometry.centroid.x, in_geom.geometry.centroid.y))


Retrieving GridMET cells...



  gmt_cntrs = gmt_cells.drop_duplicates(subset='cell_id').centroid


70 unique GridMET cells found for 6 input features.
Fetching GridMET data for unique cells...


Cells: 100%|██████████| 70/70 [01:53<00:00,  1.62s/it]
