In [8]:
import xarray as xr
import numpy as np
import pandas as pd

In [9]:
# Watershed identifier and event day (YYYYMMDD) used by the cells below.
HUC8_ID = '14050001'
event_date = '20001024'

# The starting lat/lon are read from this file; events should be chosen
# from extreme-precipitation days.
path_to_data = '/expanse/nfs/cw3e/cwp140/'
fname = path_to_data + 'preprocessed/PRISM/PRISM_HUC8_CO_sp.nc'

# Open the file and keep only the analysis period in a single step.
# Optional filters for narrowing to one watershed / one kind of event
# (currently disabled):
#   ds = ds.sel(HUC8=HUC8_ID)
#   ds = ds.where(ds.extreme == 1, drop=True)
#   ds = ds.where(ds.prec >= 2.54, drop=True)
ds = xr.open_dataset(fname).sel(date=slice('2000-01-03', '2023-12-31'))
ds

In [12]:
def get_dates(ds, event_date, start_time=0, lat_offset=0, lon_offset=0):
    """Return the daily ERA5 file dates and the time window for one event.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset that can be indexed with ``ds.sel(date=event_date)`` and
        exposes ``date``, ``lat`` and ``lon`` on the selection.
    event_date : str
        Label of the event day, passed straight to ``ds.sel(date=...)``.
    start_time : int, optional
        Hour offset added to the event date for the trajectory start time.
        It only affects the printed diagnostic, not the returned window.
    lat_offset, lon_offset : float, optional
        Offsets added to the event latitude/longitude in the printed
        diagnostic.

    Returns
    -------
    date_lst_era : pandas.DatetimeIndex
        Daily dates from two days before the event through the event day.
    start_date : numpy.datetime64
        Event date minus two days.
    end_date : numpy.datetime64
        Event date.
    """
    # Select the event once instead of re-indexing the dataset for every field.
    event = ds.sel(date=event_date)
    event_day = event.date.values

    ## center the date based on what hour you want to run the trajectory
    center_date = event_day + np.timedelta64(start_time, 'h')
    start_lat = event.lat.values + lat_offset
    start_lon = event.lon.values + lon_offset
    print(center_date, start_lat, start_lon)

    ## create list of daily dates covering the two days leading up to the event
    start_date = event_day - np.timedelta64(2, 'D')
    end_date = event_day
    date_lst_era = pd.date_range(start_date, end_date, freq='1D')

    return date_lst_era, start_date, end_date

def _open_combined(paths):
    """Open a single NetCDF file directly, or several as one combined dataset."""
    if len(paths) == 1:
        return xr.open_dataset(paths[0])
    return xr.open_mfdataset(paths, engine='netcdf4', combine='by_coords')


def read_data(date_lst_era, start_date, end_date, data_root='/expanse/nfs/cw3e/cwp140/'):
    """Read ERA5 pressure-level, IVT and zero-degree-level data for a time window.

    Parameters
    ----------
    date_lst_era : iterable of pandas.Timestamp
        One entry per daily ERA5 pressure-level file to open.
    start_date, end_date :
        Bounds used in ``sel(time=slice(start_date, end_date))`` on every
        input dataset.
    data_root : str, optional
        Root directory holding ``downloads/ERA5/ERA5/<year>/`` and
        ``preprocessed/ERA5/``. Must end with a path separator because the
        sub-paths are appended by string concatenation.

    Returns
    -------
    xarray.Dataset
        Result of ``xr.merge`` over the pressure-level, IVT and
        zero-degree-level datasets.

    Raises
    ------
    ValueError
        If ``date_lst_era`` is empty.
    """
    preprocessed_dir = data_root + 'preprocessed/ERA5/'

    fname_lst = []       # daily ERA5 pressure-level files
    ivt_fnames = []      # monthly IVT files (unique, in date order)
    zerodeg_fnames = []  # yearly zero-degree-level files (unique, in date order)
    for date in date_lst_era:
        year = date.year
        month = date.strftime("%m")
        day = date.strftime("%d")

        era5_dir = data_root + 'downloads/ERA5/ERA5/{0}/'.format(year)
        fname_lst.append(era5_dir + "era5_nhemi_025dg_1hr_uvwq_{0}{1}{2}.nc".format(year, month, day))

        # Collect every month/year touched by the window; keeping only the last
        # one would drop data when the window crosses a month or year boundary.
        ivt_fname = preprocessed_dir + 'ivt/{0}{1}_IVT.nc'.format(year, month)
        if ivt_fname not in ivt_fnames:
            ivt_fnames.append(ivt_fname)
        zerodeg_fname = preprocessed_dir + 'zero_degree_level/{0}_deg0l.nc'.format(year)
        if zerodeg_fname not in zerodeg_fnames:
            zerodeg_fnames.append(zerodeg_fname)

    if not fname_lst:
        raise ValueError("date_lst_era is empty; no ERA5 files to read")

    ## Read ERA5 pressure level data
    ds1 = xr.open_mfdataset(fname_lst, engine='netcdf4', combine='by_coords')
    ds1 = ds1.sel(time=slice(start_date, end_date))

    ## Read ERA5 IVT data
    ## rename lat/lon to latitude/longitude, then trim to the start and end dates
    IVT = _open_combined(ivt_fnames)
    IVT = IVT.rename({'lon': 'longitude', 'lat': 'latitude'})
    IVT = IVT.sel(time=slice(start_date, end_date))

    ## Read ERA5 zero-degree-level data
    zerodeg = _open_combined(zerodeg_fnames)
    # Convert longitude coordinates from 0-359 to -180-179
    zerodeg = zerodeg.assign_coords({"longitude": (((zerodeg.longitude + 180) % 360) - 180)})
    zerodeg = zerodeg.sel(time=slice(start_date, end_date))

    ## Merge pressure level files with IVT and freezing level
    # NOTE(review): the three inputs may cover different grids/extents; confirm
    # that the merge's coordinate alignment yields the intended domain.
    ds1 = xr.merge([ds1, IVT, zerodeg])

    return ds1

In [13]:
# Resolve the daily ERA5 dates and the time window for the configured event,
# then display them as the cell result.
(date_lst_era, start_date, end_date) = get_dates(ds=ds, event_date=event_date)
(date_lst_era, start_date, end_date)

2000-10-24T00:00:00.000000000 [37.17830581 36.86011599 38.77387292 38.37824174 40.45464386 37.07544366
 38.35886588 37.73207925 36.74434346 38.57787925 39.58480806 38.83173677
 37.04712646 39.80997281 40.71510667 39.67820879 40.50679123 41.07132082
 40.9987667  39.3859446  40.45357198 36.47901196 40.08036951 41.06342867
 40.74929937 38.34106245 38.64082921 40.97613735 38.78083415 40.91045587
 41.24035019 39.95128154 36.96500301 38.961431   37.38703813 40.47054768
 40.97793564 39.94120453 37.35237619 37.33356496 38.09824841 37.32072569
 40.0030748  39.64136105 40.54380555 37.6060869  40.15977775 39.42772501
 38.0153005  38.60606465 39.26250104 39.57530218 39.66254545 41.27993149
 39.09867584 40.72865965 40.83819602 41.23397426 37.31786286 38.31436499
 39.86440058 40.03575369 36.44144211 37.64093829 38.88036491 38.84563894
 41.42103973 40.625663   37.34866858 39.48151723 37.6669263  37.3769644
 36.99460598 38.94075083 38.37446902 37.87953385 38.7313996  39.75909205
 40.39998674 38.136950

(DatetimeIndex(['2000-10-22', '2000-10-23', '2000-10-24'], dtype='datetime64[ns]', freq='D'),
 numpy.datetime64('2000-10-22T00:00:00.000000000'),
 numpy.datetime64('2000-10-24T00:00:00.000000000'))

In [25]:
# Load and merge the ERA5 fields for the event window; show the dataset summary.
ds1 = read_data(
    date_lst_era=date_lst_era,
    start_date=start_date,
    end_date=end_date,
)
ds1

  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
   

ImportError: Dask diagnostics requirements are not installed.

Please either conda or pip install as follows:

  conda install dask                     # either conda install
  python -m pip install "dask[diagnostics]" --upgrade  # or python -m pip install

<xarray.Dataset>
Dimensions:    (longitude: 1440, latitude: 721, level: 37, time: 73)
Coordinates:
  * longitude  (longitude) float64 -180.0 -179.8 -179.5 ... 179.2 179.5 179.8
  * latitude   (latitude) float64 -90.0 -89.75 -89.5 -89.25 ... 89.5 89.75 90.0
  * level      (level) int32 1000 975 950 925 900 875 850 ... 20 10 7 5 3 2 1
  * time       (time) datetime64[ns] 2000-10-21 ... 2000-10-24
Data variables:
    q          (time, level, latitude, longitude) float32 dask.array<chunksize=(24, 37, 721, 1440), meta=np.ndarray>
    u          (time, level, latitude, longitude) float32 dask.array<chunksize=(24, 37, 721, 1440), meta=np.ndarray>
    v          (time, level, latitude, longitude) float32 dask.array<chunksize=(24, 37, 721, 1440), meta=np.ndarray>
    w          (time, level, latitude, longitude) float32 dask.array<chunksize=(24, 37, 721, 1440), meta=np.ndarray>
    IVT        (time, latitude, longitude) float64 ...
    deg0l      (time, latitude, longitude) float32 ...
Attribut