In [2]:
# Standard Python modules
import os, sys
import glob
import numpy as np
import pandas as pd
import xarray as xr
import re

# import personal modules
# Path to modules
sys.path.append('../modules')
# Import my modules
from utils import roundPartial, find_closest_MERRA2_lon
from trajectory import combine_IVT_and_trajectory, combine_arscale_and_trajectory

path_to_data = '/expanse/nfs/cw3e/cwp140/'
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures


## load Rutz AR
print('Loading Rutz AR data')
fname_pattern = path_to_data + 'preprocessed/MERRA2/MERRA2_Rutz_latlon_*.nc'
ar = xr.open_mfdataset(fname_pattern)

## load AR scale
print('Loading ERA5 AR scale')
fname_pattern = path_to_data + 'preprocessed/ARScale_ERA5/ERA5_ARScale_*.nc'
arscale = xr.open_mfdataset(fname_pattern)

## Load tARgetv4 AR data
fname = path_to_data + 'preprocessed/tARgetv4/globalARcatalog_ERA5_2000-2023_v4.0.nc'
tARgetv4 = xr.open_dataset(fname)

## load HUC8 IDs
print('Loading HUC8 IDs')
fname = path_to_data + 'preprocessed/PRISM/PRISM_HUC8_CO_sp.nc'
ds = xr.open_dataset(fname)
HUC8_IDs = ds.HUC8.values ## get list of HUC8 IDs
HUC8_IDs = ['14050001']

## loop through all HUC8s
for i, HUC8_ID in enumerate(HUC8_IDs):
    print(i, HUC8_ID)
    ## load watershed trajectories
    fname = path_to_data + 'preprocessed/ERA5_trajectories/PRISM_HUC8_{0}.nc'.format(HUC8_ID)
    ERA5 = xr.open_dataset(fname)
    ERA5 = ERA5.assign_coords({"lon": ERA5.longitude, "lat": ERA5.latitude, "time": ERA5.time})
    ERA5 = ERA5.drop_vars(["latitude", "longitude"])


ERA5 = ERA5.isel(start_date=417)
ERA5

Loading Rutz AR data
Loading ERA5 AR scale


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


Loading HUC8 IDs
0 14050001


  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


In [3]:
t = xr.DataArray(ERA5.time.values, dims=['location'], name='time') 

# create a list of lat/lons that match ERA5 spacing
x = xr.DataArray(roundPartial(ERA5.lon.values, 0.25), dims=['location'])
y = xr.DataArray(roundPartial(ERA5.lat.values, 0.25), dims=['location'])

x = xr.DataArray(ERA5.lon.values, dims=("location"), coords={"lon": x}, name='traj_lons')
y = xr.DataArray(ERA5.lat.values, dims=("location"), coords={"lat": y}, name='traj_lats')

# create a new dataset that has the trajectory lat and lons and the closest ERA5 lat/lons as coords
z = xr.merge([x, y, t])

## Open csv file with coastal coordinates for N. America (ERA5 resolution)
textpts_fname = '../out/latlon_coast_ERA5.csv'
txtpts = pd.read_csv(textpts_fname, header=0)

## Now loop through the lat/lon pairs and see where they match
idx_lst = []
for i, (x, y) in enumerate(zip(z.lon.values, z.lat.values)):
    for j, (lon, lat) in enumerate(zip(txtpts.lon.values, txtpts.lat.values)):
        ## test if lat/lon pair matches
        result_variable = (lon-0.5 <= x < lon+0.5) & (lat-0.5 <= y < lat+0.5)

        if (result_variable == True):
            idx = (i, j) # (index of z, index of txtpts)
            idx_lst.append(idx)

if len(idx_lst) > 0:
    ## take first time the trajectory crosses the coast
    idx_lat = txtpts.iloc[idx_lst[0][1]].lat # the lat of the txtpts where the trajectory crosses
    idx_lon = txtpts.iloc[idx_lst[0][1]].lon # the lon of the txtpts where the trajectory crosses

    ## this is the time of the trajectory when it crosses west coast
    time_match = z.sel(location=idx_lst[0][0]).time.values
    ts = pd.to_datetime(str(time_match)).strftime('%Y-%m-%d %H')
    ERA5 = ERA5.assign(time_match=ts)

    #####################
    ### STRICT METHOD ###
    #####################

    ## Gather arscale of closest grid and time value
    arscale_val = arscale.sel(lat=idx_lat, lon=idx_lon, time=time_match, method='nearest')['rank'].values
    ERA5 = ERA5.assign(ar_scale_strict=arscale_val)

    ## Gather Rutz AR value of closest grid and time value
    ar_val = ar.sel(lat=idx_lat, lon=idx_lon, time=time_match, method='nearest')['AR'].values
    ERA5 = ERA5.assign(ar_strict=ar_val)

    ## Gather tARgetv4 AR value of closest grid and time value
    tARget_val = tARgetv4.sel(lat=idx_lat, lon=idx_lon, time=time_match, method='nearest')['kidmap'].values
    ERA5 = ERA5.assign(tARget_strict=tARget_val)

    ## Gather coastal IVT value
    coastal_IVT_val = arscale.sel(lat=idx_lat, lon=idx_lon, time=time_match, method='nearest')['IVT'].values
    ERA5 = ERA5.assign(coastal_IVT_strict=coastal_IVT_val)


    #######################
    ### FLEXIBLE METHOD ###
    #######################

    ## select the 12 hours on each side of the time step
    ## select the surrounding grid points within 1 degree
    sta = time_match - np.timedelta64(12,'h')
    sto = time_match + np.timedelta64(12,'h')

    ## Gather AR Scale value
    tmp = arscale.sel(lat=slice(idx_lat-2, idx_lat+2), lon=slice(idx_lon-1, idx_lon+1), time=slice(sta, sto))
    arscale_val = tmp['rank'].max().values

    ## Gather coastal IVT value
    coastal_IVT_val = tmp['IVT'].max().values

    ## now put those values into the trajectory dataset
    ERA5 = ERA5.assign(ar_scale=arscale_val)
    ERA5 = ERA5.assign(coastal_IVT=coastal_IVT_val)

    ## Gather Rutz AR and tARgetv4 value
    try:
        tmp1 = ar.sel(lat=slice(idx_lat-1, idx_lat+1), lon=slice(idx_lon-1, idx_lon+1), time=slice(sta, sto))
        ar_val = tmp1.AR.values.max()

        tmp2 = tARgetv4.sel(lat=slice(idx_lat-1, idx_lat+1), lon=slice(idx_lon-1, idx_lon+1), time=slice(sta, sto))
        tARget_val = tmp2.kidmap.values.max()
    except ValueError:
        ar_val = np.nan
        tARget_val = np.nan

    ## assign value to trajectory dataset
    ERA5 = ERA5.assign(ar=ar_val)
    ERA5 = ERA5.assign(tARget=tARget_val)


else:
    ## since the trajectory didn't cross the west coast, set ar_scale to nan
    ERA5 = ERA5.assign(ar_scale=np.nan)
    ERA5 = ERA5.assign(ar=np.nan)
    ERA5 = ERA5.assign(tARget=np.nan)
    ERA5 = ERA5.assign(coastal_IVT=np.nan)

    ERA5 = ERA5.assign(ar_scale_strict=np.nan)
    ERA5 = ERA5.assign(ar_strict=np.nan)
    ERA5 = ERA5.assign(tARget_strict=np.nan)
    ERA5 = ERA5.assign(coastal_IVT_strict=np.nan)
    ERA5 = ERA5.assign(time_match='nan')

ValueError: zero-size array to reduction operation fmax which has no identity

In [6]:
tmp1 = ar.sel(lat=slice(idx_lat-1, idx_lat+1), lon=slice(idx_lon+1, idx_lon-1), time=slice(sta, sto))
tmp1

ImportError: Dask diagnostics requirements are not installed.

Please either conda or pip install as follows:

  conda install dask                     # either conda install
  python -m pip install "dask[diagnostics]" --upgrade  # or python -m pip install

<xarray.Dataset>
Dimensions:  (time: 8, lat: 5, lon: 0)
Coordinates:
  * time     (time) datetime64[ns] 2005-02-13T06:00:00 ... 2005-02-14T03:00:00
  * lat      (lat) float64 52.0 52.5 53.0 53.5 54.0
  * lon      (lon) float64 
Data variables:
    AR       (time, lat, lon) float64 dask.array<chunksize=(8, 5, 0), meta=np.ndarray>

In [10]:
txtpts.lon.min()

-140.0

In [11]:
idx_lat

53.0