In [1]:
import numpy as np
import xarray as xr
from scipy.stats import kendalltau

In [2]:
def kendallstau(x,y):
    """Kendall's tau between two series, using only positions where both are finite.

    Parameters
    ----------
    x, y : array-like
        Series of the same shape. Positions where either value is NaN or
        infinite are dropped from both series before tau is computed.

    Returns
    -------
    numpy.ndarray
        Two-element array holding the first two fields of the
        ``scipy.stats.kendalltau`` result (tau and its p-value).
    """
    # Coerce to arrays so that plain sequences can be boolean-indexed below.
    x = np.asarray(x)
    y = np.asarray(y)

    # Logical AND of the two masks: keep a position only if both values are finite.
    overlap = np.isfinite(x) & np.isfinite(y)

    tau = kendalltau(x[overlap], y[overlap])

    return np.array([tau[0], tau[1]])

def pot(da,threshold,dim):
    """Keep only the values of `da` that exceed its `threshold` quantile along `dim`.

    The quantile is computed with ``da.quantile(threshold, dim=dim)``; values
    that are not strictly greater than it are masked out by ``da.where``.
    """
    cutoff = da.quantile(threshold, dim=dim)
    exceeds_cutoff = da > cutoff
    return da.where(exceeds_cutoff)

#to-do generate function for UTDC, need pycops package?

First, open the rain and surge data. For now this uses an example for MPI-ESM1-2-HR; it should later be adapted to the workflow that derives the rain and surge.

In [3]:
# Open every file matching the pattern below as one dataset.
rain_surges_glob = '/home/jovyan/CMIP6cf/notebooks/MPI-ESM1-2-HR_gn_day_tg_*'
rain_surges = xr.open_mfdataset(rain_surges_glob)

# Alternative kept for reference: open the surges file on its own and merge it with rain.
#surges = xr.open_dataset('/home/jovyan/CMIP6cf/notebooks/MPI-ESM1-2-HR_gn_day_tg_surges.nc')
#rain_surges = xr.merge((rain,surges))

Now construct multiple time windows to apply the analysis to, for now using a window length of 30 years:

In [13]:
# Years used to label the windows: 1875, 1890, ..., 2085.
output_yrs = np.arange(1875, 2100, 15)

# Drop every 29 February so that each computation window has the same length.
is_leap_day = (rain_surges.time.dt.month == 2) & (rain_surges.time.dt.day == 29)
rain_surges = rain_surges.sel(time=~is_leap_day)

# Mean number of time steps per resampled year, truncated to an integer.
steps_per_year = rain_surges.time.resample(time='1Y').count().values
days_in_year = int(np.mean(steps_per_year))

# Index of the first time step of each window: 14 years before the labelled year.
# NOTE(review): assumes index 0 of `time` is the start of 1850 - confirm against the data.
window_start_idx = (output_yrs - 1850 - 14) * days_in_year
# Offsets of the time steps inside one window (window length of 30 years).
first_window_idx = np.arange(30 * days_in_year)

# 2-D indexer: entry [i, j] is the position along `time` of step i in window j.
window_idx = xr.DataArray(
    data=np.add.outer(first_window_idx, window_start_idx),
    dims=["time_in_window_idx", "window_center"],
    coords={
        "time_in_window_idx": first_window_idx,
        "window_center": output_yrs,
    },
)

# Select all windows at once using the 2-D indexer.
rain_surges_windows = rain_surges.isel(time=window_idx)

Configure and apply the bivariate sampling:

In [14]:
# Half-width of the lag window; the rolling maxima below span 2*max_lag+1 steps.
max_lag = 2
# Width (in time steps) of the centred rolling window used for declustering.
declustering_window = 3
# Quantile passed to pot() to select peaks.
threshold = 0.95

In [15]:
# Peaks over threshold for both variables, computed separately within each window.
pr_peaks, surge_peaks = (
    pot(rain_surges_windows[variable], threshold, dim='time_in_window_idx')
    for variable in ('pr', 'surges')
)

Decluster the peaks: a peak is only kept if it is the maximum within a 3-day window centred on that peak. This may not be optimal for bivariate analysis.

In [16]:
# A peak is kept only where it equals the maximum of the centred rolling window
# of `declustering_window` steps around it; all other positions are masked.
decluster_rolling = dict(time_in_window_idx=declustering_window, center=True, min_periods=1)

pr_window_max = pr_peaks.rolling(**decluster_rolling).max(skipna=True)  # as in the AFs paper
pr_peaks_declustered = pr_peaks.where(pr_peaks == pr_window_max)

surge_window_max = surge_peaks.rolling(**decluster_rolling).max(skipna=True)
surge_peaks_declustered = surge_peaks.where(surge_peaks == surge_window_max)

We want the maximum co-occurring value within a lag of ±max_lag days. That is equivalent to pairing the peaks with the centred `rolling(2*max_lag+1).max()` of the other variable (right?)

In [17]:
# Centred rolling maximum of the full series over 2*max_lag+1 time steps.
lag_span = max_lag * 2 + 1

surge_rollmax = (
    rain_surges_windows['surges']
    .rolling(time_in_window_idx=lag_span, center=True, min_periods=1)
    .max()
)
pr_rollmax = (
    rain_surges_windows['pr']
    .rolling(time_in_window_idx=lag_span, center=True, min_periods=1)
    .max()
)

In [18]:
# Centred rolling maximum of the peak series (not declustered) over 2*max_lag+1 time steps.
peaks_lag_span = max_lag * 2 + 1

surge_peaks_rollmax = (
    surge_peaks
    .rolling(time_in_window_idx=peaks_lag_span, center=True, min_periods=1)
    .max(skipna=True)
)
pr_peaks_rollmax = (
    pr_peaks
    .rolling(time_in_window_idx=peaks_lag_span, center=True, min_periods=1)
    .max(skipna=True)
)

In [19]:
# NOTE(review): this re-defines kendallstau, which is already defined in an earlier
# cell of this notebook; keep the two definitions in sync or delete one of them.
def kendallstau(x,y):
    """Kendall's tau between two series, using only positions where both are finite.

    Parameters
    ----------
    x, y : array-like
        Series of the same shape. Positions where either value is NaN or
        infinite are dropped from both series before tau is computed.

    Returns
    -------
    numpy.ndarray
        Two-element array holding the first two fields of the
        ``scipy.stats.kendalltau`` result (tau and its p-value).
    """
    # Coerce to arrays so that plain sequences can be boolean-indexed below.
    x = np.asarray(x)
    y = np.asarray(y)

    # Logical AND of the two masks: keep a position only if both values are finite.
    overlap = np.isfinite(x) & np.isfinite(y)

    tau = kendalltau(x[overlap], y[overlap])

    return np.array([tau[0], tau[1]])


def _kendall_tau_per_window(peaks, partner):
    """Apply kendallstau along "time_in_window_idx" for every remaining coordinate.

    Parameters
    ----------
    peaks, partner : xarray.DataArray
        Arrays sharing the "time_in_window_idx" dimension. Both are loaded
        into memory before the computation.

    Returns
    -------
    xarray.DataArray
        The non-core dimensions of the inputs plus a "statistic" dimension
        holding the two values returned by kendallstau.
    """
    return xr.apply_ufunc(
        kendallstau, peaks.load(), partner.load(),
        input_core_dims=[["time_in_window_idx"], ["time_in_window_idx"]], #core dimension: time, loop over the others
        output_core_dims=[["statistic"]], #outputs tau and p
        vectorize=True,
        dask='allowed', #allow calculating in chunks (dask='parallelized' doesn't work)
        output_dtypes=[float],
        output_sizes={"statistic": 2}, #output must be numpy array
    )


# Declustered surge peaks paired with the rolling maximum of rain around them.
ktau_pr_cdon_surge = _kendall_tau_per_window(surge_peaks_declustered, pr_rollmax)

# Declustered rain peaks paired with the rolling maximum of surge around them.
ktau_surge_cdon_pr = _kendall_tau_per_window(pr_peaks_declustered, surge_rollmax)

# Not computed yet: the case where both variates are peaks.
# ktau_both = _kendall_tau_per_window(pr_peaks_declustered, surge_peaks_declustered)

# Stack the two results in the same order as the 'extreme_variate' labels below:
# first the surge-peak case, then the rain-peak case.
ktau = xr.concat([ktau_pr_cdon_surge,ktau_surge_cdon_pr],dim='extreme_variate')
ktau = ktau.assign_coords({'extreme_variate':['surges','rain'],'statistic':['tau','p']})

**NB: think about how to decluster & apply lag for the case where both variates are peaks**

In [20]:
# Display the combined result ('tau' and 'p' for each 'extreme_variate' entry).
ktau