In [1]:
import numpy as np
import xarray as xr
import os
import fnmatch
from tqdm.autonotebook import tqdm
import dask
import sys
import gcsfs
# Google Cloud Storage filesystem handle; used below for all bucket listings (fs.ls)
fs = gcsfs.GCSFileSystem() # equivalent to fsspec.filesystem('gs')
# put the local cmip6cf source directory first on the module search path
# NOTE(review): nothing visible in this notebook imports from that directory -- confirm it is still needed
sys.path.insert(0, '/home/jovyan/CMIP6cf/cmip6cf/')

  from tqdm.autonotebook import tqdm


In [10]:
# exploratory listing of the per-model entries under the subsetted pr_tgs prefix (the result is only displayed, not stored)
fs.ls('gs://leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs')

['leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/ACCESS-CM2',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/CESM2',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/CESM2-WACCM',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/CMCC-CM2-SR5',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/CMCC-ESM2',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/CanESM5',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/EC-Earth3',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/EC-Earth3-Veg',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/GFDL-CM4',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/GFDL-ESM4',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/HadGEM3-GC31-LL',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/HadGEM3-GC31-MM',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/IITM-ESM',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/INM-CM4-8',
 'leap-persistent/timh37/CMIP6/subsetted_data/pr_tgs/INM-CM5-0',
 'leap-pers

In [2]:
def po_t_of_refyear(da,threshold,refyear,dim):
    """Keep only the values of `da` that exceed a threshold taken from one reference window.

    Parameters
    ----------
    da : array with a 'window' coordinate (it is selected with ``window=refyear``)
    threshold : quantile (passed on to ``.quantile``) that defines the threshold
    refyear : label of the window from which the threshold is computed
    dim : dimension along which the quantile is evaluated

    Returns
    -------
    ``da.where(...)`` of the strict comparison ``da > threshold value``; entries
    that do not exceed the reference threshold are masked by ``where``.
    """
    reference_window = da.sel(window=refyear)
    reference_threshold = reference_window.quantile(threshold,dim=dim)
    exceeds_reference = da>reference_threshold
    return da.where(exceeds_reference)
            
def rolling_max(da,window_len,dim):
    """Centered rolling maximum of `da` along `dim`.

    Parameters
    ----------
    da : array providing ``.rolling``
    window_len : number of steps in the rolling window
    dim : name of the dimension to roll over

    Returns
    -------
    The rolling maximum; because ``min_periods=1`` a window needs only a single
    valid value to produce a result.
    """
    window_spec = {dim:window_len}
    roller = da.rolling(window_spec,center=True,min_periods=1)
    return roller.max()

def sum_num_extremes_pmonth(extremes):
    """Sum a boolean array of extremes per calendar month.

    Parameters
    ----------
    extremes : boolean array (True where a (joint) extreme occurs on that day)
        with a 'time_in_window_idx' dimension and a 'time' coordinate.

    Returns
    -------
    The sum of `extremes` over 'time_in_window_idx', grouped by 'month'.

    The 'time_in_window_idx' coordinate of a deep copy is overwritten with the
    calendar month of every time step and then renamed to 'month'. If 'time' has
    more than one dimension, the months of the first window (``window=0``) are
    used for all windows.
    """
    relabelled = extremes.copy(deep=True) #work on a copy so the input keeps its coordinates
    months = relabelled.time.dt.month
    if len(extremes.time.shape)>1: #time differs per window: take the months of the first window
        months = months.isel(window=0)
    relabelled['time_in_window_idx'] = months.values
    return relabelled.rename({'time_in_window_idx':'month'}).groupby('month').sum()

Configure the bivariate sampling:

In [3]:
#settings of the bivariate sampling

#definition of (joint) extremes
threshold = 0.98 #quantile above which a value counts as a peak
declus_window_len = 1 #days, length of the rolling window used to decluster peaks
max_lag = 0 #days, allowed lag between peaks of both variables

#time windows
window_len = 40 #years per window; may need to increase? indicate settings in output folder?
output_yrs = np.arange(1960, 2100, 20) #years labelling the windows
ref_year = 2000 #window from which the thresholds are computed

In [4]:
#configure the CMIP6 input settings
var1 = 'surge' #name of the first variable (also part of the input and output paths)
var2 = 'pr' #name of the second variable
domain = 'tgs' #'europe' or 'tgs'

#bucket prefixes that hold one sub-directory per model for each variable
var1_dir = 'leap-persistent/timh37/CMIP6/timeseries/'+var1+'_'+domain
var2_dir = 'leap-persistent/timh37/CMIP6/timeseries/'+var2+'_'+domain

#'europe' input is treated as gridded (latitude/longitude), 'tgs' input as point data along 'tg'
if domain == 'tgs':
    input_is_gridded = False
elif domain == 'europe':
    input_is_gridded = True
else: #fail here instead of leaving input_is_gridded undefined (or stale from an earlier run)
    raise ValueError("Unknown domain '"+str(domain)+"': expected 'europe' or 'tgs'.")

In [5]:
#Count the (joint) extremes of var1 & var2 per month, window and ensemble member for every model & experiment
#that is available for both variables, and store the result in one netCDF file per member.
#Uses the settings and the helper functions (po_t_of_refyear, rolling_max, sum_num_extremes_pmonth) defined in the cells above.
models_var1 = [k.split('/')[-1] for k in fs.ls(var1_dir)]
models_var2 = [k.split('/')[-1] for k in fs.ls(var2_dir)]
source_ids = sorted(set(models_var1) & set(models_var2)) #intersection of models

#settings that do not change between models & experiments
spatial_dims = ['latitude','longitude'] if input_is_gridded else ['tg'] #dimensions of the input besides time
chunks = {'member_id':1,'time':100000}
if input_is_gridded:
    chunks['longitude'] = 5
output_path = '/home/jovyan/CMIP6cf/output/num_extremes/'+var1+'_'+var2+'_'+var1_dir.split('_')[-1]+'/'+str(window_len)+'yr_'+str(threshold).replace('0.','p')+'_lag'+str(max_lag)+'d_declus'+str(declus_window_len)+'d_ref'+str(ref_year)

#skip hidden entries; this needs 'not', because '~' on a Python bool is a bitwise NOT that is always truthy
for source_id in [k for k in source_ids if not k.startswith('.')]: #loop over models

    var1_model_path = os.path.join(var1_dir,source_id)
    var2_model_path = os.path.join(var2_dir,source_id)

    #names of the stores of this model (fs.ls returns full paths, so hidden entries are recognized by their last path component)
    var1_fns = [s.split('/')[-1] for s in fs.ls(var1_model_path)]
    var1_fns = [s for s in var1_fns if not s.startswith('.')]
    var2_fns = [s.split('/')[-1] for s in fs.ls(var2_model_path)]
    var2_fns = [s for s in var2_fns if not s.startswith('.')]

    #get experiment_id's: the last '_'-separated field of the name without its 5-character suffix ('.zarr')
    var1_exps = [s.split('_')[-1][0:-5] for s in var1_fns]
    var2_exps = [s.split('_')[-1][0:-5] for s in var2_fns]
    experiment_ids = sorted(set(var1_exps) & set(var2_exps)) #sorted for a reproducible processing order

    for experiment_id in experiment_ids: #loop over experiments
        #load data (the store of var2 is expected to have the same name as the store of var1):
        fn = fnmatch.filter(var1_fns,'*'+experiment_id+'*')[0]
        print('Processing file: '+fn)
        var1_var2_data = xr.open_mfdataset((os.path.join('gs://',var1_model_path,fn),os.path.join('gs://',var2_model_path,fn)),engine='zarr',chunks=chunks)

        #generate output paths
        output_model_path = os.path.join(output_path,var1_var2_data.source_id)
        output_fn = os.path.join(output_model_path,fn.replace('.zarr','.nc'))

        #construct time window indices
        if len(np.unique(var1_var2_data.time.resample(time='1Y').count()))>1: #remove leap days so that each computation window has the same length
            with dask.config.set(**{'array.slicing.split_large_chunks': True}):
                var1_var2_data = var1_var2_data.sel(time=~((var1_var2_data.time.dt.month == 2) & (var1_var2_data.time.dt.day == 29))) #^probably (hopefully) only has a small effect on the results

        days_in_year = int(var1_var2_data.time.resample(time='1Y').count()[0])

        #first year of each window, counted in years since 1850
        #NOTE(review): this assumes that every dataset starts on 1 January 1850 -- confirm
        first_window_yr = output_yrs-1850-int(np.floor(window_len/2))
        if window_len%2 == 0: #even: there is no central year, the window starts one year later
            first_window_yr = first_window_yr+1
        window_start_idx = days_in_year*first_window_yr
        first_window_idx = np.arange(0*days_in_year,window_len*days_in_year)

        window_positions = first_window_idx[:,np.newaxis]+window_start_idx[np.newaxis,:] #time index of every step (rows) in every window (columns)
        if np.min(window_positions)<0 or np.max(window_positions)>=len(var1_var2_data.time): #if a window falls outside of the simulation
            print('Skipping '+fn+': windows fall outside the simulation period.')
            continue #skip
            #raise Exception('Windows exceed simulation length.')

        window_idx = xr.DataArray( #indices of windows
            data=window_positions,
            dims=["time_in_window_idx","window"],
            coords=dict(
                time_in_window_idx=first_window_idx,
                window=output_yrs
            ),
        )

        os.makedirs(output_model_path,exist_ok=True) #also creates missing parent directories (os.mkdir does not)

        for member in tqdm(var1_var2_data.member_id,total=var1_var2_data.member_id.size): #loop over members of each model to compute the dependence

            var1_var2_data_mem = var1_var2_data.sel(member_id=member)
            with dask.config.set(**{'array.slicing.split_large_chunks': False}):
                var1_var2_data_wdws = var1_var2_data_mem.isel(time=window_idx) #select data in user-defined time windows

            data_is_complete = np.isfinite(var1_var2_data_wdws[var1]).all(dim='time_in_window_idx') & np.isfinite(var1_var2_data_wdws[var2]).all(dim='time_in_window_idx') #check data-completeness in each window

            #derive peaks
            var1_peaks = po_t_of_refyear(var1_var2_data_wdws[var1],threshold,ref_year,dim='time_in_window_idx')
            var2_peaks = po_t_of_refyear(var1_var2_data_wdws[var2],threshold,ref_year,dim='time_in_window_idx')

            #decluster: only keep peaks that are the maximum of the rolling window around them
            var1_peaks_declustered = var1_peaks.where(var1_peaks==var1_peaks.rolling({'time_in_window_idx':declus_window_len},center=True,min_periods=1).max(skipna=True))
            var2_peaks_declustered = var2_peaks.where(var2_peaks==var2_peaks.rolling({'time_in_window_idx':declus_window_len},center=True,min_periods=1).max(skipna=True))

            #determine joint extremes within 'max_lag' lag from eachother
            joint_extremes = np.isfinite((rolling_max(var2_peaks_declustered,max_lag*2+1,dim='time_in_window_idx')*var1_peaks_declustered)) #previously: 'co_occurring'

            #generate output dataset for current member
            num_extremes_mem = sum_num_extremes_pmonth(joint_extremes).to_dataset(name='num_joint_extremes')
            num_extremes_mem['num_'+var1+'_extremes'] = sum_num_extremes_pmonth(np.isfinite(var1_peaks_declustered))
            num_extremes_mem['num_'+var2+'_extremes'] = sum_num_extremes_pmonth(np.isfinite(var2_peaks_declustered))

            ####DECOMPOSITION OF CHANGES (probably not correct if declustering!!):
            #quantities of the reference window that are the same for every window in the loop below
            ref_data = var1_var2_data_wdws.sel(window=ref_year)
            var1_ref = ref_data[var1]
            var2_ref = ref_data[var2]
            var1_ref_peaks = var1_peaks_declustered.sel(window=ref_year)
            var2_ref_peaks_lagged = rolling_max(var2_peaks_declustered.sel(window=ref_year),max_lag*2+1,dim='time_in_window_idx')
            num_var1_peaks = np.isfinite(var1_peaks_declustered).sum(dim='time_in_window_idx') #number of peaks per window
            num_var2_peaks = np.isfinite(var2_peaks_declustered).sum(dim='time_in_window_idx')

            #1) sort (in magnitude) values in reference period to determine the equivalent threshold percentiles in other windows
            #(np.sort along axis 0 relies on 'time_in_window_idx' being the first dimension)
            ref_dims = ['time_in_window_idx']+spatial_dims
            ref_coords = {d:var1_var2_data_wdws[d] for d in ref_dims}
            sorted_var1_ref = xr.DataArray(data=np.sort(var1_ref,axis=0),dims=ref_dims,coords=ref_coords)
            sorted_var2_ref = xr.DataArray(data=np.sort(var2_ref,axis=0),dims=ref_dims,coords=ref_coords)
            if input_is_gridded:
                sorted_var1_ref = sorted_var1_ref.chunk({'longitude':5})
                sorted_var2_ref = sorted_var2_ref.chunk({'longitude':5})

            #initialize output arrays
            num_extremes_mem['num_joint_extremes_'+var1+'_driven'] = num_extremes_mem['num_joint_extremes'].copy(deep=True)
            num_extremes_mem['num_joint_extremes_'+var2+'_driven'] = num_extremes_mem['num_joint_extremes'].copy(deep=True)
            num_extremes_mem['num_joint_extremes_'+var1+'_'+var2+'_driven'] = num_extremes_mem['num_joint_extremes'].copy(deep=True)
            num_extremes_mem['num_'+var1+'_extremes_refWindow_futT'] = num_extremes_mem['num_'+var1+'_extremes'].copy(deep=True)
            num_extremes_mem['num_'+var2+'_extremes_refWindow_futT'] = num_extremes_mem['num_'+var2+'_extremes'].copy(deep=True)

            for win in var1_var2_data_wdws.window: #loop over each window to do the decomposition
                #2) find the threshold values in the reference period corresponding to the percentile of events exceeding the reference threshold values in the future (var_{U_{var}}^{hist} in the paper)
                #NOTE(review): a window without any peaks gives index -0 == 0, i.e. the smallest reference value becomes the threshold -- confirm this is intended
                var1_eqv_thresholds = sorted_var1_ref.isel(time_in_window_idx=-1*(num_var1_peaks.sel(window=win).load()))
                var2_eqv_thresholds = sorted_var2_ref.isel(time_in_window_idx=-1*(num_var2_peaks.sel(window=win).load()))

                #3) determine the peaks above those threshold values in the reference window
                var1_peaks_fut_threshold = var1_ref.where(var1_ref>=var1_eqv_thresholds)
                var2_peaks_fut_threshold = var2_ref.where(var2_ref>=var2_eqv_thresholds)
                var2_peaks_fut_threshold_lagged = rolling_max(var2_peaks_fut_threshold,max_lag*2+1,dim='time_in_window_idx')

                #4) determine the joint extremes for different components:
                # a) var2 peaks above standard threshold in reference period, var1 above future threshold percentile in reference period
                joint_extremes_var1_driven = np.isfinite(var2_ref_peaks_lagged*var1_peaks_fut_threshold)

                # b) var1 peaks above standard threshold in reference period, var2 above future threshold percentile in reference period
                joint_extremes_var2_driven = np.isfinite(var2_peaks_fut_threshold_lagged*var1_ref_peaks)

                # c) var1 and var 2 above future threshold percentile in reference period
                joint_extremes_var1_var2_driven = np.isfinite(var2_peaks_fut_threshold_lagged*var1_peaks_fut_threshold)

                #count per month
                num_extremes_mem['num_joint_extremes_'+var1+'_driven'].loc[dict(window=win)] = sum_num_extremes_pmonth(joint_extremes_var1_driven)
                num_extremes_mem['num_joint_extremes_'+var2+'_driven'].loc[dict(window=win)] = sum_num_extremes_pmonth(joint_extremes_var2_driven)
                num_extremes_mem['num_joint_extremes_'+var1+'_'+var2+'_driven'].loc[dict(window=win)] = sum_num_extremes_pmonth(joint_extremes_var1_var2_driven)
                num_extremes_mem['num_'+var1+'_extremes_refWindow_futT'].loc[dict(window=win)] = sum_num_extremes_pmonth(np.isfinite(var1_peaks_fut_threshold))
                num_extremes_mem['num_'+var2+'_extremes_refWindow_futT'].loc[dict(window=win)] = sum_num_extremes_pmonth(np.isfinite(var2_peaks_fut_threshold))

            #changes in magnitude of 95% in each season
            season_number = np.mod(var1_var2_data_wdws.time.dt.month,12)//3 #0: Dec-Feb, 1: Mar-May, 2: Jun-Aug, 3: Sep-Nov
            for var_name in (var1,var2):
                num_extremes_mem[var_name+'_p95'] = xr.concat([var1_var2_data_wdws[var_name].where(season_number==s).quantile(.95,dim='time_in_window_idx') for s in range(4)],dim='season')
            num_extremes_mem = num_extremes_mem.assign_coords({'season':['DJF','MAM','JJA','SON']})

            #store metadata
            num_extremes_mem['complete_window'] = data_is_complete #store where windows miss data

            num_extremes_mem = num_extremes_mem.expand_dims(dim={"member_id": 1}) #add coordinates & dimensions

            num_extremes_mem.attrs = var1_var2_data.attrs #keep original attributes and add information on the extremes analysis
            num_extremes_mem.attrs['window_length'] = str(window_len)
            num_extremes_mem.attrs['declustering'] = 'Rolling window of '+str(declus_window_len)+' days'
            num_extremes_mem.attrs['allowed_lag'] = str(max_lag)
            num_extremes_mem.attrs['ref_window'] = str(ref_year)

            #one output file per member: the member_id is appended to the file name
            num_extremes_mem.to_netcdf(output_fn.replace('.nc','_'+num_extremes_mem.member_id.values[0]+'.nc'),mode='w')
            num_extremes_mem.close()
    

Processing file: CESM2_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: CESM2_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: CESM2-WACCM_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: CESM2-WACCM_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: CMCC-CM2-SR5_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: CMCC-CM2-SR5_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: CMCC-ESM2_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: CMCC-ESM2_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: EC-Earth3_gr_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: EC-Earth3_gr_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: GFDL-CM4_gr1_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: GFDL-CM4_gr1_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: GFDL-ESM4_gr1_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: GFDL-ESM4_gr1_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: HadGEM3-GC31-MM_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: MIROC6_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: MIROC6_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: MPI-ESM1-2-HR_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: MPI-ESM1-2-HR_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: MRI-ESM2-0_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: MRI-ESM2-0_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: NorESM2-MM_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: NorESM2-MM_gn_day_ssp585.zarr


0it [00:00, ?it/s]

Processing file: TaiESM1_gn_day_ssp245.zarr


0it [00:00, ?it/s]

Processing file: TaiESM1_gn_day_ssp585.zarr


0it [00:00, ?it/s]

```python 
#old way of computing numbers per month
for month in np.arange(1,13): #count number of joint extremes per month
    if month==1: #initialize
        num_joint_extremes_pmonth = joint_extremes.where(joint_extremes.time.dt.month==month).sum(dim='time_in_window_idx')
        num_var1_extremes_pmonth = np.isfinite(var1_peaks_declustered.where(var1_peaks_declustered.time.dt.month==month)).sum(dim='time_in_window_idx')
        num_var2_extremes_pmonth = np.isfinite(var2_peaks_declustered.where(var2_peaks_declustered.time.dt.month==month)).sum(dim='time_in_window_idx')
    else: #add to results for preceding months
        num_joint_extremes_pmonth = xr.concat((num_joint_extremes_pmonth,joint_extremes.where(joint_extremes.time.dt.month==month).sum(dim='time_in_window_idx')),dim='month')
        num_var1_extremes_pmonth = xr.concat((num_var1_extremes_pmonth,np.isfinite(var1_peaks_declustered.where(var1_peaks_declustered.time.dt.month==month)).sum(dim='time_in_window_idx')),dim='month')
        num_var2_extremes_pmonth = xr.concat((num_var2_extremes_pmonth,np.isfinite(var2_peaks_declustered.where(var2_peaks_declustered.time.dt.month==month)).sum(dim='time_in_window_idx')),dim='month')

num_joint_extremes_pmonth = num_joint_extremes_pmonth.assign_coords({'month':np.arange(1,13)})                             
num_var1_extremes_pmonth = num_var1_extremes_pmonth.assign_coords({'month':np.arange(1,13)})
num_var2_extremes_pmonth = num_var2_extremes_pmonth.assign_coords({'month':np.arange(1,13)})       
```            

```python
sfcWind_dir = '/home/jovyan/CMIP6cf/output/timeseries/sfcWind_europe/'
pr_dir = '/home/jovyan/CMIP6cf/output/timeseries/pr_europe/'

source_ids = list(set(os.listdir(sfcWind_dir)) & set(os.listdir(pr_dir))) #intersection of models

for source_id in ['EC-Earth3']:#[k for k in source_ids if ~k.startswith('.')]: #loop over models
  
    sfcWind_path = os.path.join(sfcWind_dir,source_id)
    pr_path = os.path.join(pr_dir,source_id)
    
    sfcWind_exps = [s.split('_')[-1][0:-3] for s in os.listdir(sfcWind_path) if s.startswith('.')==False]
    pr_exps = [s.split('_')[-1][0:-3] for s in os.listdir(pr_path) if s.startswith('.')==False]
    
    experiment_ids = list(set(sfcWind_exps) & set(pr_exps))

    for experiment_id in experiment_ids: #loop over experiments
        #load data
        fn = fnmatch.filter(os.listdir(sfcWind_path),'*'+experiment_id+'*')[0]
        print('Processing file: '+fn)
        sfcWind_pr = xr.open_mfdataset((os.path.join(sfcWind_path,fn),os.path.join(pr_path,fn)),chunks={'member_id':1,'time':100000,'longitude':5})#.sel(longitude=np.arange(-25,11))
        
        #generate output paths & check if output already exists
        model_path = os.path.join('/home/jovyan/CMIP6cf/output/dependence/sfcWind_pr_europe/40yr_p98_lag0d_declus1d_ref1980',sfcWind_pr.source_id)
        output_fn = os.path.join(model_path,fn)

        if not os.path.exists(model_path):
            os.mkdir(model_path)

        if not overwrite_output: #if not overwriting existing output
            if os.path.exists(output_fn):
                print('Output already exists for this instance.')
                continue
        
        #construct time window indices
        if len(np.unique(sfcWind_pr.time.resample(time='1Y').count()))>1: #remove leap days so that each computation window has the same length
            with dask.config.set(**{'array.slicing.split_large_chunks': True}):
                sfcWind_pr = sfcWind_pr.sel(time=~((sfcWind_pr.time.dt.month == 2) & (sfcWind_pr.time.dt.day == 29))) #^probably (hopefully) only has a small effect on the results
        
        #select DJF:
        days_in_year = int(sfcWind_pr.time.resample(time='1Y').count()[0])
        
        if window_len%2 !=0: #odd
            window_start_idx = days_in_year*(output_yrs-1850-int(np.floor(window_len/2)))
            first_window_idx = np.arange(0*days_in_year,window_len*days_in_year)
        else: #even
            window_start_idx = days_in_year*(output_yrs-1850-int(window_len/2)+1)
            first_window_idx = np.arange(0*days_in_year,window_len*days_in_year)
        
        if np.max(first_window_idx[:,np.newaxis]+window_start_idx[np.newaxis,:])>=len(sfcWind_pr.time):
            raise Exception('Windows exceed simulation length.')
            
        window_idx = xr.DataArray( #indices of windows
            data=first_window_idx[:,np.newaxis]+window_start_idx[np.newaxis,:],
            dims=["time_in_window_idx","window"],
            coords=dict(
                time_in_window_idx=first_window_idx,
                window=output_yrs
            ),
        )
        
        for m,member in tqdm(enumerate(sfcWind_pr.member_id)): #loop over members to compute the dependence
            sfcWind_pr_mem = sfcWind_pr.sel(member_id=member).copy(deep=True).load()
            
            sfcWind_pr_wdws = sfcWind_pr_mem.isel(time=window_idx) #select windows
   
            data_is_complete = np.isfinite(sfcWind_pr_wdws.sfcWind).all(dim='time_in_window_idx') * np.isfinite(sfcWind_pr_wdws.pr).all(dim='time_in_window_idx')
            
            pr_hist_threshold = sfcWind_pr_wdws['pr'].sel(window=1980).quantile(threshold,dim='time_in_window_idx')
            sfcWind_hist_threshold = sfcWind_pr_wdws['sfcWind'].sel(window=1980).quantile(threshold,dim='time_in_window_idx')

            pr_peaks = sfcWind_pr_wdws['pr'].where(sfcWind_pr_wdws['pr']>pr_hist_threshold)
            sfcWind_peaks = sfcWind_pr_wdws['sfcWind'].where(sfcWind_pr_wdws['sfcWind']>sfcWind_hist_threshold)
            
            pr_peaks_declustered = pr_peaks.where(pr_peaks==pr_peaks.rolling({'time_in_window_idx':declus_window_len},center=True,min_periods=1).max(skipna=True))
            sfcWind_peaks_declustered = sfcWind_peaks.where(sfcWind_peaks==sfcWind_peaks.rolling({'time_in_window_idx':declus_window_len},center=True,min_periods=1).max(skipna=True))
            
            #decomposition (only works for declustering = 0 and lag =1)?
            
            #sort values in historical period
         
            sorted_pr_1980 = xr.DataArray(data=np.sort(sfcWind_pr_wdws.sel(window=1980).pr,axis=0),dims=['time_in_window_idx','latitude','longitude'],
                                          coords=dict(time_in_window_idx=sfcWind_pr_wdws.time_in_window_idx,latitude=sfcWind_pr_wdws.latitude,longitude=sfcWind_pr_wdws.longitude)).chunk({'longitude':5})
            sorted_sfcWind_1980 = xr.DataArray(data=np.sort(sfcWind_pr_wdws.sel(window=1980).sfcWind,axis=0),dims=['time_in_window_idx','latitude','longitude'],
                                               coords=dict(time_in_window_idx=sfcWind_pr_wdws.time_in_window_idx,latitude=sfcWind_pr_wdws.latitude,longitude=sfcWind_pr_wdws.longitude)).chunk({'longitude':5})

            #derive percentile values based on number of exceedences of historical 98% in the future & determine peaks, #potentially expand this to other windows than just 2080
            pr_eqv_thresholds = sorted_pr_1980.isel(time_in_window_idx=-1*(np.isfinite(pr_peaks_declustered).sum(dim='time_in_window_idx').sel(window=2080).load()))
            wind_eqv_thresholds = sorted_sfcWind_1980.isel(time_in_window_idx=-1*(np.isfinite(sfcWind_peaks_declustered).sum(dim='time_in_window_idx').sel(window=2080).load()))
            
            pr_peaks_fut_threshold = sfcWind_pr_wdws.pr.sel(window=1980).where(sfcWind_pr_wdws.pr.sel(window=1980)>=pr_eqv_thresholds)
            sfcWind_peaks_fut_threshold = sfcWind_pr_wdws.sfcWind.sel(window=1980).where(sfcWind_pr_wdws.sfcWind.sel(window=1980)>=wind_eqv_thresholds)

            #compute number of co_occurring extremes due to different effects

            #co occurring pr fut threshold
            num_co_occurring_pr_driven = np.isfinite((rolling_max(pr_peaks_fut_threshold,max_lag*2+1,dim='time_in_window_idx')*sfcWind_peaks_declustered.sel(window=1980))).sum(dim='time_in_window_idx')

            #co occurring wind fut threshold 
            num_co_occurring_wind_driven = np.isfinite((rolling_max(pr_peaks_declustered.sel(window=1980),max_lag*2+1,dim='time_in_window_idx')*sfcWind_peaks_fut_threshold)).sum(dim='time_in_window_idx')

            #co occurring both new threshold
            num_co_occurring_both_driven = np.isfinite((rolling_max(pr_peaks_fut_threshold,max_lag*2+1,dim='time_in_window_idx')*sfcWind_peaks_fut_threshold)).sum(dim='time_in_window_idx')
           
            
            #pr_peaks_declustered = declustered_peaks(sfcWind_pr_wdws['pr'],threshold,declus_window_len,dim='time_in_window_idx')
            #sfcWind_peaks_declustered = declustered_peaks(sfcWind_pr_wdws['sfcWind'],threshold,declus_window_len,dim='time_in_window_idx')
            
            #count occurrences of peaks
            co_occurring = np.isfinite((rolling_max(pr_peaks_declustered,max_lag*2+1,dim='time_in_window_idx')*sfcWind_peaks_declustered))
            
            for month in np.arange(1,13):
                if month==1:
                    num_co_occurring_pmonth = co_occurring.where(co_occurring.time.dt.month==month).sum(dim='time_in_window_idx')
                    num_pr_peaks_pmonth = np.isfinite(pr_peaks_declustered.where(pr_peaks_declustered.time.dt.month==month)).sum(dim='time_in_window_idx')
                    num_sfcWind_peaks_pmonth = np.isfinite(sfcWind_peaks_declustered.where(sfcWind_peaks_declustered.time.dt.month==month)).sum(dim='time_in_window_idx')
                else:
                    num_co_occurring_pmonth = xr.concat((num_co_occurring_pmonth,co_occurring.where(co_occurring.time.dt.month==month).sum(dim='time_in_window_idx')),dim='month')
                    num_pr_peaks_pmonth = xr.concat((num_pr_peaks_pmonth,np.isfinite(pr_peaks_declustered.where(pr_peaks_declustered.time.dt.month==month)).sum(dim='time_in_window_idx')),dim='month')
                    num_sfcWind_peaks_pmonth = xr.concat((num_sfcWind_peaks_pmonth,np.isfinite(sfcWind_peaks_declustered.where(sfcWind_peaks_declustered.time.dt.month==month)).sum(dim='time_in_window_idx')),dim='month')
                    
            num_co_occurring_pmonth = num_co_occurring_pmonth.assign_coords({'month':np.arange(1,13)})                             
            num_pr_peaks_pmonth = num_pr_peaks_pmonth.assign_coords({'month':np.arange(1,13)})       
            num_sfcWind_peaks_pmonth = num_sfcWind_peaks_pmonth.assign_coords({'month':np.arange(1,13)})       
            
            #dependence_mem = xr.concat([ktau_pr_cdon_sfcWind,ktau_sfcWind_cdon_pr,ktau_both_peaks],dim='extreme_variate').to_dataset(name='ktau')
                
            dependence_mem = num_co_occurring_pmonth.to_dataset(name='num_co_occurring')
            dependence_mem['num_pr_peaks'] = num_pr_peaks_pmonth
            dependence_mem['num_sfcWind_peaks'] = num_sfcWind_peaks_pmonth
           
            dependence_mem['num_co_occurring_pr_driven']   = num_co_occurring_pr_driven
            dependence_mem['num_co_occurring_sfcWind_driven'] = num_co_occurring_wind_driven
            dependence_mem['num_co_occurring_both_driven'] = num_co_occurring_both_driven
            
            dependence_mem['num_co_occurring_pr_driven'].attrs['window']='2061-2100'
            dependence_mem['num_co_occurring_sfcWind_driven'].attrs['window']='2061-2100'
            dependence_mem['num_co_occurring_both_driven'].attrs['window']='2061-2100'
           
            #store thresholds
            dependence_mem['sfcWind_thresholds_annual'] = sfcWind_pr_wdws['sfcWind'].quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            dependence_mem['pr_thresholds_annual'] = sfcWind_pr_wdws['pr'].quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            
            dependence_mem['sfcWind_thresholds_djf'] = sfcWind_pr_wdws['sfcWind'].where(np.mod(sfcWind_pr_wdws.time.dt.month,12)<3).quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            dependence_mem['pr_thresholds_djf'] = sfcWind_pr_wdws['pr'].where(np.mod(sfcWind_pr_wdws.time.dt.month,12)<3).quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            
            dependence_mem['sfcWind_thresholds_mam'] = sfcWind_pr_wdws['sfcWind'].where((np.mod(sfcWind_pr_wdws.time.dt.month,12)>2) & (np.mod(sfcWind_pr_wdws.time.dt.month,12)<6)).quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            dependence_mem['pr_thresholds_mam'] = sfcWind_pr_wdws['pr'].where((np.mod(sfcWind_pr_wdws.time.dt.month,12)>2) & (np.mod(sfcWind_pr_wdws.time.dt.month,12)<6)).quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            
            dependence_mem['sfcWind_thresholds_jja'] = sfcWind_pr_wdws['sfcWind'].where((np.mod(sfcWind_pr_wdws.time.dt.month,12)>5) & (np.mod(sfcWind_pr_wdws.time.dt.month,12)<9)).quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            dependence_mem['pr_thresholds_jja'] = sfcWind_pr_wdws['pr'].where((np.mod(sfcWind_pr_wdws.time.dt.month,12)>5) & (np.mod(sfcWind_pr_wdws.time.dt.month,12)<9)).quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            
            dependence_mem['sfcWind_thresholds_son'] = sfcWind_pr_wdws['sfcWind'].where((np.mod(sfcWind_pr_wdws.time.dt.month,12)>8) & (np.mod(sfcWind_pr_wdws.time.dt.month,12)<12)).quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            dependence_mem['pr_thresholds_son'] = sfcWind_pr_wdws['pr'].where((np.mod(sfcWind_pr_wdws.time.dt.month,12)>8) & (np.mod(sfcWind_pr_wdws.time.dt.month,12)<12)).quantile(np.arange(.9,1,.01),dim='time_in_window_idx')
            dependence_mem['complete_window'] = data_is_complete #store where windows miss data
            
            
            dependence_mem = dependence_mem.expand_dims(dim={"member_id": 1}) #add coordinates & dimensions
            #dependence_mem = dependence_mem.assign_coords({'extreme_variate':['sfcWind','pr','both'],'statistic':['coef','p'],'estimator':['cfg','.95']})
            
            if m==0: #concatenate results over member_id's
                dependence = dependence_mem
            else:
                dependence = xr.concat((dependence,dependence_mem),dim='member_id')    
            
            dependence.attrs = sfcWind_pr.attrs #keep original attributes and add information on the extremes analysis
            dependence.attrs['window_length'] = str(window_len)
            dependence.attrs['declustering'] = 'Rolling window of '+str(declus_window_len)+' days'
            dependence.attrs['allowed_lag'] = str(max_lag)
            
            #store for all members of this model & experiment
            dependence.to_netcdf(output_fn,mode='w')
            dependence.close()
```