In [2]:
import numpy as np
import xarray as xr
import os
import fnmatch
from tqdm.autonotebook import tqdm
import dask
import sys
sys.path.insert(0, '/home/jovyan/CMIP6cf/cmip6cf/')

from dependence_metrics import kendallstau, utdc_at_threshold, utdc_cfg

  from tqdm.autonotebook import tqdm


In [6]:
def pot(da,threshold,dim):
    """Keep only the values of ``da`` that exceed its ``threshold`` quantile along ``dim``.

    Parameters
    ----------
    da : array-like object exposing ``quantile(q, dim=...)`` and ``where(cond)``
        (an xr.DataArray in this notebook).
    threshold : quantile level; must satisfy 0 <= threshold < 1.
    dim : name of the dimension along which the quantile is evaluated.

    Returns
    -------
    ``da.where(...)`` with the condition "strictly greater than the quantile".

    Raises
    ------
    AssertionError if ``threshold`` lies outside [0, 1).
    """
    not_below_zero = threshold>=0
    below_one = threshold<1
    assert not_below_zero & below_one

    cutoff = da.quantile(threshold,dim=dim)
    exceeds_cutoff = da>cutoff
    return da.where(exceeds_cutoff)

def declustered_peaks(da,threshold,window_len,dim):
    """Peaks over threshold of an xr.DataArray, declustered with a rolling window.

    Values above the ``threshold`` quantile (see ``pot``) are retained only where
    they equal the maximum of the centered rolling window of ``window_len`` steps
    along ``dim``. Everything else is masked by ``where``.

    Note: the comparison is an equality test, so equal-valued exceedances inside
    the same window are all retained.
    """
    exceedances = pot(da,threshold,dim)

    #rolling maximum of the exceedances only; min_periods=1 lets windows with a single valid value produce a result
    window_max = exceedances.rolling({dim:window_len},center=True,min_periods=1).max(skipna=True)

    is_window_max = exceedances==window_max
    return exceedances.where(is_window_max)

def rolling_max(da,window_len,dim):
    """Centered rolling maximum of ``da`` over ``window_len`` steps along ``dim``.

    ``min_periods=1`` is passed so that a window needs only one valid value.
    """
    centered_window = da.rolling({dim:window_len},center=True,min_periods=1)
    return centered_window.max()

def compute_ktau_in_windows(da1,da2,dim):
    """Apply ``kendallstau`` to ``da1`` and ``da2`` along ``dim`` via ``xr.apply_ufunc``.

    ``dim`` is the core dimension handed to ``kendallstau``; all other dimensions
    are looped over (``vectorize=True``). The result carries a new dimension
    ``"statistic"`` declared with length 2 (tau and p according to the original
    notes; ``kendallstau`` must return a numpy array).
    """
    per_input_core_dims = [[dim], [dim]] #core dimension: time, loop over the others
    statistic_dim = "statistic" #outputs tau and p

    return xr.apply_ufunc(
        kendallstau,
        da1,
        da2,
        input_core_dims=per_input_core_dims,
        output_core_dims=[[statistic_dim]],
        vectorize=True,
        dask='allowed', #allow calculating in chunks (dask='parallelized' doesn't work)
        output_dtypes=[float],
        output_sizes={statistic_dim: 2}, #output must be numpy array
    )

def compute_utdc_in_windows(da1,da2,dim,estimator):
    """Apply an upper tail dependence coefficient (UTDC) estimator along ``dim``.

    Parameters
    ----------
    da1, da2 : xr.DataArray inputs passed to the estimator.
    dim : core dimension handed to the estimator; all other dimensions are
        looped over (``vectorize=True``).
    estimator : ``'cfg'`` selects ``utdc_cfg``; ``'threshold'`` selects
        ``utdc_at_threshold`` (called without extra arguments, so it uses its own
        default threshold - .95 according to the original note).

    Returns
    -------
    Result of ``xr.apply_ufunc`` with an added ``"statistic"`` dimension declared
    with length 2 (presumably coefficient plus a placeholder, since the p-value
    is not implemented yet - see to-do below).

    Raises
    ------
    ValueError if ``estimator`` is not one of the supported names.
    """
    #resolve the estimator first so that an unsupported name fails before any computation
    if estimator=='cfg':
        utdc_func = utdc_cfg
    elif estimator=='threshold':
        utdc_func = utdc_at_threshold #uses its default threshold
    else:
        raise ValueError("Estimator '"+str(estimator)+"' not implemented.")

    #to-do: implement p-value for utdc!

    utdc = xr.apply_ufunc(
                    utdc_func, da1, da2,
                    input_core_dims=[[dim], [dim]], #core dimension: time, loop over the others
                    output_core_dims=[["statistic"]], #estimator output is stored along 'statistic'
                    vectorize=True,
                    dask='allowed', #allow calculating in chunks (dask='parallelized' doesn't work)
                    output_dtypes=[float],
                    output_sizes={"statistic": 2}, #output must be numpy array
                    )

    return utdc

Configure the bivariate sampling:

In [3]:
#--- peak sampling ---
threshold = 0.97        #quantile above which values count as peaks
declus_window_len = 3   #days; rolling-window length used for declustering
max_lag = 2             #days; the other variable is searched within +/- max_lag

#--- moving analysis windows ---
window_len = 30         #years per window; may need to increase? indicate settings in output folder?
output_yrs = np.arange(1865, 2100, 20)  #years labelling the windows

#--- tail dependence ---
utdc_threshold = 0.95   #NOTE(review): not referenced by the code visible in this notebook

#--- output handling ---
overwrite_output = True  #if False, instances whose output file already exists are skipped

Analyze surge & pr, looping over models & experiments:

In [8]:
surge_dir = '/home/jovyan/CMIP6cf/output/surge_timeseries/'
pr_dir = '/home/jovyan/CMIP6cf/output/pr_timeseries/'

#models available for both variables; sorted so that the processing order (and hence the
#contents of `dependence` left over for the plotting cells) is reproducible between kernels
source_ids = sorted(set(os.listdir(surge_dir)) & set(os.listdir(pr_dir)))

for source_id in [k for k in source_ids if 'HadGEM' in k]: #loop over models (currently restricted to names containing 'HadGEM')
    
    surge_path = os.path.join(surge_dir,source_id)
    pr_path = os.path.join(pr_dir,source_id)
    
    #experiment id = last '_'-separated token of the filename without its 3-character extension; hidden files are skipped
    surge_exps = [s.split('_')[-1][0:-3] for s in os.listdir(surge_path) if not s.startswith('.')]
    pr_exps = [s.split('_')[-1][0:-3] for s in os.listdir(pr_path) if not s.startswith('.')]
    
    experiment_ids = sorted(set(surge_exps) & set(pr_exps)) #experiments available for both variables

    for experiment_id in experiment_ids: #loop over experiments
        #load data (the same filename is used in the surge and pr directories)
        fn = fnmatch.filter(os.listdir(surge_path),'*'+experiment_id+'*')[0]
        print('Processing file: '+fn)
        surge_pr = xr.open_mfdataset((os.path.join(surge_path,fn),os.path.join(pr_path,fn)),chunks={'member_id':1,'time':100000,'tg':109})
        
        #generate output paths & check if output already exists
        model_path = os.path.join('/home/jovyan/CMIP6cf/output/surge_pr_dependence/',surge_pr.source_id)
        output_fn = os.path.join(model_path,fn)

        os.makedirs(model_path,exist_ok=True) #also creates missing parent directories

        if not overwrite_output and os.path.exists(output_fn): #if not overwriting existing output
            print('Output already exists for this instance.')
            surge_pr.close() #release the file handles before moving on
            continue
        
        #construct time window indices
        if len(np.unique(surge_pr.time.resample(time='1Y').count()))>1: #remove leap days so that each computation window has the same length
            with dask.config.set(**{'array.slicing.split_large_chunks': True}):
                surge_pr = surge_pr.sel(time=~((surge_pr.time.dt.month == 2) & (surge_pr.time.dt.day == 29))) #^probably (hopefully) only has a small effect on the results
        
        days_in_year = int(surge_pr.time.resample(time='1Y').count()[0])
        
        #positions within a window (identical for odd and even window lengths)
        first_window_idx = np.arange(window_len*days_in_year)
        
        #start position of each window; assumes the time axis starts in 1850 - TODO confirm
        if window_len%2 !=0: #odd
            window_start_idx = days_in_year*(output_yrs-1850-int(np.floor(window_len/2)))
        else: #even
            window_start_idx = days_in_year*(output_yrs-1850-int(window_len/2)+1)
        
        window_idx_values = first_window_idx[:,np.newaxis]+window_start_idx[np.newaxis,:]
        
        #negative positions would silently wrap around to the end of the series in isel, so reject them as well
        if (np.min(window_idx_values)<0) or (np.max(window_idx_values)>=len(surge_pr.time)):
            raise Exception('Windows exceed simulation length.')
            
        window_idx = xr.DataArray( #indices of windows
            data=window_idx_values,
            dims=["time_in_window_idx","window"],
            coords=dict(
                time_in_window_idx=first_window_idx,
                window=output_yrs
            ),
        )
        
        dependence_members = [] #per-member results, concatenated once after the loop
        
        for m,member in enumerate(tqdm(surge_pr.member_id)): #loop over members to compute the dependence
            surge_pr_mem = surge_pr.sel(member_id=member).copy(deep=True).load()
            
            surge_pr_wdws = surge_pr_mem.isel(time=window_idx) #select windows
            data_is_complete = np.isfinite(surge_pr_wdws.surge).all(dim='time_in_window_idx') & np.isfinite(surge_pr_wdws.pr).all(dim='time_in_window_idx')
            
            pr_peaks_declustered = declustered_peaks(surge_pr_wdws['pr'],threshold,declus_window_len,dim='time_in_window_idx')
            surge_peaks_declustered = declustered_peaks(surge_pr_wdws['surge'],threshold,declus_window_len,dim='time_in_window_idx')
            
            #compute rank correlations with lag
            ktau_pr_cdon_surge = compute_ktau_in_windows(surge_peaks_declustered,
                                                         rolling_max(surge_pr_wdws['pr'],max_lag*2+1,dim='time_in_window_idx'),
                                                         dim="time_in_window_idx")

            ktau_surge_cdon_pr = compute_ktau_in_windows(pr_peaks_declustered,
                                                         rolling_max(surge_pr_wdws['surge'],max_lag*2+1,dim='time_in_window_idx'),
                                                         dim="time_in_window_idx")

            ktau_both_peaks = compute_ktau_in_windows(surge_peaks_declustered,
                                                         rolling_max(pr_peaks_declustered,max_lag*2+1,dim='time_in_window_idx'),
                                                         dim="time_in_window_idx")
            
            #compute UTDC's from daily pairs
            cfg_utdc = compute_utdc_in_windows(surge_pr_wdws['surge'],surge_pr_wdws['pr'],dim="time_in_window_idx",estimator='cfg')
            
            threshold_utdc = compute_utdc_in_windows(surge_pr_wdws['surge'],surge_pr_wdws['pr'],dim="time_in_window_idx",estimator='threshold')
            
            dependence_mem = xr.concat([ktau_pr_cdon_surge,ktau_surge_cdon_pr,ktau_both_peaks],dim='extreme_variate').to_dataset(name='ktau')
            dependence_mem['utdc'] = xr.concat([cfg_utdc,threshold_utdc],dim='estimator')
            dependence_mem['complete_window'] = data_is_complete #store where windows miss data
            
            dependence_mem = dependence_mem.expand_dims(dim={"member_id": 1}) #add coordinates & dimensions
            dependence_mem = dependence_mem.assign_coords({'extreme_variate':['surge','pr','both'],'statistic':['coef','p'],'estimator':['cfg','.95']})
            
            dependence_members.append(dependence_mem)
        
        #concatenate results over member_id's in one go (avoids re-copying the growing dataset for every member)
        dependence = xr.concat(dependence_members,dim='member_id')
        
        dependence.attrs = surge_pr.attrs #keep original attributes and add information on the extremes analysis
        dependence.attrs['window_length'] = str(window_len)
        dependence.attrs['declustering'] = 'Rolling window of '+str(declus_window_len)+' days'
        dependence.attrs['allowed_lag'] = str(max_lag)
        
        #store for all members of this model & experiment
        #NOTE(review): writing is disabled, so output_fn/overwrite_output currently have no effect - re-enable when ready
        #dependence.to_netcdf(output_fn,mode='w')
        #dependence.close()
           

Processing file: HadGEM3-GC31-MM_gn_day_ssp585.nc


0it [00:00, ?it/s]

  result = np.apply_along_axis(_nanquantile_1d, axis, a, q,


In [12]:
#debug check: number of retained (finite) declustered pr peaks per window, for the member processed last by the loop
is_retained_peak = np.isfinite(pr_peaks_declustered)
is_retained_peak.sum(dim='time_in_window_idx')

In [12]:
#debug check: timestamp stored at position 90359 of the time axis of the member processed last by the loop
surge_pr_mem.time.isel(time=90359)

In [None]:
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
%config InlineBackend.figure_format = 'retina'

fig=plt.figure()

ax = plt.subplot(111,projection=ccrs.Robinson(central_longitude=0))
ax.add_feature(cartopy.feature.OCEAN, zorder=0,facecolor='white')
ax.add_feature(cartopy.feature.LAND, zorder=0, facecolor='grey')

dKtau = dependence.ktau.sel(extreme_variate='pr',statistic='coef',window=2085).mean(dim='member_id')-dependence.ktau.sel(extreme_variate='pr',statistic='coef',window=2005).mean(dim='member_id')

sc=ax.scatter(dependence.lon,dependence.lat,c=dKtau,cmap='seismic',vmin=-.4,vmax=.4,s=20,transform=ccrs.PlateCarree(),zorder=3)
ax.set_extent([-15, 12, 35, 65], crs=ccrs.PlateCarree())
      
cax=inset_axes(ax,width="100%", height="100%",bbox_to_anchor=(0.04, -.1,1,.075),bbox_transform=ax.transAxes)
cb=fig.colorbar(sc, cax=cax,orientation='horizontal',label='Kendalls Tau [-]')
   
    


In [None]:
#Map of ensemble-mean Kendall's tau (extreme_variate='surge') for the 2005 window.
#Uses `dependence` as left behind by the processing loop.
fig=plt.figure()

map_projection = ccrs.Robinson(central_longitude=0)
ax = plt.subplot(111,projection=map_projection)
ax.add_feature(cartopy.feature.OCEAN, zorder=0,facecolor='white')
ax.add_feature(cartopy.feature.LAND, zorder=0, facecolor='grey')

#ensemble mean of the coefficient for the selected window
ktau_surge_2005 = dependence.ktau.sel(extreme_variate='surge',window=2005,statistic='coef').mean(dim='member_id')

sc=ax.scatter(
    dependence.lon,
    dependence.lat,
    c=ktau_surge_2005,
    cmap='seismic',
    vmin=-.4,
    vmax=.4,
    s=20,
    transform=ccrs.PlateCarree(),
    zorder=3,
)
ax.set_extent([-15, 12, 35, 65], crs=ccrs.PlateCarree())

#horizontal colorbar in an inset axes just below the map
cax=inset_axes(ax,width="100%", height="100%",bbox_to_anchor=(0.04, -.1,1,.075),bbox_transform=ax.transAxes)
cb=fig.colorbar(sc, cax=cax,orientation='horizontal',label='Kendalls Tau [-]')
   