In [13]:
import xclim
import xarray as xr
import cftime
import xclim.sdba as sdba
import glob
import re

In [2]:
# Define a function to standardize time
def standardize_time(ds):
    """
    Decode and standardize the time variable for a single dataset.
    """
    if 'time' in ds.variables:
        # Get the time attributes
        time_units = ds.time.attrs.get('units', None)
        calendar = ds.time.attrs.get('calendar', 'standard')
        
        # Decode the time variable
        if time_units is not None:
            decoded_times = cftime.num2date(ds.time.values, units=time_units, calendar=calendar)
        
        # Assign back to the dataset
        ds['time'] = ('time', decoded_times)

    return ds

In [3]:
observations = xr.open_dataset('O:\\Man\\Public\\sharing-4270-CERM\\MATNEW\\Observation NetCDF\\pr_10km_1989_2019_remapnn_CLIMEX.nc')
observations = observations.rename({"Pr": "pr"})

In [None]:
#File to correct 'time' and 'time_bnds'
model_correct_time = xr.open_dataset("O:\\Man\\Public\\sharing-4270-CERM\\VLYMI\\Climex DK Domain\\pr\\kba\\pr_EUR-11_CCCma-CanESM2_historical_r1-r1i1p1_OURANOS-CRCM5_kba_1h_1955-2100.nc" , decode_times=False)

In [4]:
#Create mask to match pixels between datasets
mask = xr.where(observations.sel(time='1989-01-02').notnull(), 1, 0)

In [5]:
# Ensembles of CLIMEX1
string = 'kbi  kbh  kbg  kbf  kbe  kbd  kbc  kbb  kba  kcx  kcw  kcv  kcu  kct  kcs  kcr  kcq  kcp  kco  kcn  kcm  kcl  kck  kcj  kci  kch  kcg  kcf  kce  kcd  kcc  kcb  kca  kbz  kby  kbx  kbw  kbv  kbu  kbt  kbs  kbr  kbq  kbp  kbo  kbn  kbm  kbl  kbk  kbj  '
ensembles = string.split( )
ensembles.sort()

In [6]:
# Paths
input_path = "O:\\Man\\Public\\sharing-4270-CERM\\VLYMI\\CLIMEX DK Domain\\pr\\"
output_path = "O:\\Man\\Public\\sharing-4270-CERM\\VLYMI\\CLIMEX DK Domain\\pr_bc\\"

In [11]:
%%time
for i, ens in enumerate(ensembles):
    print(ens)
    
    file = glob.glob(input_path + ens + "/pr_*")
    
    print(file)
    
    break
    
    #Grab ensembler member
    model = xr.open_dataset(file , decode_times=False)
    
    #Fix time dimensions
    model['time'] = model_correct_time['time']
    model['time_bnds']= model_correct_time['time_bnds']
    
    #Modify time dimension
    model = standardize_time(model)
    model["time"] = model.indexes["time"].to_datetimeindex()
    
    #Select time period of interest
    model = model.sel(time=slice("1981-01-01", "2025-12-31"))
    
    #Compute daily sums 
    model_daily= model['pr'].resample(time='1D').sum()
    
    #Drop variable to free up RAM
    del model
    
    #Convert to mm/day (don't need to multiple by 24 since the data is summed across all hourly timesteps)
    model_daily = model_daily*3600
    
    #Mask the model data
    mask_expanded_model = mask.expand_dims(time=model_daily.time)
    model_daily = model_daily.where(mask_expanded_model)
    
    #Select the calibration periods and period to apply bias correction
    model_calib = model_daily.sel(time=slice("1989-01-02", "2019-12-31"))['pr']
    obs_calib = observations['pr']
    model_bc = model_daily['pr']
    
    #Set the units 
    model_calib.attrs['units'] = 'mm/day'
    obs_calib.attrs['units'] = 'mm/day'
    model_bc.attrs['units'] = 'mm/day'
    
    #Apply QDM bias correction 
    qdm = sdba.QuantileDeltaMapping.train(ref=obs_calib, hist=model_calib, group = 'time')
    corrected_qdm = qdm.adjust(model_bc)
    
    # Apply EVT (GPD) to the tails (extremes) of data
    extremes = sdba.ExtremeValues.train(ref=obs_calib, hist=model_calib,q_thresh=0.95, cluster_thresh = "7.5 mm/day")
    corrected_with_extremes = extremes.adjust(sim = model_bc, scen = corrected_qdm)
    
    #Remove drizzle effect in RCM outputs 
    corrected_with_extremes.values[corrected_with_extremes.values <= 0.3] = 0
    
    #Transpose the datarray (fix issue with BC method switching position of latitude and longitude)
    corrected_with_extremes = corrected_with_extremes.transpose("time", "rlat", "rlon")
    
    #Rename variable from 'scen' to 'pr'
    corrected_with_extremes = corrected_with_extremes.rename("pr")
    
    #Create output directory
    output_dir = f"{output_path}{ens}"
    
    #Create folder path in directroy
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        
    #Grab model name string
    match = re.search(r'pr_.*?-CRCM5', file[0])
    
    name = match.group(0)
    
    #Save final output name
    output_path = os.path.join(output_dir, f"{name}_{ens}_day_1981-2025_BC.nc")
    
    #Export NetCDF
    corrected_with_extremes.to_netCDF(output_path)

kba
['O:\\Man\\Public\\sharing-4270-CERM\\VLYMI\\CLIMEX DK Domain\\pr\\kba\\pr_EUR-11_CCCma-CanESM2_historical_r1-r1i1p1_OURANOS-CRCM5_kba_1h_1955-2100.nc']
CPU times: total: 0 ns
Wall time: 14.5 ms
