In [1]:
%matplotlib inline
import xarray as xr
import scipy as sp
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os 
import gcsfs

import dask.distributed as dd
import dask
import rhg_compute_tools.kubernetes as rhgk

In [2]:
# Google Cloud Storage filesystem handle (used further down to build a zarr
# store mapper). `token` is a path to a JSON file that must exist on the
# machine running this notebook.
fs = gcsfs.GCSFileSystem(token='/opt/gcsfuse_tokens/impactlab-data.json')

In [None]:
# Start a cluster through the rhg_compute_tools helper, which returns a
# (client, cluster) pair; the bare `cluster` expression displays it as the
# cell output.
client, cluster = rhgk.get_standard_cluster()
cluster

In [None]:
# ! pip install -e /home/jovyan/downscaling/xclim/

In [3]:
# ! pip install xclim 
from xclim import sdba

In [4]:
from xclim.sdba.adjustment import AnalogQuantilePreservingDownscaling, QuantileDeltaMapping
from xclim.sdba.utils import equally_spaced_nodes
from xclim.core.calendar import convert_calendar
from xclim import set_options

In [5]:
def convert_to_noleap_calendar(ds, target='noleap'):
    """
    Thin wrapper around `convert_calendar`.

    ds: object to convert; forwarded unchanged as the first argument
    target: calendar name forwarded as `target` (defaults to 'noleap')

    returns: whatever `convert_calendar(ds, target=target)` returns
    """
    return convert_calendar(ds, target=target)

In [6]:
def compute_day_ind(coarse, dim='time', variable='tas', num_quantiles=620):
    """
    Compute the day indices used by the analog downscaling prototype.

    The indices are the ordering that sorts `coarse` along its first axis
    (axis 0), so they can be used to index into the coarse and fine data in
    rank order.

    coarse: time series of coarse data for a day group for a single gridcell;
        any object exposing `argsort(axis=...)` works (e.g. a numpy array)
    dim: currently unused -- sorting is always done along axis 0
    variable: currently unused
    num_quantiles: currently unused

    The three unused parameters are kept so existing calls keep working.

    returns: the result of `coarse.argsort(axis=0)`
    """
    # No quantile grid is built here: the result depends only on the sort
    # order of `coarse`.
    return coarse.argsort(axis=0)

Set up test data, using the full bias-correction time period for part of Washington state from ERA-5 1/4-degree data.

In [None]:
# Build one file path per year. np.arange excludes its stop value, so
# `years` runs from 1994 through 2014.
direc = '/gcs/impactlab-data/climate/source_data/ERA-5/tasmax/daily/netcdf/v1.1'
years = np.arange(1994, 2015)
filenames = [os.path.join(direc, 'tmax_daily_%s-%s.nc' %(year, year)) for year in years]
# Open all yearly files as a single dataset, then convert it to the 'noleap'
# calendar.
ref_fine = xr.open_mfdataset(filenames)
ref_fine = convert_calendar(ref_fine, target='noleap')

In [None]:
# temp = xr.open_mfdataset(filenames)

In [None]:
'''# small Washington state mask 
min_lon = -124
min_lat = 47
max_lon = -122
max_lat = 49

mask_lon = (temp.longitude >= min_lon) & (temp.longitude <= max_lon)
mask_lat = (temp.latitude >= min_lat) & (temp.latitude <= max_lat)'''

In [None]:
# temp.coords['longitude'] = (temp.coords['longitude'] + 180) % 360 - 180

In [7]:
# The triple-quoted string below is disabled code (a bare string expression,
# never executed). It shows how the NetCDF file opened underneath it was
# written: mask `temp`, load it, and save it to the same path.
'''tas_test = temp.where(mask_lon & mask_lat, drop=True)
tas_eager = tas_test.load()
tas_eager.to_netcdf('/home/jovyan/analog_method_prototype_wastate_era5_data.nc')'''
# Read the saved subset back from disk and convert it to the 'noleap' calendar.
tas_eager = xr.open_dataset('/home/jovyan/analog_method_prototype_wastate_era5_data.nc')
tas_eager = convert_calendar(tas_eager, target='noleap')

Test my `xclim` implementation.

In [8]:
# Smoke-test inputs: the same data stands in for both coarse and fine, just to
# see if the code runs. The disabled string below is the single-gridcell
# variant ([:, 0, 0]); the active code uses the whole 'tas' variable.
'''ref = tas_eager['tas'][:, 0, 0]
hist = tas_eager['tas'][:, 0, 0] + 3
sim = tas_eager['tas'][:, 0, 0] + 5'''
# `hist` and `sim` are `ref` shifted by constant offsets of +3 and +5.
ref = tas_eager['tas']
hist = tas_eager['tas'] + 3
sim = tas_eager['tas'] + 5

In [9]:
ref

In [None]:
# Train an additive QuantileDeltaMapping on (ref, hist) with 620 equally
# spaced quantile nodes and a 31-day day-of-year window, then adjust `sim`.
# Later cells read both `scen` and `sim_q` from the result; presumably the
# `sdba_extra_output=True` option is what makes `sim_q` available (confirm
# against the xclim version in use).
with set_options(sdba_extra_output=True):
    quantiles = equally_spaced_nodes(620, eps=None)
    QDM = QuantileDeltaMapping(kind='+', nquantiles=quantiles, 
                               group=sdba.Grouper("time.dayofyear", window=31))
    QDM.train(ref, hist)
    fake_biascorrected = QDM.adjust(sim)

In [None]:
fake_biascorrected

In [None]:
# Keep only the 'scen' variable, attach `sim_q` to it as a coordinate, and
# wrap the result back into a Dataset.
# NOTE(review): this rebinds `fake_biascorrected`. Once `sim_q` is no longer
# reachable as an attribute of the new object, re-running this cell will fail.
fake_biascorrected = fake_biascorrected['scen'].assign_coords(sim_q=fake_biascorrected.sim_q).to_dataset()

In [None]:
# Sanity check: confirm that `sim_q` ended up among the coordinates.
if 'sim_q' in fake_biascorrected.coords: 
    print("you got your quantiles in here buddy")

In [10]:
# Additive analog downscaling object: 620 quantiles, grouped by day of year
# with a 31-day window (the same grouping as the QDM step).
AIQPD = AnalogQuantilePreservingDownscaling(kind='+', nquantiles=620, 
                                            group=sdba.Grouper("time.dayofyear", window=31))

In [14]:
ref

In [11]:
# Train on the synthetic (ref, hist) pair; the bare `AIQPD` displays the
# object's repr as the cell output.
AIQPD.train(ref, hist)
AIQPD

AnalogQuantilePreservingDownscaling(nquantiles=620, kind='+', group=Grouper(add_dims=['window'], name='time.dayofyear', window=31, interp=False))

In [None]:
AIQPD.ds

In [None]:
QDM.ds.af

In [None]:
# Downscale the bias-corrected data. At this point `fake_biascorrected` is the
# Dataset carrying `scen` with a `sim_q` coordinate.
# NOTE(review): a later cell calls adjust(scen, sim_q=...) instead -- confirm
# which call signature the class actually expects.
downscaled = AIQPD.adjust(fake_biascorrected)

In [None]:
# downscaled

In [None]:
# Overlay the downscaled result and the bias-corrected input on one figure.
plt.plot(downscaled)

# NOTE(review): `fake_biascorrected` was rebound to a Dataset earlier, so the
# commented-out ['scen'] form may be the one that is needed here -- confirm.
# plt.plot(fake_biascorrected['scen'])
plt.plot(fake_biascorrected)

We have to bias correct `sim` first so that there are quantiles (`sim_q`) to use in `adjust()`.

In [None]:
AIQPD.ds

In [None]:
# QDM.ds

In [None]:
# Alternative call form: pass the bias-corrected values and their quantiles
# separately. The result is only displayed, not stored.
AIQPD.adjust(fake_biascorrected['scen'], sim_q=fake_biascorrected.sim_q)

In [None]:
# AIQPD.ds.af.isel(dayofyear=60).plot()

Now compare to the prototype notebook implementation of the analog method.

Compare for one day group without a window (so just 20 data points).

In [None]:
# Group both series by day of year (no window) so that a single day group can
# be pulled out for the comparison.
ref_daygroups = ref.groupby('time.dayofyear')
hist_daygroups = hist.groupby('time.dayofyear')

In [None]:
# Same method, but grouped by plain day of year (no window) and with only 20
# quantiles.
AIQPD_day = AnalogQuantilePreservingDownscaling(kind='+', nquantiles=20, group=sdba.Grouper("time.dayofyear"))

In [None]:
# Train the per-day version on the same (ref, hist) pair and display its repr.
AIQPD_day.train(ref, hist)
AIQPD_day

In [None]:
# Prototype path for day-of-year 100: get the ordering that sorts the `ref`
# group along axis 0, then index the `hist` group with it.
# NOTE(review): `ref` is the full 'tas' variable rather than a single
# gridcell; if it is multi-dimensional, indexing with a multi-dimensional
# `day_inds.values` may not select what is intended -- confirm.
day_inds = compute_day_ind(ref_daygroups[100])
ref_fine_100_test = hist_daygroups[100][day_inds.values]

In [None]:
# Prototype adjustment factors for day-of-year 100 (hist minus ref, both
# reordered by `day_inds`) plotted against the trained object's quantiles.
plt.plot(AIQPD_day.ds.af.sel(dayofyear=100).quantiles, hist_daygroups[100][day_inds.values] - ref_daygroups[100][day_inds.values])

# The adjustment factors stored by the xclim implementation for the same day,
# for visual comparison.
AIQPD_day.ds.af.sel(dayofyear=100).plot()

Make some actual adjustment factors. Compute them at 1-degree resolution because that is easier at first.

In [None]:
# Open the coarse reference zarr store on GCS through the authenticated
# filesystem handle `fs`, reading it without consolidated metadata.
store_filename = 'gs://impactlab-data/climate/downscaling/dc6-dev-75bpr/reference-rechunk.zarr' 
store = fs.get_mapper(store_filename, check=False)
ref_coarse = xr.open_zarr(store, consolidated=False)

In [None]:
# ref_coarse

In [None]:
# Put the fine reference data onto the coarse grid: rename its coordinates to
# lat/lon, then take the nearest fine gridcell for every coarse lat/lon value.
# This is nearest-neighbour selection, not an area average.
ref_fine_1deg = ref_fine.rename({'latitude': 'lat', 'longitude': 'lon'}).reindex(lat=ref_coarse.lat, lon=ref_coarse.lon, method='nearest')

In [None]:
# Rechunk so that each chunk holds the full time axis (-1) for a 10 x 10
# lat/lon tile.
ref_fine_1deg = ref_fine_1deg.chunk({'time': -1, 'lat': 10, 'lon': 10})

In [None]:
# Restrict both datasets to the same 1995-01-01 .. 2014-12-31 period.
ref_fine_1deg_slice = ref_fine_1deg.sel(time=slice('1995-01-01','2014-12-31'))
ref_coarse_slice = ref_coarse.sel(time=slice('1995-01-01','2014-12-31'))

In [None]:
# west coast mask 
# Bounding box in degrees; the negative longitudes assume `lon` is expressed
# as -180..180 -- TODO confirm against the coordinate values of `ref_coarse`.
min_lon = -125
min_lat = 30
max_lon = -110
max_lat = 47

# Boolean masks along lon and lat, built from the regridded fine data's
# coordinates.
mask_lon = (ref_fine_1deg_slice.lon >= min_lon) & (ref_fine_1deg_slice.lon <= max_lon)
mask_lat = (ref_fine_1deg_slice.lat >= min_lat) & (ref_fine_1deg_slice.lat <= max_lat)

In [None]:
# Apply the bounding-box masks, drop everything outside them, and load the
# result into memory.
ref_fine_1deg_slice_wc = ref_fine_1deg_slice.where(mask_lon & mask_lat, drop=True).load()

In [None]:
# Same subsetting for the coarse data. The masks were built from the fine
# data's lat/lon, which were reindexed onto `ref_coarse`'s lat/lon, so the
# coordinates are expected to line up.
ref_coarse_slice_wc = ref_coarse_slice.where(mask_lon & mask_lat, drop=True).load()

In [None]:
# Fresh downscaling object for the real data (this rebinds `AIQPD`): additive,
# 620 quantiles, day-of-year grouping with a 31-day window.
AIQPD = AnalogQuantilePreservingDownscaling(kind='+', nquantiles=620, group=sdba.Grouper("time.dayofyear", window=31))

In [None]:
# Train with the coarse data first and the regridded fine data second. The
# variable is named 'tasmax' in the coarse store and 'tmax' in the ERA-5 files.
AIQPD.train(ref_coarse_slice_wc['tasmax'], ref_fine_1deg_slice_wc['tmax'])
AIQPD

In [None]:
# AIQPD.ds.af

In [None]:
# Adjustment factors at positional index 220 along `dayofyear`. isel is
# position-based, so this is not necessarily the day labelled 220.
doy_af = AIQPD.ds.af.isel(dayofyear=220)

In [None]:
# Plot those adjustment factors at positional quantile index 400.
doy_af.isel(quantiles=400).plot(robust=True)

Treat the first 620 data points as a day group to see if a lack of data points is the problem.

In [None]:
# Windowless day-of-year grouping again, this time with 620 quantiles. This
# rebinds `AIQPD_day`; the object is created here but not trained in the
# cells that follow.
AIQPD_day = AnalogQuantilePreservingDownscaling(kind='+', nquantiles=620, group=sdba.Grouper("time.dayofyear"))

Need some smaller arrays for testing scaling up to 366 day groups. First use a single gridcell, then try a few gridcells, e.g. `tas_gc` and `tas_severalgc`.

In [None]:
# 620 evenly spaced quantile levels: 0/620, 1/620, ..., 619/620 (1.0 is not
# included).
x = np.arange(0, 620)
coarse_quantiles = x / float(len(x))

In [None]:
# NOTE(review): `tas_severalgc` is not defined in any cell visible in this
# notebook, so this cell fails on a fresh kernel unless it is created elsewhere.

# Sort order (along axis 0) computed separately for every day-of-year group.
u = tas_severalgc.groupby('time.dayofyear').map(compute_day_ind)

# tas_gc.quantile(q=coarse_quantiles)
# Rank along time; the result is not stored.
tas_severalgc.rank(dim='time')
# Sort the whole array along axis 0 by gathering values with the argsort
# indices.
argsorted = tas_severalgc.argsort(axis=0)
tas_sorted = xr.apply_ufunc(np.take_along_axis, tas_severalgc, kwargs={"axis": 0, "indices": argsorted})

# Difference between the quantile values and the first 620 sorted values; the
# [:620, :, :] indexing means the array is treated as 3-D. Not stored.
tas_severalgc.quantile(q=coarse_quantiles, dim='time') - tas_sorted[:620, :, :].values

# Pure-numpy version of the same axis-0 sort.
test_sort = np.take_along_axis(tas_severalgc.values, tas_severalgc.argsort(axis=0).values, axis=0)