In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import collections
import os 

import warnings
warnings.filterwarnings("ignore")

import xesmf as xe

from utils import _convert_lons, _remove_leap_days, compute_daily_climo, calculate_anomaly
from regridding import apply_weights

import dask.distributed as dd
import dask_kubernetes as dk
import dask
import rhg_compute_tools.kubernetes as rhgk

### This notebook is a test of all the steps for Spatial Disaggregation to get a handle on the total CPU time it will take for this part of BCSD. 

Once-off steps: 

1. Compute multi-decade daily climatologies of ERA-5 at obs-res and coarsen them to model-res (other implementations, e.g. NASA-NEX, do not say how they coarsen; we will use bilinear interpolation for consistency with the later interpolation step).

Per model/scenario/experiment steps:

1. Compute a “scaling factor” at model resolution: subtract the obs climo (coarsened to model-res) from the BC’ed model data (for precip, divide the BC’ed model data by the obs climo instead)
2. bilinearly interpolate “scaling factor” (using xESMF) from the model grid to the obs grid 
3. Apply the “scaling factor” by adding it to (for temp) or multiplying it with (for precip) the obs-res daily climatology

NOTE: For the purpose of being conservative with timing, the "coarsen obs climatology step to model-res" is in the per model/scenario/experiment step, since we don't know for sure how/if CMIP6 models will be at exactly the same resolution. 

Currently this workflow is only built out for temperature, not precipitation. All steps are included, but the last step (applying the interpolated scale factor to the obs-res daily climatology) has not yet been tested; all other parts of the workflow have been tested. The second-to-last step, the interpolation of the scaling factor from coarse to fine, is the most memory intensive, so I have only tested it for a subset of timesteps.

In [2]:
# Request a cluster plus a client connected to it from the rhg_compute_tools helper;
# the widget echoed below reports it as a KubeCluster.
client, cluster = rhgk.get_standard_cluster()
cluster

VBox(children=(HTML(value='<h2>KubeCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    .…

Load test bias-corrected output from the global bias correction prototype notebook (BC'ed NASA GISS CMIP6 data).

In [3]:
def _convert_ds_longitude(ds, lon_name='longitude'):
    ds_new = ds.assign_coords(lon=(((ds[lon_name] + 180) % 360) - 180)).sortby(lon_name)
    return ds_new

In [4]:
# Working directory: the bias-corrected test input and the obs climatology are read
# from here, the regridding weight files live here, and outputs are written below it.
workdir = '/gcs/rhg-data/climate/downscaled/workdir'

In [5]:
# Year used to subset the model data and to name the final output file.
year = 1990

In [6]:
# Open the bias-corrected test file stored in the working directory.
bc_model_path = os.path.join(workdir, 'global_bias_correction_scaling_test.nc')
tmax_model = xr.open_dataset(bc_model_path)

In [8]:
# Keep only the timesteps between 1 January and 31 December of `year`.
year_span = slice('{}-01-01'.format(year), '{}-12-31'.format(year))
tmax_model = tmax_model.sel(time=year_span)

In [9]:
# Open the GMFD 0.25-degree daily tmax file, drop length-1 dimensions,
# and rename the spatial coordinates to the short `lat` / `lon` names.
tmax_obs = xr.open_dataset(
    os.path.join('/gcs/rhg-data/climate/source_data/GMFD/tmax', 'tmax_0p25_daily_1990-1990.nc')
)
tmax_obs = tmax_obs.squeeze(drop=True)
tmax_obs = tmax_obs.rename({'latitude': 'lat', 'longitude': 'lon'})

In [10]:
# Standardize longitudes: wrap the `lon` coordinate with ((lon + 180) % 360) - 180
# and sort the dataset along it.
tmax_obs = _convert_ds_longitude(tmax_obs, lon_name='lon')

Remove leap days from obs 

In [11]:
# Remove leap days using the helper imported from utils.
# NOTE(review): presumably this drops Feb 29 so the obs follow a 365-day calendar —
# confirm against utils._remove_leap_days.
tmax_obs = _remove_leap_days(tmax_obs)

Load daily obs climatology 

In [12]:
# Daily obs climatology at obs resolution, with spatial coordinates renamed to lat/lon.
climo_obs_fine = (xr.open_dataset(os.path.join(workdir, 'gmfd_test_climo.nc'))
                 .rename({'latitude': 'lat', 'longitude': 'lon'}))
# Put the climatology on the same longitude convention and ordering as `tmax_obs`.
# Both regridders are built from the `tmax_obs` grid, and the fine scale factor is
# later added to this climatology, so the two grids must line up cell for cell.
# This is a no-op if the file already stores wrapped, ascending longitudes.
climo_obs_fine = _convert_ds_longitude(climo_obs_fine, lon_name='lon')

### Interpolate obs climo: fine -> coarse 

In [13]:
%%time 
obs_to_mod_weights = workdir + '/obs_to_mod_bilinear_spatial_disagg.nc'
regridder_obs_to_mod = xe.Regridder(tmax_obs.isel(time=0), tmax_model.isel(time=0), 
                         'bilinear', filename=obs_to_mod_weights, reuse_weights=True)

Reuse existing file: /gcs/rhg-data/climate/downscaled/workdir/obs_to_mod_bilinear_spatial_disagg.nc
CPU times: user 30.5 ms, sys: 19.3 ms, total: 49.8 ms
Wall time: 197 ms


In [14]:
%%time
# Regrid the fine obs climatology onto the model grid with the obs->model regridder.
# NOTE(review): the object handed to map_blocks is the regridder itself, not a
# dask-backed xarray object. The timings below (seconds here, microseconds for the
# following .compute()) suggest the regridding runs eagerly in this cell rather than
# lazily, despite the `_lazy` suffix — confirm against the xarray version in use.
climo_obs_coarse_lazy = xr.map_blocks(apply_weights, regridder_obs_to_mod, 
                                args=[climo_obs_fine['tmax']])

CPU times: user 1.1 s, sys: 3.05 s, total: 4.15 s
Wall time: 9.52 s


In [15]:
%%time 
# Materialize the coarse climatology. The microsecond wall time reported below
# suggests the values were already in memory when this was called.
climo_obs_coarse = climo_obs_coarse_lazy.compute()

CPU times: user 417 µs, sys: 0 ns, total: 417 µs
Wall time: 423 µs


### Compute the scaling factor at model-res: subtract the obs climo from the BC'ed model data for temperature (for precip, divide the BC'ed model data by the obs climo).

In [16]:
# Split the coarse climatology into chunks of 75 cells along both lat and lon.
chunks = dict(lat=75, lon=75)
climo_obs_coarse = climo_obs_coarse.chunk(chunks)

In [17]:
%%time 
# Coarse scaling factor: calculate_anomaly (from utils) is applied to the model data
# together with the coarse obs climatology and the variable name 'tasmax'.
# NOTE(review): presumably this returns model minus climatology grouped by day of
# year — confirm against utils.calculate_anomaly.
scale_factor_coarse = xr.map_blocks(calculate_anomaly, 
                                    tmax_model, args=[climo_obs_coarse, 'tasmax'])
# Pull the result into memory before regridding it in the next step.
sfc = scale_factor_coarse.compute()

CPU times: user 18.4 s, sys: 898 ms, total: 19.3 s
Wall time: 19.5 s


### Interpolate scaling factor: coarse (model grid) -> fine (obs grid)

In [18]:
%%time
mod_to_obs_weights = workdir + '/mod_to_obs_bilinear_spatial_disagg.nc'
regridder_mod_to_obs = xe.Regridder(tmax_model.isel(time=0), 
                                    tmax_obs.isel(time=0), 
                         'bilinear', filename=mod_to_obs_weights, reuse_weights=True)

Reuse existing file: /gcs/rhg-data/climate/downscaled/workdir/mod_to_obs_bilinear_spatial_disagg.nc
CPU times: user 103 ms, sys: 196 ms, total: 299 ms
Wall time: 950 ms


In [19]:
%%time
sfc = sfc.drop('dayofyear')
sff_lazy = xr.map_blocks(apply_weights, regridder_mod_to_obs, 
                                args=[sfc])
sff_lazy_compute = sff_lazy.compute()

CPU times: user 4.45 s, sys: 4.17 s, total: 8.62 s
Wall time: 8.05 s


### Add (or multiply for precip) the scaling factor to the obs-res daily climatology

In [20]:
# Wrap the fine-resolution scaling factor in a Dataset under the variable name
# 'scale_factor_fine'.
sff_ds = sff_lazy_compute.to_dataset(name='scale_factor_fine')

In [21]:
def apply_scale_factor(da, obs_climo, groupby_type):
    """Add the obs climatology to the scaling factor, matched group by group.

    This is the additive (temperature) form of the final spatial disaggregation
    step; the multiplicative form needed for precipitation is not implemented.

    Parameters
    ----------
    da : xarray.DataArray
        Scaling factor on the obs grid.
    obs_climo : xarray.DataArray
        Obs-resolution climatology to add to each group of ``da``.
    groupby_type : str or xarray.DataArray
        Group key passed to ``da.groupby``; the notebook passes the day of year
        of each timestep.

    Returns
    -------
    xarray.DataArray
        Result of ``da.groupby(groupby_type) + obs_climo``.
    """
    sff_daily = da.groupby(groupby_type)
    return sff_daily + obs_climo

In [None]:
# Disabled single-year prototype, kept as a bare string literal so nothing runs.
# The per-year loop cell further down performs the same call for every year.
'''year = 1990
sff_ds_year = sff_ds.sel(time=sff_ds.time.dt.year.isin([year]))
mod_yr_downscaled = apply_scale_factor(sff_ds_year['scale_factor_fine'], climo_obs_fine['tmax'], 
                                       sff_ds_year.time.dt.dayofyear)'''

In [None]:
# Disabled single-year write to a home-directory path, kept as a bare string literal
# so nothing runs.
'''filepath = '/home/jovyan/spatial_disagg/sd_%s.nc' %str(year)
mod_yr_ds = mod_yr_downscaled.to_dataset(name='downscaled')
mod_yr_ds.to_netcdf(filepath)
print("finished %s" %str(year))'''

In [None]:
# Downscale and write one netCDF file per calendar year present in the fine scale factor.
out_dir = workdir + '/spatial_disagg_prototype'
# Make sure the target directory exists so the write does not fail on a fresh workdir.
os.makedirs(out_dir, exist_ok=True)
# Use a dedicated loop variable: the notebook-level `year` set near the top is read
# again by the final write cell and must not be overwritten by this loop.
for yr in np.unique(sff_ds.time.dt.year):
    sff_ds_year = sff_ds.sel(time=sff_ds.time.dt.year.isin([yr]))
    mod_yr_downscaled = apply_scale_factor(sff_ds_year['scale_factor_fine'], climo_obs_fine['tmax'],
                                           sff_ds_year.time.dt.dayofyear)
    filepath = out_dir + '/%s.nc' % str(yr)
    mod_yr_downscaled.to_netcdf(filepath)
    print("finished %s" % str(yr))

In [22]:
%%time
# Re-chunk the fine scaling factor into blocks of 15 timesteps so the next cell can
# process it block by block.
sff_chunks = {'time': 15}
sff_ds = sff_ds.chunk(sff_chunks)

CPU times: user 4.4 s, sys: 14.6 ms, total: 4.42 s
Wall time: 4.19 s


In [23]:
%%time 
# Apply the scaling factor block by block: apply_scale_factor receives each block of
# the fine scaling factor plus the obs climatology and the day-of-year labels.
# `template` declares that the result has the same structure as the input.
# NOTE(review): whether the xarray objects in `args` are subset per block or passed
# whole depends on the xarray version — confirm for the version in use.
model_ds = xr.map_blocks(apply_scale_factor, sff_ds['scale_factor_fine'], 
                         args=[climo_obs_fine['tmax'], sff_ds.time.dt.dayofyear], 
                         template=sff_ds['scale_factor_fine'])

CPU times: user 57.2 s, sys: 1.67 s, total: 58.9 s
Wall time: 55.4 s


In [24]:
# Wrap the downscaled field in a Dataset under the variable name 'downscaled'.
model_downscaled = model_ds.to_dataset(name='downscaled')

In [None]:
# Write the downscaled dataset to a netCDF file named after the current value of `year`.
# NOTE(review): this is the same path pattern the per-year loop cell writes to, so for
# the same `year` this overwrites that file.
model_downscaled.to_netcdf(workdir + '/spatial_disagg_prototype/%s.nc' %str(year))
print("finished writing")

### Apply standardizing functions for final output and save (probably as zarr array)