In [1]:
%matplotlib inline
import xarray as xr
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os 

from adlfs import AzureBlobFileSystem
import xesmf as xe
from time import gmtime, strftime

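Define the Azure Blob filesystem (`fs`) that is used below to create the Zarr store mappers. The storage account name and key must be supplied.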

In [2]:
# Azure Blob filesystem handle used later to create the Zarr store mappers.
# Every credential is read from the environment so that no secret has to be
# typed into (and saved with) the notebook. A variable that is not set yields
# None, which is the value the original placeholders passed.
fs = AzureBlobFileSystem(
    account_name=os.environ.get("AZURE_STORAGE_ACCOUNT_NAME"),
    account_key=os.environ.get("AZURE_STORAGE_ACCOUNT_KEY"),
    client_id=os.environ.get("AZURE_CLIENT_ID"),
    client_secret=os.environ.get("AZURE_CLIENT_SECRET"),
    tenant_id=os.environ.get("AZURE_TENANT_ID"),
)

Load an example domain file. It comes from RASM, but it is essentially the same as a CESM domain file: RASM has the same structure as CESM, just with different component models.

In [3]:
# Open the example RASM domain file; the relative path means it must sit in
# the notebook's working directory. It is not used again in the cells shown here.
domain_rasm = xr.open_dataset('./domain.lnd.wr50a_ar9v4.100920.nc')

Define the domain grids using `xesmf`.

In [3]:
def create_domain_dataset(resolution=1, lat_name='lat', lon_name='lon'):
    """Build a global regular lat/lon domain dataset with 1-D coordinates.

    The grid comes from ``xe.util.grid_global(resolution, resolution)``. Its
    2-D cell-center and cell-bound arrays are collapsed to sorted 1-D axes
    with ``np.unique``.

    Parameters
    ----------
    resolution : int or float, optional
        Grid spacing in degrees, used for both longitude and latitude.
    lat_name, lon_name : str, optional
        Names given to the latitude / longitude cell-center dimensions and
        coordinates of the returned dataset.

    Returns
    -------
    xarray.Dataset
        Dataset holding the 1-D cell centers (``lat_name``, ``lon_name``) and
        the 1-D cell bounds (``lat_b``, ``lon_b``), with descriptive global
        and per-coordinate attributes.
    """
    grid_global = xe.util.grid_global(resolution, resolution)

    # Collapse the 2-D cell-center arrays while they still carry the names the
    # generated grid gives them ('lat' / 'lon'). Reading them back through
    # lat_name / lon_name after the rename only yields the real coordinate
    # values when those names are left at their defaults.
    lat_centers = np.unique(grid_global['lat'].values)
    lon_centers = np.unique(grid_global['lon'].values)

    domain = grid_global.rename({"x": lon_name, "y": lat_name})
    domain[lat_name] = lat_centers
    domain[lon_name] = lon_centers
    domain['lon_b'] = np.unique(domain['lon_b'].values)
    domain['lat_b'] = np.unique(domain['lat_b'].values)

    # With custom names the grid's 2-D 'lat' / 'lon' arrays are not overwritten
    # by the assignments above; drop them so only the 1-D axes remain.
    leftover_2d = [name for name in ('lat', 'lon') if name not in (lat_name, lon_name)]
    if leftover_2d:
        domain = domain.drop_vars(leftover_2d)

    attrs = {
        'title': 'Rhodium/CIL bias correction and downscaling %s-degree grid' % str(resolution),
        # Creation timestamp is taken from gmtime(), i.e. UTC.
        'history': 'created by Diana Gergel, %s' % strftime("%Y-%m-%d %H:%M:%S", gmtime()),
        'source code': 'grid specified by xesmf, xe.util.grid_global(%s, %s)' % (resolution, resolution),
    }
    domain.attrs.update(attrs)

    # Attach metadata through the caller-supplied names rather than the
    # literal 'lon' / 'lat', so custom names are documented too.
    domain[lon_name].attrs.update({'long_name': 'longitude of grid cell center', 'units': 'degrees_east'})
    domain[lat_name].attrs.update({'long_name': 'latitude of grid cell center', 'units': 'degrees_north'})
    domain['lon_b'].attrs.update({'long_name': 'longitude bounds', 'units': 'degrees_east'})
    domain['lat_b'].attrs.update({'long_name': 'latitude bounds', 'units': 'degrees_north'})

    return domain

In [4]:
# Coarse domain: global grid at 1-degree resolution.
domain_coarse = create_domain_dataset(resolution=1)

In [5]:
# Fine domain: global grid at 0.25-degree resolution.
domain_fine = create_domain_dataset(resolution=0.25)

Save the domain grids as NetCDF files and as Zarr stores.

In [6]:
# Directory that receives the NetCDF copies of the domain files. Set
# DOMAIN_OUTPUT_DIR in the environment to write somewhere else; the default is
# the location that used to be hard-coded, so existing runs are unaffected.
domain_output_dir = os.environ.get(
    'DOMAIN_OUTPUT_DIR', '/home/azureuser/cloudfiles/code/Users/dgergel'
)
domain_coarse_filename = os.path.join(domain_output_dir, 'domain.1x1.nc')
domain_fine_filename = os.path.join(domain_output_dir, 'domain.0p25x0p25.nc')

In [7]:
# Paths of the two domain Zarr stores, and the mappers `fs` builds for them
# (coarse first, then fine).
coarse_zarr, fine_zarr = "support/domain.1x1.zarr", "support/domain.0p25x0p25.zarr"
coarse_store, fine_store = (
    fs.get_mapper(zarr_path, check=False) for zarr_path in (coarse_zarr, fine_zarr)
)

In [8]:
# Write both domain grids as NetCDF files at the paths defined above.
domain_coarse.to_netcdf(domain_coarse_filename)
domain_fine.to_netcdf(domain_fine_filename)

# Write the same grids as consolidated Zarr stores through the `fs` mappers.
# NOTE(review): mode="w" presumably replaces any store already present at
# these paths - confirm that overwriting is intended.
domain_coarse.to_zarr(coarse_store, consolidated=True, mode="w")
domain_fine.to_zarr(fine_store, consolidated=True, mode="w")

<xarray.backends.zarr.ZarrStore at 0x7fcaf1fdfc90>

Test regridding with the saved domain files to ensure that the regridder service updates in dodola will address the rechunking service issues.

In [9]:
# Open the example ERA-5 file that is regridded onto the domain file grids in
# the cells that follow (this cell only loads it).
era5 = xr.open_dataset(os.path.join('/home/azureuser/cloudfiles/code/Users/dgergel', 'temp_hourly_example.nc'))

In [12]:
def validate_domain_file(filepath, test_ds, zarr=False, varname='t2m'):
    """Regrid one variable of ``test_ds`` onto the grid of a saved domain file.

    Parameters
    ----------
    filepath : str or store
        Location of the domain file. It is passed unchanged to
        ``xr.open_zarr`` when ``zarr`` is true and to ``xr.open_dataset``
        otherwise.
    test_ds : xarray.Dataset
        Dataset to regrid. Coordinates named ``latitude`` / ``longitude`` are
        renamed to ``lat`` / ``lon``; a dataset that already uses ``lat`` /
        ``lon`` is used as it is.
    zarr : bool, optional
        Whether ``filepath`` refers to a Zarr store instead of a NetCDF file.
    varname : str, optional
        Name of the variable in ``test_ds`` to regrid. Defaults to ``'t2m'``.

    Returns
    -------
    The regridded variable, as returned by calling the ``xe.Regridder``.
    """
    if zarr:
        domain = xr.open_zarr(filepath)
    else:
        domain = xr.open_dataset(filepath)

    # Rename only the names that are actually present, so inputs that already
    # use lat / lon do not make ``rename`` fail.
    renames = {
        old: new
        for old, new in (('latitude', 'lat'), ('longitude', 'lon'))
        if old in test_ds.variables or old in test_ds.dims
    }
    source_ds = test_ds.rename(renames)

    # Build the regridder from, and apply it to, the same renamed dataset so
    # the data handed to it carries the dimension names of its source grid.
    regridder = xe.Regridder(source_ds, domain, method='bilinear')
    return regridder(source_ds[varname])

In [13]:
# Check the fine-grid NetCDF domain file by regridding the ERA-5 sample onto it.
era5_regridded_fine = validate_domain_file(domain_fine_filename, era5)

  keep_attrs=keep_attrs,


In [14]:
# Same check against the fine-grid Zarr copy.
# NOTE(review): this passes the path string `fine_zarr`, not the `fine_store`
# mapper the data was written through - confirm that xr.open_zarr resolves
# the string to the same store.
era5_regridded_zarr_fine = validate_domain_file(fine_zarr, era5, zarr=True)

  keep_attrs=keep_attrs,


In [18]:
# Check the coarse-grid Zarr copy.
# NOTE(review): this passes the path string `coarse_zarr`, not the
# `coarse_store` mapper the data was written through - confirm that
# xr.open_zarr resolves the string to the same store.
era5_regridded_zarr_coarse = validate_domain_file(coarse_zarr, era5, zarr=True)

  keep_attrs=keep_attrs,


In [None]:
# Check the coarse-grid NetCDF domain file by regridding the ERA-5 sample onto it.
era5_regridded_coarse = validate_domain_file(domain_coarse_filename, era5)

In [15]:
# Display the ERA-5 sample regridded against the fine-grid Zarr domain.
era5_regridded_zarr_fine