Reference issue: https://github.com/NCAR/cesm-lens-aws/issues/34 (this notebook writes CESM1 Large Ensemble 20C ocean variables to Zarr stores).

In [1]:
# Reload imported modules before each cell executes, so edits to local helper
# modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import xarray as xr
import intake
from tqdm.auto import tqdm
import dask
from ncar_jobqueue import NCARCluster
from distributed import Client
from utils import _restore_non_dim_coords, preprocess, show_ds_info, print_ds_info, save_data, zarr_store
# Use xarray's HTML repr so datasets render as rich tables in the notebook.
xr.set_options(display_style='html')
# Build dashboard links as a proxy-relative path -- presumably so they resolve
# through the Jupyter server proxy; confirm for your deployment.
dask.config.set({'distributed.dashboard.link': '/proxy/{port}/status'})
# Echo the dashboard config so the effective link setting shows in the output.
dask.config.get('distributed.dashboard')

{'link': '/proxy/{port}/status', 'export-tool': False}

In [3]:
# Start an NCARCluster (cores=2) and attach a distributed Client to it.
cluster = NCARCluster(cores=2)
# Adaptive-scaling alternative, left disabled in favour of the fixed scale below.
#cluster.adapt(minimum_jobs=2, maximum_jobs=80, wait_count=120)
cluster.scale(80)
client = Client(cluster)
# Display the cluster widget.
cluster

VBox(children=(HTML(value='<h2>NCARCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    .…

In [4]:
# Open the intake-esm catalog for the CESM1 Large Ensemble files; the path is
# relative to the directory the notebook is run from.
col = intake.open_esm_datastore("../catalogs/glade-campaign-cesm1-le.json")
# Show the catalog summary (entry count and unique values per column).
col

glade-cesm1-le-ESM Collection with 146462 entries:
	> 6 experiment(s)

	> 93 case(s)

	> 6 component(s)

	> 13 stream(s)

	> 820 variable(s)

	> 89 date_range(s)

	> 41 member_id(s)

	> 146462 path(s)

	> 1 ctrl_branch_year(s)

	> 1 year_offset(s)

In [5]:
# Chunking presets wrapped as keyword-argument dicts ({'chunks': {...}}).
# Both presets use one vertical level per chunk on every depth axis.
# NOTE(review): neither name is referenced by the other cells visible in this
# notebook; the load cell defines its own `chunks` / `chunksOut`.
_single_level_depth = {'z_t': 1, 'z_w_top': 1, 'z_w_bot': 1}
chunksCTRL = {'chunks': dict(time=360, **_single_level_depth)}
chunksOther = {'chunks': dict(member_id=40, time=12, **_single_level_depth)}

In [8]:
# Variables requested for this run. A broader list
# ("TEMP", "UVEL", "VVEL", "WVEL", "VNS", "VNT", "SHF", "SFWF") used to be
# assigned here and was overwritten on the very next line, so it never took
# effect; it is kept only as a note.
variables = ["VVEL", "WVEL", "VNT", "VNS"]
col_subset = col.search(variable=variables, experiment='20C')

# The search returns only the entries that match and does not complain about
# requested names with no entries, so report those explicitly instead of
# discovering the gap after the (expensive) conversion.
missing_variables = sorted(set(variables) - set(col_subset.df['variable'].unique()))
if missing_variables:
    print(f"WARNING: no catalog entries found for: {missing_variables}")

# Show the summary of the filtered catalog.
col_subset

glade-cesm1-le-ESM Collection with 80 entries:
	> 1 experiment(s)

	> 40 case(s)

	> 1 component(s)

	> 1 stream(s)

	> 2 variable(s)

	> 2 date_range(s)

	> 40 member_id(s)

	> 80 path(s)

	> 1 ctrl_branch_year(s)

	> 0 year_offset(s)

In [9]:
# `chunks` is the chunking used when opening the source files; `chunksOut` is
# the layout each dataset is rechunked to before it is written out.
chunks = {'time': 240, 'z_t': 1,  'z_w_top': 1,  'z_w_bot': 1}
chunksOut = {'member_id': 2, 'time': 240, 'z_t': 1,  'z_w_top': 1,  'z_w_bot': 1}
datasets = col_subset.to_dataset_dict(cdf_kwargs={'chunks': chunks}, preprocess=preprocess)

# Replace every entry with its rechunked version and report its layout.
# Iterating over a snapshot of the keys keeps the in-place replacement explicit.
for dataset_key in list(datasets):
    rechunked = _restore_non_dim_coords(datasets[dataset_key]).chunk(chunksOut)
    datasets[dataset_key] = rechunked
    # The variable name is the last dot-separated field of the key.
    print_ds_info(rechunked, dataset_key.split('.')[-1])
    print('\n')


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.stream.variable'
                
--> There is/are 2 group(s)
Variable name: VVEL
Dataset dimensions: ('member_id', 'time', 'z_t', 'nlat', 'nlon')
Chunk shape: (2, 240, 1, 384, 320)
Dataset shape: (40, 1872, 60, 384, 320)
Chunk size: 235.93 MB
Dataset size: 2.21 TB


Variable name: WVEL
Dataset dimensions: ('member_id', 'time', 'z_w_top', 'nlat', 'nlon')
Chunk shape: (2, 240, 1, 384, 320)
Dataset shape: (40, 1872, 60, 384, 320)
Chunk size: 235.93 MB
Dataset size: 2.21 TB




In [10]:
# List the keys of the loaded datasets.
datasets.keys()

dict_keys(['ocn.20C.pop.h.VVEL', 'ocn.20C.pop.h.WVEL'])

In [11]:
# Display the VVEL dataset to inspect its coordinates and chunk layout.
datasets['ocn.20C.pop.h.VVEL']

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.28 kB,1.28 kB
Shape,"(5,)","(5,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray
"Array Chunk Bytes 1.28 kB 1.28 kB Shape (5,) (5,) Count 2 Tasks 1 Chunks Type |S256 numpy.ndarray",5  1,

Unnamed: 0,Array,Chunk
Bytes,1.28 kB,1.28 kB
Shape,"(5,)","(5,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(60,)","(60,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (60,) (60,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",60  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(60,)","(60,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,768 B,768 B
Shape,"(3,)","(3,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray
"Array Chunk Bytes 768 B 768 B Shape (3,) (3,) Count 2 Tasks 1 Chunks Type |S256 numpy.ndarray",3  1,

Unnamed: 0,Array,Chunk
Bytes,768 B,768 B
Shape,"(3,)","(3,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,512 B,512 B
Shape,"(2,)","(2,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray
"Array Chunk Bytes 512 B 512 B Shape (2,) (2,) Count 2 Tasks 1 Chunks Type |S256 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,512 B,512 B
Shape,"(2,)","(2,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,29.95 kB,3.84 kB
Shape,"(1872, 2)","(240, 2)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 29.95 kB 3.84 kB Shape (1872, 2) (240, 2) Count 9 Tasks 8 Chunks Type object numpy.ndarray",2  1872,

Unnamed: 0,Array,Chunk
Bytes,29.95 kB,3.84 kB
Shape,"(1872, 2)","(240, 2)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,4 B
Shape,"(60,)","(1,)"
Count,61 Tasks,60 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 240 B 4 B Shape (60,) (1,) Count 61 Tasks 60 Chunks Type float32 numpy.ndarray",60  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,4 B
Shape,"(60,)","(1,)"
Count,61 Tasks,60 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 983.04 kB 983.04 kB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,983.04 kB,983.04 kB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.21 TB,235.93 MB
Shape,"(40, 1872, 60, 384, 320)","(2, 240, 1, 384, 320)"
Count,142256 Tasks,9600 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.21 TB 235.93 MB Shape (40, 1872, 60, 384, 320) (2, 240, 1, 384, 320) Count 142256 Tasks 9600 Chunks Type float32 numpy.ndarray",1872  40  320  384  60,

Unnamed: 0,Array,Chunk
Bytes,2.21 TB,235.93 MB
Shape,"(40, 1872, 60, 384, 320)","(2, 240, 1, 384, 320)"
Count,142256 Tasks,9600 Chunks
Type,float32,numpy.ndarray


In [12]:
# Write every dataset to its own Zarr store under `dirout`.
# NOTE(review): `dirout` is an absolute, user-specific scratch path.
dirout = "/glade/scratch/abanihi/lens-aws"
frequency = 'monthly'
for dataset_key, dataset in tqdm(datasets.items()):
    # Keys look like 'component.experiment.stream.variable'. The stream field
    # itself contains a dot (e.g. 'pop.h'), so the variable is taken from the end.
    fields = dataset_key.split('.')
    component, experiment, variable = fields[0], fields[1], fields[-1]
    store = zarr_store(experiment, component, frequency, variable, write=True, dirout=dirout)
    save_data(dataset, store)

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

/glade/scratch/abanihi/lens-aws/ocn/monthly/cesmLE-20C-VVEL.zarr
/glade/scratch/abanihi/lens-aws/ocn/monthly/cesmLE-20C-WVEL.zarr



In [13]:
# Close the client first so it is not left connected to a scheduler that is
# about to disappear, then shut the cluster down.
client.close()
cluster.close()