In [1]:
import intake
import xarray as xr
import pandas as pd
import numpy as np
from dask.distributed import Client, LocalCluster
import dask
import datetime
import os
import configparser

In [2]:
def print_chunks(data_array):
    """Print the chunk sizes of ``data_array``, one line per dimension.

    Parameters
    ----------
    data_array
        Object exposing ``dims`` (dimension names) and ``chunks`` (one tuple
        of chunk sizes per dimension, in the same order as ``dims``, or
        ``None`` when the data is not chunked), e.g. an ``xarray.DataArray``.

    Returns
    -------
    None
        The chunk layout is written to stdout.
    """
    chunks = data_array.chunks
    # A non-chunked array reports chunks=None; indexing into it would raise
    # an unhelpful TypeError, so report the situation instead.
    if chunks is None:
        print("Array is not chunked")
        return
    for dim, sizes in zip(data_array.dims, chunks):
        print(f"{dim} chunks: {sizes}")

In [3]:
# Local Dask cluster: 28 worker processes, one thread each.
cluster = LocalCluster(
    n_workers=28,
    threads_per_worker=1,
    processes=True,
)
# Connect a client to that cluster.
client = Client(cluster)

In [4]:
# Read the catalog location from the project's config file.
config_file = '/g/data/es60/users/thomas_moore/code/BRAN2020-intake-catalog/config.ini'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail here with a clear message rather than
# with a NoSectionError on the lookup below.
if not config.read(config_file):
    raise FileNotFoundError(f"Could not read config file: {config_file}")
# Directory holding the catalog JSON files.
catalog_path = config.get('paths', 'catalog_path')
# os.path.join adds a separator only when catalog_path lacks a trailing one,
# so the result is correct with or without a trailing '/' in the config.
BRAN2020_catalog = intake.open_esm_datastore(
    os.path.join(catalog_path, 'BRAN2020.json'),
    columns_with_iterables=['variable'],
)

In [5]:
# Catalog search criteria: which variables and which temporal resolution.
time_period_request_list = ["daily"]
var_request_list = ["temp"]
# The search below uses only the first requested variable.
var = var_request_list[0]

In [6]:
# Narrow the catalog to the requested variable and time period.
search = BRAN2020_catalog.search(
    variable=var,
    time_period=time_period_request_list,
)

In [7]:
# Keyword arguments forwarded to xarray when the catalog entries are opened:
# request a single chunk along Time (-1) and chunks of 10 levels along st_ocean.
xarray_open_kwargs = dict(chunks=dict(Time=-1, st_ocean=10))

In [8]:
# Open the search results as one dask-backed dataset, then display its summary.
DS = search.to_dask(
    xarray_open_kwargs=xarray_open_kwargs,
)
DS

Unnamed: 0,Array,Chunk
Bytes,11.16 TiB,106.43 MiB
Shape,"(11138, 51, 1500, 3600)","(31, 10, 300, 300)"
Dask graph,131760 chunks in 733 graph layers,131760 chunks in 733 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 11.16 TiB 106.43 MiB Shape (11138, 51, 1500, 3600) (31, 10, 300, 300) Dask graph 131760 chunks in 733 graph layers Data type float32 numpy.ndarray",11138  1  3600  1500  51,

Unnamed: 0,Array,Chunk
Bytes,11.16 TiB,106.43 MiB
Shape,"(11138, 51, 1500, 3600)","(31, 10, 300, 300)"
Dask graph,131760 chunks in 733 graph layers,131760 chunks in 733 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
# Show the per-dimension chunk layout of the temperature variable.
print_chunks(DS["temp"])

Time chunks: (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31

In [10]:
# Size of the dask graph behind the full temperature array.
num_tasks = len(DS["temp"].data.dask)
print(f"Number of tasks : {num_tasks}")

Number of tasks : 263886


In [11]:
# Take the first 366 Time steps as a small test subset and display it.
# NOTE(review): the Time chunk sizes printed earlier begin 31, 28, 31, ...
# (a 365-day first year), so 366 steps appear to include one step from the
# following year, which would then enter the January mean. Confirm this is intended.
DS_small = DS.isel({"Time": slice(None, 366)})
DS_small

Unnamed: 0,Array,Chunk
Bytes,375.50 GiB,106.43 MiB
Shape,"(366, 51, 1500, 3600)","(31, 10, 300, 300)"
Dask graph,4680 chunks in 734 graph layers,4680 chunks in 734 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 375.50 GiB 106.43 MiB Shape (366, 51, 1500, 3600) (31, 10, 300, 300) Dask graph 4680 chunks in 734 graph layers Data type float32 numpy.ndarray",366  1  3600  1500  51,

Unnamed: 0,Array,Chunk
Bytes,375.50 GiB,106.43 MiB
Shape,"(366, 51, 1500, 3600)","(31, 10, 300, 300)"
Dask graph,4680 chunks in 734 graph layers,4680 chunks in 734 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [12]:
# Monthly climatology: group the subset by calendar month and average each group.
clim_small = (
    DS_small
    .groupby("Time.month")
    .mean()
)

In [13]:
%%time
clim_small.to_netcdf('/scratch/es60/thomas_moore/DS_small.nc')

CPU times: user 4min 9s, sys: 2min 1s, total: 6min 11s
Wall time: 8min 2s


In [15]:
# Re-open the written climatology file to inspect the result.
xr.open_mfdataset(
    paths='/scratch/es60/thomas_moore/DS_small.nc',
    parallel=True,
)

Unnamed: 0,Array,Chunk
Bytes,12.31 GiB,12.31 GiB
Shape,"(12, 51, 1500, 3600)","(12, 51, 1500, 3600)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 12.31 GiB 12.31 GiB Shape (12, 51, 1500, 3600) (12, 51, 1500, 3600) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",12  1  3600  1500  51,

Unnamed: 0,Array,Chunk
Bytes,12.31 GiB,12.31 GiB
Shape,"(12, 51, 1500, 3600)","(12, 51, 1500, 3600)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
