# Notebook Used to Generate CESM2-LE Zarr Stores

In [2]:
%load_ext autoreload
%autoreload 2

import json
import os
import pprint
import random
import shutil
from functools import reduce, partial
from operator import mul
import yaml

import xarray as xr
import yaml
from distributed import Client
from dask.utils import format_bytes
from tqdm.auto import tqdm
import pandas as pd
from collections import Counter

import dask
import intake
from ncar_jobqueue import NCARCluster
from helpers import (create_grid_dataset, enforce_chunking, get_grid_vars,
                     print_ds_info, process_variables, save_data, zarr_store, fix_time, inspect_written_stores)

#dask.config.set({"distributed.dashboard.link": "/proxy/{port}/status"})
xr.set_options(keep_attrs=True)
import numpy as np

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  from distributed.utils import format_bytes


## Spin up a Cluster

Run only **one** of the next two cells: the first requests a 100GB job, the second a larger 360GB job. Use the larger one if you need more memory.

In [2]:
# Smaller cluster configuration: 100GB of memory and an 8 hour walltime per job.
# NOTE(review): cores=4 while processes=5 and the resource spec asks for ncpus=5 —
# confirm this mismatch is intended.
cluster = NCARCluster(
    memory="100GB",
    walltime="8:00:00",
    cores=4,
    processes=5,
    resource_spec="select=1:ncpus=5:mem=100GB",
)

In [2]:
# Larger cluster configuration: 360GB of memory, 36 single-core worker processes
# and a 12 hour walltime per job. Running this cell rebinds `cluster`, replacing
# any smaller configuration created earlier in the session.
cluster = NCARCluster(
    memory="360GB",
    walltime="12:00:00",
    cores=1,
    processes=36,
    resource_spec="select=1:ncpus=36:mem=360GB",
)

In [3]:
# Attach a distributed Client to the cluster object; `client` is reused later
# (the write loop calls client.restart()). The bare `client` expression renders
# the connection summary / dashboard link as the cell output.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.54:41312,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [None]:
# Show the cluster object as the cell output.
# NOTE(review): no cluster.scale()/cluster.adapt() call is visible in this notebook
# and the client summary reports 0 workers — presumably workers are requested
# through the widget rendered here; confirm.
cluster

## Access the data catalog

In [5]:
# Open the CESM2-LE intake-esm catalog from its JSON description; ending the cell
# with `col` renders the catalog summary table.
col = intake.open_esm_datastore("/glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.json")
col

  self._df, self.catalog_file = _fetch_catalog(self.esmcol_data, esmcol_obj, csv_kwargs)


Unnamed: 0,unique
component,6
stream,26
case,200
member_id,100
variable,1906
start_time,157
end_time,180
time_range,163
long_name,1800
units,184


## Configure which directory to write out to

In [6]:
# Root directory under which the Zarr stores are written (passed to zarr_store()).
# Set the LENS2_ZARR_DIR environment variable before starting the kernel to write
# somewhere else without editing the notebook; the default is the original path.
dirout = os.environ.get("LENS2_ZARR_DIR", "/glade/scratch/mgrover/data/lens2-aws")

In [7]:
#dirout = "/glade/scratch/abanihi/lens2-aws"

In [8]:
def _preprocess(ds, variables):
    """Drop all unnecessary variables and coordinates"""

    vars_to_drop = [vname for vname in ds.data_vars if vname not in variables]
    coord_vars = [
        vname
        for vname in ds.data_vars
        if "time" not in ds[vname].dims or "bound" in vname or "bnds" in vname
    ]
    ds_fixed = ds.set_coords(coord_vars)
    data_vars_dims = []
    for data_var in ds_fixed.data_vars:
        data_vars_dims.extend(list(ds_fixed[data_var].dims))
    coords_to_drop = [
        coord for coord in ds_fixed.coords if coord not in data_vars_dims
    ]
    grid_vars = list(
        set(vars_to_drop + coords_to_drop)
        - set(["time", "time_bound", "time_bnds", "time_bounds"])
    )
    ds_fixed = ds_fixed.drop(grid_vars).reset_coords()
    if "history" in ds_fixed.attrs:
        del ds_fixed.attrs["history"]
    return ds_fixed

In [6]:
# Load the per-component / per-stream processing settings (frequency, variables,
# chunking per experiment) that drive the rest of the notebook.
with open("config.yml") as f:
    config = yaml.safe_load(f)

# Pretty-print instead of print(): the nested dict is unreadable on a single line.
# sort_dicts=False keeps the key order of the YAML file.
pprint.pprint(config, sort_dicts=False)

{'atm': {'cam.h0': {'frequency': 'monthly', 'stream': 'cam.h0', 'freq': 'MS', 'time_bounds_dim': 'nbnd', 'variable_category': {'3D': {'variable': ['T', 'U', 'V', 'Q', 'Z3'], 'experiment': {'ssp370': {'chunks': {'time': 24}}, 'historical': {'chunks': {'time': 24}}}}, '2D': {'variable': ['FLNS', 'FLNSC', 'FLUT', 'FSNS', 'FSNSC', 'FSNTOA', 'ICEFRAC', 'LHFLX', 'PRECC', 'PRECL', 'PRECSC', 'PRECSL', 'PSL', 'SHFLX', 'TMQ', 'TREFHT', 'TREFHTMN', 'TREFHTMX', 'TS'], 'experiment': {'ssp370': {'chunks': {'time': 600}}, 'historical': {'chunks': {'time': 600}}}}}, 'cam.h6': {'frequency': 'daily', 'stream': 'cam.h6', 'freq': 'D', 'time_bounds_dim': 'nbnd', 'variable_category': {'3D': {'variable': ['T', 'U', 'V', 'Q', 'Z3'], 'experiment': {'ssp370': {'chunks': {'time': 10}}, 'historical': {'chunks': {'time': 10}}}}}}, 'cam.h1': {'frequency': 'daily', 'stream': 'cam.h1', 'freq': 'D', 'time_bounds_dim': 'nbnd', 'variable_category': {'2D': {'variable': ['FLNS', 'FLNSC', 'FLUT', 'FSNS', 'FSNSC', 'FSNTOA',

In [10]:
# Build one run description per (component, stream, variable category, experiment)
# combination found in `config`.
#   run_config : list of dicts consumed by the write loop; each holds the catalog
#                query, the matching catalog subset and the chunking/time settings.
#   variables  : flat list of every variable name seen (extended once per
#                experiment, so names repeat).
run_config = []
variables = []
for component, stream_val in config.items():
    for stream, v in stream_val.items():
        frequency = v["frequency"]
        freq = v["freq"]
        time_bounds_dim = v["time_bounds_dim"]

        for v_cat, category in v["variable_category"].items():
            for exp, exp_settings in category["experiment"].items():
                chunks = exp_settings["chunks"]
                variable = category["variable"]

                # Best effort: a category whose `variable` entry is not iterable
                # (e.g. left empty in the YAML) is simply not recorded here.
                try:
                    variables.extend(variable)
                except TypeError:
                    pass

                col_subset, query = process_variables(
                    col, variable, component, stream, exp
                )
                print(col_subset)

                # Only queue runs that matched at least one asset. Appending
                # inside this guard avoids re-queuing the previous iteration's
                # run description when the current subset is empty.
                if col_subset.df.empty:
                    print(f"no assets matched {query}; skipping")
                    continue

                run_config.append(
                    {
                        "query": query,
                        "col": col_subset,
                        "chunks": chunks,
                        "frequency": frequency,
                        "freq": freq,
                        "time_bounds_dim": time_bounds_dim,
                    }
                )

<None catalog with 30 dataset(s) from 25500 asset(s)>
<None catalog with 30 dataset(s) from 25500 asset(s)>


In [11]:
def determine_chunk_size(ds):
    """Suggest chunk sizes that put roughly 100 MB of data in each time chunk.

    Every dimension keeps its full length except ``time``, whose chunk length is
    scaled so that ``chunk_length / ntime`` of the dataset is about 100e6 bytes.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a ``time`` coordinate. It is not modified.

    Returns
    -------
    dict
        Mapping of dimension name to chunk length. The ``time`` entry is always
        at least 1.
    """
    ntime = len(ds.time)       # the number of time slices
    chunksize_optimal = 100e6  # desired chunk size in bytes
    ncfile_size = ds.nbytes    # total size of the dataset in bytes, as reported by xarray

    if ncfile_size > 0:
        chunksize = max(int(ntime * chunksize_optimal / ncfile_size), 1)
    else:
        # Empty dataset: nothing to split, and the ratio above would divide by zero.
        chunksize = max(ntime, 1)

    # Copy the sizes first: writing into the mapping exposed by the dataset would
    # change the dataset's own dimension bookkeeping.
    target_chunks = dict(ds.sizes)
    target_chunks['time'] = chunksize

    return target_chunks  # a dictionary giving the chunk sizes in each dimension

In [None]:
import cftime

# Main driver: for every queued run, load the matching datasets, clean them up,
# restrict/repair the time axis for the experiment and write one Zarr store per
# (forcing variant, variable).
#
# Dataset keys are '.'-joined fields:
# 'component.experiment.stream.forcing_variant.variable'
field_separator = '.'
for run in tqdm(run_config[:], desc="runs"):
    print("*" * 120)
    query = run["query"]
    print(f"query = {query}")
    frequency = run["frequency"]
    chunks = run["chunks"]
    cftime_freq = run["freq"]
    time_bounds_dim = run["time_bounds_dim"]

    #if query["experiment"] == "20C" and query["stream"] == "cice.h1":
    #    if query["component"] == "ice_sh":
    #        preprocess = _preprocess_ice_sh
    #    elif query["component"] == "ice_nh":
    #        preprocess = _preprocess_ice_nh
    #elif query["component"] == "lnd":
    #    preprocess = _preprocess_lnd
    #elif query["component"] == "atm":
    #    preprocess = _preprocess_atm

    #print(preprocess.__name__)

    with dask.config.set(**{'array.slicing.split_large_chunks': False}):
        # Use a dedicated name: rebinding `col` here would replace the full
        # catalog that the run-configuration cell queries, breaking a re-run.
        run_col = run["col"]
        df = run_col.df

        #for var in run['query']['variable']:
        #    
        #    df = df.replace({'path':{f'/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/day_1/{var}/b.e21.BHISTcmip6.f09_g17.LE2-1231.005.cam.h6.{var}.1860101-18691231.nc':
        #                                     f'/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/day_1/{var}/b.e21.BHISTcmip6.f09_g17.LE2-1231.005.cam.h6.{var}.18600101-18691231.nc'}})
        run_col.df = df
        dsets = run_col.to_dataset_dict(
            cdf_kwargs={"chunks": chunks, "decode_times": True, "use_cftime": True},
            progressbar=True,
        )

    #chunks = determine_chunk_size(dsets[list(dsets.keys())[0]])
    dsets = enforce_chunking(dsets, chunks, field_separator)

    for key, ds in tqdm(dsets.items(), desc="Saving zarr store"):
        ds = ds.sortby('time')
        ds = _preprocess(ds, query['variable'])
        key = key.split(field_separator)
        component = query['component']
        experiment = query['experiment']
        stream = query['stream']
        forcing_variant = key[-2]
        #control_branch_year = key[-2]
        variable = key[-1]

        # Six-hourly output is skipped entirely by this loop.
        if frequency != "hourly6":

            if experiment == 'historical':

                # Keep only timestamps between 1850-02-01 and 2015-01-01.
                start = cftime.datetime(1850, 2, 1, 0, 0, 0, 0, calendar='noleap', has_year_zero=True)
                end = cftime.datetime(2015, 1, 1, 0, 0, 0, 0, calendar='noleap', has_year_zero=True)
                ds=ds.sel(time=slice(start, end)).unify_chunks()

                start_time = "1850-01"

                if frequency != 'daily':
                    end_time = "2015-01"

                else:
                    end_time = "2015-01-02"

                # NOTE(review): fix_time is a project helper; presumably it rebuilds the
                # time axis / bounds for [start, end) at `freq` — confirm in helpers.py.
                try:
                    ds = fix_time(
                                ds,
                                start=start_time,
                                end=end_time,
                                freq=cftime_freq,
                                time_bounds_dim=time_bounds_dim,
                            ).unify_chunks()

                except Exception as exc:
                    # Skip this dataset but say why; `Exception` (rather than a bare
                    # except) lets Ctrl-C / kernel interrupts stop the loop.
                    print(f"fix_time failed for {experiment}.{forcing_variant}.{variable}: {exc!r}")
                    print(ds.time)
                    continue

                # apply the chunks again to make sure they are uniform
                ds = ds.chunk(chunks)

                if frequency == 'daily':
                    # Drop the final daily timestep.
                    with dask.config.set(**{'array.slicing.split_large_chunks': False}):
                        ds= ds.sel(time = ds.time[:-1])

                store = zarr_store(experiment,
                                   component,
                                   frequency, 
                                   forcing_variant,
                                   variable,
                                   write=False,
                                   dirout=dirout
                                  )

                try:
                    if ds.nbytes/1e12 > 2:
                        # Larger than 2e12 bytes: write the first ensemble member,
                        # then append the remaining members one at a time.
                        ds.isel(member_id=range(1)).to_zarr(store, mode='w')

                        for member in range(len(ds.member_id.values)-1):
                            print(member+1)
                            ds.isel(member_id=range(member+1,member+2)).to_zarr(store, append_dim='member_id')
                        client.restart()

                    else:
                        save_data(ds, store)
                except Exception as exc:
                    # Keep going with the next dataset, but report the cause. The
                    # store may be left partially written and should be checked.
                    print('error with ', store)
                    print(f"  -> {exc!r}")

            elif experiment == 'ssp370':
                # Keep only timestamps between 2015-02-01 and 2101-01-01.
                start = cftime.datetime(2015, 2, 1, 0, 0, 0, 0, calendar='noleap', has_year_zero=True)
                end = cftime.datetime(2101, 1, 1, 0, 0, 0, 0, calendar='noleap', has_year_zero=True)
                ds=ds.sel(time=slice(start, end)).unify_chunks()
                start_time = "2015-01"
                if frequency != 'daily':
                    end_time = "2101-01"

                else:
                    end_time = "2101-01-02"
                # Unlike the historical branch, failures here are not caught and
                # will stop the loop.
                ds = fix_time(
                            ds,
                            start=start_time,
                            end=end_time,
                            freq=cftime_freq,
                            time_bounds_dim=time_bounds_dim,
                        ).unify_chunks()

                # apply the chunks again to make sure they are uniform
                ds = ds.chunk(chunks)

                #if frequency == 'daily':
                #    with dask.config.set(**{'array.slicing.split_large_chunks': False}):
                #        ds = ds.sel(time = ds.time[:-1])

                store = zarr_store(experiment,
                                   component,
                                   frequency, 
                                   forcing_variant,
                                   variable,
                                   write=False,
                                   dirout=dirout
                                  )

                save_data(ds, store)
                

runs:   0%|          | 0/2 [00:00<?, ?it/s]

************************************************************************************************************************
query = {'component': 'ocn', 'stream': 'pop.h', 'variable': ['SST', 'SSH', 'SFWF', 'SHF', 'TAUX', 'TAUY', 'FW', 'TAUX2', 'TAUY2', 'QFLUX', 'HMXL', 'QSW_HTP', 'QSW_HBL', 'SHF_QSW', 'SFWF_WRST', 'RESID_S'], 'experiment': 'historical'}

--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.stream.forcing_variant.variable'


Variable name: QSW_HTP
Dataset dimensions: ('member_id', 'time', 'nlat', 'nlon')
Chunk shape: (1, 300, 384, 320)
Dataset shape: (50, 1980, 384, 320)
Chunk size: 140.62 MiB
Dataset size: 45.34 GiB


Variable name: TAUX2
Dataset dimensions: ('member_id', 'time', 'nlat', 'nlon')
Chunk shape: (1, 300, 384, 320)
Dataset shape: (50, 2412, 384, 320)
Chunk size: 140.62 MiB
Dataset size: 55.23 GiB


Variable name: HMXL
Dataset dimensions: ('member_id', 'time', 'nlat', 'nlon')
Chunk shape: (1, 300, 384, 320)
Dataset shape: (50, 2052, 384, 320)
Chunk size: 140.62 MiB
Dataset size: 46.99 GiB


Variable name: RESID_S
Dataset dimensions: ('member_id', 'time', 'nlat', 'nlon')
Chunk shape: (1, 300, 384, 320)
Dataset shape: (50, 2412, 384, 320)
Chunk size: 140.62 MiB
Dataset size: 55.23 GiB


Variable name: QSW_HBL
Dataset dimensions: ('member_id', 'time', 'nlat', 'nlon')
Chunk shape: (1, 300, 384, 320)
Dataset shape: (50, 2412, 384, 320)
Chunk size: 140.62 MiB
Dataset size: 55.23 GiB


Variable name: 

Saving zarr store:   0%|          | 0/30 [00:00<?, ?it/s]

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-QSW_HTP.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-TAUX2.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-HMXL.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-RESID_S.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-QSW_HBL.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-SFWF_WRST.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-SSH.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-FW.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-TAUY2.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-QFLUX.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-QSW_HBL.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-SHF.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-RESID_S.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-SHF.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-QFLUX.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-FW.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-SHF_QSW.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-TAUY2.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-SSH.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-QSW_HTP.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-HMXL.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-TAUX2.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-TAUY.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-SHF_QSW.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-TAUY.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-SFWF.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-TAUX.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-TAUX.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-SFWF_WRST.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-SFWF.zarr
************************************************************************************************************************
query = {'component': 'ocn', 'stream': 'pop.h', 'variable': ['SALT', 'TEMP', 'UVEL', 'VNS', 'VNT', 'VVEL', 'WVEL', 'UES', 'UET', 'DIC', 'DOC', 'PD', 'O2', 'WTS', 'WTT'], 'experiment': 'historical'}

--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.stream.forcing_variant.variable'


Variable name: UET
Dataset dimensions: ('member_id', 'time', 'z_t', 'nlat', 'nlon')
Chunk shape: (1, 6, 60, 384, 320)
Dataset shape: (50, 1980, 60, 384, 320)
Chunk size: 168.75 MiB
Dataset size: 2.66 TiB


Variable name: VVEL
Dataset dimensions: ('member_id', 'time', 'z_t', 'nlat', 'nlon')
Chunk shape: (1, 6, 60, 384, 320)
Dataset shape: (50, 1980, 60, 384, 320)
Chunk size: 168.75 MiB
Dataset size: 2.66 TiB


Variable name: UET
Dataset dimensions: ('member_id', 'time', 'z_t', 'nlat', 'nlon')
Chunk shape: (1, 6, 60, 384, 320)
Dataset shape: (50, 2412, 60, 384, 320)
Chunk size: 168.75 MiB
Dataset size: 3.23 TiB


Variable name: WTS
Dataset dimensions: ('member_id', 'time', 'z_w_top', 'nlat', 'nlon')
Chunk shape: (1, 6, 60, 384, 320)
Dataset shape: (50, 2412, 60, 384, 320)
Chunk size: 168.75 MiB
Dataset size: 3.23 TiB


Variable name: UVEL
Dataset dimensions: ('member_id', 'time', 'z_t', 'nlat', 'nlon')
Chunk shape: (1, 6, 60, 384, 320)
Dataset shape: (50, 2412, 60, 384, 320)
Chunk size: 

Saving zarr store:   0%|          | 0/30 [00:00<?, ?it/s]

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-UET.zarr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


Future exception was never retrieved
future: <Future finished exception=CommClosedError("Exception while trying to call remote method 'restart' before comm was established.")>
Traceback (most recent call last):
  File "/glade/work/mgrover/miniconda3/envs/cesm-collections-dev/lib/python3.9/site-packages/distributed/comm/tcp.py", line 205, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/glade/work/mgrover/miniconda3/envs/cesm-collections-dev/lib/python3.9/site-packages/distributed/core.py", line 787, in send_recv_from_rpc
    result = await send_recv(comm=comm, op=key, **kwargs)
  File "/glade/work/mgrover/miniconda3/envs/cesm-collections-dev/lib/python3.9/site-packages/distributed/core.py", line 640, in send_recv
    response = await comm.read(deserializers=deserializers)
  File "/glade/work/mgrover/minicond

/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-VVEL.zarr
1
2
3
4


Task exception was never retrieved
future: <Task finished name='Task-3133200' coro=<rpc.__getattr__.<locals>.send_recv_from_rpc() done, defined at /glade/work/mgrover/miniconda3/envs/cesm-collections-dev/lib/python3.9/site-packages/distributed/core.py:778> exception=CommClosedError("Exception while trying to call remote method 'restart' before comm was established.")>
Traceback (most recent call last):
  File "/glade/work/mgrover/miniconda3/envs/cesm-collections-dev/lib/python3.9/site-packages/distributed/comm/tcp.py", line 205, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/glade/work/mgrover/miniconda3/envs/cesm-collections-dev/lib/python3.9/site-packages/distributed/core.py", line 787, in send_recv_from_rpc
    result = await send_recv(comm=comm, op=key, **kwargs)
  File "/glade/work/mgrover/miniconda3/

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-UET.zarr
error with  /glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-UET.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-WTS.zarr
error with  /glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-WTS.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-UVEL.zarr
error with  /glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-UVEL.zarr


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-VNS.zarr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-SALT.zarr
error with  /glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-SALT.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-DIC.zarr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-WVEL.zarr
error with  /glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-WVEL.zarr
/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-VNS.zarr
error with  /glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-cmip6-VNS.zarr
/glade/scratch

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-WTT.zarr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-UES.zarr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


/glade/scratch/mgrover/data/lens2-aws/ocn/monthly/cesm2LE-historical-smbb-PD.zarr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45


In [15]:
# Tear down in order: disconnect the client first, then close the cluster.
client.close()
cluster.close()

  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # deleting job when job already gone
  with ignoring(RuntimeError):  # 