# Create intake catalog for ERA5 disk access

- This notebook uses the package ecgtools
- We closely follow the example https://ecgtools.readthedocs.io/en/latest/how-to/use-a-custom-parser.html
- This intake catalog is for disk access (glade) only
- ERA5 is stored in the RDA dataset ds633.0. For more details, see: https://rda.ucar.edu/datasets/ds633-0/
- Another example for an ERA5 catalog can be found here: https://tutorials.dkrz.de/tutorial_intake-1-3-dkrz-catalogs-era5.html

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
import pathlib
import intake
import numpy as np
#import pandas as pd
import xarray as xr
import intake_esm
import glob
from ecgtools import Builder
from ecgtools.builder import INVALID_ASSET, TRACEBACK
# import s3fs
# import seaborn as sns
import re
import pandas as pd
from pathlib import Path


In [2]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [3]:
rda_scratch = '/gpfs/csfs1/collections/rda/scratch/harshah'
rda_data    = '/gpfs/csfs1/collections/rda/data/'
#era5_path   = rda_data + 'ds633.0/e5.oper.an.sfc/'
era5_path   = rda_data + 'ds633.0/'
zarr_path   = rda_scratch + '/tas_zarr/'
#
rda_url     =  'https://request.rda.ucar.edu/'
#This maps to /glade/campaign/collections/rda/transfer/
rda_zarr    = rda_url + 'harshah/pelican_test/tas_zarr/'

In [4]:
# Create a PBS cluster object
cluster = PBSCluster(
    job_name = 'dask-wk24-hpc',
    cores = 1,
    memory = '8GiB',
    processes = 1,
    local_directory = rda_scratch+'/dask/spill',
    resource_spec = 'select=1:ncpus=1:mem=8GB',
    queue = 'casper',
    walltime = '2:00:00',
    #interface = 'ib0'
    interface = 'ext'
)

## Build a custom parser 

### Understand the directory and file structure

In [5]:
era5_folders = sorted(glob.glob(era5_path+'*'))
era5_folders

['/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.pl',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.sfc',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.instan',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.meanflux',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.minmax',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.invariant',
 '/gpfs/csfs1/collections/rda/data/ds633.0/index.html']

### Some observations
- We observe that there is only one type of ERA data available, therefore the column name era_id = e5
- dataTpe: Two types of data are available, 'an' : Analaysis, 'fc': forecast
- Let us open each of them to see how the individual files are organized

In [6]:
era5_anfolders = sorted(glob.glob('/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/*'))
era5_anfolders[:5]

['/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194002',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194003',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194004',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194005']

In [7]:
era5_fcfiles = sorted(glob.glob('/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu/*/*'))
era5_fcfiles[:5]

['/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu/194001/e5.oper.fc.sfc.accumu.128_008_sro.ll025sc.1940010106_1940011606.grb',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu/194001/e5.oper.fc.sfc.accumu.128_008_sro.ll025sc.1940010106_1940011606.nc',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu/194001/e5.oper.fc.sfc.accumu.128_008_sro.ll025sc.1940011606_1940020106.grb',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu/194001/e5.oper.fc.sfc.accumu.128_008_sro.ll025sc.1940011606_1940020106.nc',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu/194001/e5.oper.fc.sfc.accumu.128_009_ssro.ll025sc.1940010106_1940011606.grb']

In [8]:
era_mflux = sorted(glob.glob(era5_path+ 'e5.oper.fc.sfc.meanflux/*/*'))
era_mflux[:5]

['/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.meanflux/194001/e5.oper.fc.sfc.meanflux.235_020_msror.ll025sc.1940010106_1940011606.grb',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.meanflux/194001/e5.oper.fc.sfc.meanflux.235_020_msror.ll025sc.1940010106_1940011606.nc',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.meanflux/194001/e5.oper.fc.sfc.meanflux.235_020_msror.ll025sc.1940011606_1940020106.grb',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.meanflux/194001/e5.oper.fc.sfc.meanflux.235_020_msror.ll025sc.1940011606_1940020106.nc',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.meanflux/194001/e5.oper.fc.sfc.meanflux.235_021_mssror.ll025sc.1940010106_1940011606.grb']

In [9]:
%%time
era5_anfiles = sorted(glob.glob('/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/*/*'))
era5_anfiles[:10]

CPU times: user 138 ms, sys: 63.1 ms, total: 201 ms
Wall time: 213 ms


['/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_053_vima.ll025sc.1940010100_1940013123.grb',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_053_vima.ll025sc.1940010100_1940013123.nc',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_054_vit.ll025sc.1940010100_1940013123.grb',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_054_vit.ll025sc.1940010100_1940013123.nc',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_059_vike.ll025sc.1940010100_1940013123.grb',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_059_vike.ll025sc.1940010100_1940013123.nc',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_060_vithe.ll025sc.1940010100_1940013123.grb',
 '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinte

In [10]:
ds =  xr.open_dataset(era5_fcfiles[1],engine='netcdf4')
ds

### More observations
- Data is saved in monthly files on a 0.25 degree grid.
- There are both NetCDF and GRIB files for each variable with hourly data from 00:00 to 23:00 for the all the days of the month
- File names have ECMFW table numbers https://rda.ucar.edu/datasets/ds633.0/#
- And parameter codes which have a one-to-one correspondence with a variable. See https://codes.ecmwf.int/grib/param-db/

In [11]:
path = pathlib.Path(era5_anfiles[1])
path.stem.split('_')

['e5.oper.an.vinteg.162', '053', 'vima.ll025sc.1940010100', '1940013123']

In [12]:
path_folder = pathlib.Path(era5_folders[0])
path_folder.stem.split('.')

['e5', 'oper', 'an']

In [13]:
variable_list = [var for var in ds if 'long_name' in ds[var].attrs]
variable_list

['SRO', 'utc_date']

In [14]:
ds['SRO'].attrs.get('long_name', 'NA')

'Surface runoff'

### Build a custom parser

In [15]:
import re
import xarray as xr
from pathlib import Path

def parse_era5(file_path):
    # Convert file_path to a Path object for easier handling
    file_path = Path(file_path)
    
    # Check if the file is a NetCDF file
    if file_path.suffix != '.nc':
        return {}  # Return an empty dictionary if the file is not a NetCDF file

    # Enhanced regular expression to accurately parse the date-time segments in the filename
    pattern = re.compile(r'''
        (?P<era_id>e5)\.oper\.
        (?P<datatype>[^.]+)
        (?:\.(?P<level_type>[^.]+))?  # Making level_type optional
        \.(?P<step_type>[^.]+)
        \.(?P<table_code>\d+)
        _(?P<param_code>\d+)
        _(?P<variable_short_name>\w+)
        .*?
        (?P<year>\d{4})
        (?P<month>\d{2})
        .*?                           # Skip any characters until the extension
        \.nc$                         # Ensure the file ends with .nc
        ''', re.VERBOSE | re.DOTALL)

    match = pattern.search(file_path.name)

    if match:
        try:
            with xr.open_dataset(file_path) as ds:
                # Assuming there is typically one main variable of interest
                if ds.data_vars:
                    var_name = list(ds.data_vars)[0]  # Select the first variable if no specific variable is assumed
                    var = ds[var_name]
                    short_name = var_name
                    long_name = var.attrs.get('long_name', 'N/A')
                    units = var.attrs.get('units', 'N/A')
                else:
                    short_name, long_name, units = 'N/A', 'N/A', 'N/A'

                details = {
                    'era_id': match.group('era_id'),
                    'datatype': match.group('datatype'),
                    'level_type': match.group('level_type') if match.group('level_type') else '',
                    'step_type': match.group('step_type'),
                    'table_code': match.group('table_code'),
                    'param_code': match.group('param_code'),
                    'variable': short_name,
                    'long_name': long_name,
                    'units': units,
                    'year': match.group('year'),
                    'month': match.group('month'),
                    'format': 'nc',
                    'frequency': 'hourly',  # Assuming hourly frequency based on context
                    'path': str(file_path)  # Include full file path
                }

            return details
        except Exception as e:
            print(f"Failed to open dataset or extract variables: {e}")
            return {}

    else:
        print("Failed to match the filename pattern.")
        return {}  # Return an empty dictionary if no match is found

In [16]:
era5_fcfiles[1]

'/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu/194001/e5.oper.fc.sfc.accumu.128_008_sro.ll025sc.1940010106_1940011606.nc'

In [17]:
era5_anfiles[1]

'/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_053_vima.ll025sc.1940010100_1940013123.nc'

In [18]:
file_path = era5_anfiles[1]
df = parse_era5(file_path)
df

{'era_id': 'e5',
 'datatype': 'an',
 'level_type': '',
 'step_type': 'vinteg',
 'table_code': '162',
 'param_code': '053',
 'variable': 'VIMA',
 'long_name': 'Vertical integral of mass of atmosphere',
 'units': 'kg m**-2',
 'year': '1940',
 'month': '01',
 'format': 'nc',
 'frequency': 'hourly',
 'path': '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/e5.oper.an.vinteg.162_053_vima.ll025sc.1940010100_1940013123.nc'}

In [19]:
file_path = era5_fcfiles[1]
df = parse_era5(file_path)
df

{'era_id': 'e5',
 'datatype': 'fc',
 'level_type': 'sfc',
 'step_type': 'accumu',
 'table_code': '128',
 'param_code': '008',
 'variable': 'SRO',
 'long_name': 'Surface runoff',
 'units': 'm',
 'year': '1940',
 'month': '01',
 'format': 'nc',
 'frequency': 'hourly',
 'path': '/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.fc.sfc.accumu/194001/e5.oper.fc.sfc.accumu.128_008_sro.ll025sc.1940010106_1940011606.nc'}

### Pass this parser to a builder object
- We have built a custom parser function which is flexible enough to handle different filepaths.
- Let us now use it to construct a Builder object for a single directory to check if everything works!

In [20]:
print(era5_path+'e5.oper.an.vinteg/194001/')

/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001/


In [21]:
b = Builder(paths=[era5_path+'e5.oper.an.vinteg/194001'],depth=0,exclude_patterns=['*.grb'])
b

Builder(paths=['/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001'], storage_options={}, depth=0, exclude_patterns=['*.grb'], include_patterns=[], joblib_parallel_kwargs={})

In [22]:
%%time
b.build(parsing_func= parse_era5)

CPU times: user 505 ms, sys: 135 ms, total: 640 ms
Wall time: 6.06 s


Builder(paths=['/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/194001'], storage_options={}, depth=0, exclude_patterns=['*.grb'], include_patterns=[], joblib_parallel_kwargs={})

In [23]:
b.df.head()

Unnamed: 0,era_id,datatype,level_type,step_type,table_code,param_code,variable,long_name,units,year,month,format,frequency,path
0,e5,an,,vinteg,162,53,VIMA,Vertical integral of mass of atmosphere,kg m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
1,e5,an,,vinteg,162,54,VIT,Vertical integral of temperature,K kg m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
2,e5,an,,vinteg,162,59,VIKE,Vertical integral of kinetic energy,J m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
3,e5,an,,vinteg,162,60,VITHE,Vertical integral of thermal energy,J m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
4,e5,an,,vinteg,162,61,VIPIE,Vertical integral of potential+internal energy,J m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...


In [24]:
%%time
b.save(
    name='era5_catalog_test',
    path_column_name='path',
    variable_column_name='variable',
    data_format='netcdf',
    groupby_attrs=[
        'datatype',
        'level_type',
        'step_type'
    ],
    aggregations=[
        {'type': 'union', 'attribute_name': 'variable'},
        {
            'type': 'join_existing',
            'attribute_name': 'time_range',
            'options': {'dim': 'time', 'coords': 'minimal', 'compat': 'override'},
        },
    ],
    description = 'This is the NetCDF collection of vertical integrals in the ERA5 dataset ds633, which is a part of NCAR glade collection. ',
    directory = '/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/'
)

Successfully wrote ESM catalog json file to: file:///gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs//era5_catalog_test.json
CPU times: user 630 µs, sys: 4.13 ms, total: 4.76 ms
Wall time: 6.31 ms


## Open the saved catalog and test its properties

In [25]:
test_catalog = intake.open_esm_datastore('/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/era5_catalog_test.json')
test_catalog

Unnamed: 0,unique
era_id,1
datatype,1
level_type,0
step_type,1
table_code,1
param_code,36
variable,36
long_name,36
units,7
year,1


In [26]:
test_ds = test_catalog.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'datatype.step_type'


### Inspect the keys

In [27]:
test_ds.keys()

dict_keys(['an.vinteg'])

In [29]:
test_ds['an.vinteg']

Unnamed: 0,Array,Chunk
Bytes,2.91 kiB,2.91 kiB
Shape,"(744,)","(744,)"
Dask graph,1 chunks in 109 graph layers,1 chunks in 109 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 2.91 kiB 2.91 kiB Shape (744,) (744,) Dask graph 1 chunks in 109 graph layers Data type int32 numpy.ndarray",744  1,

Unnamed: 0,Array,Chunk
Bytes,2.91 kiB,2.91 kiB
Shape,"(744,)","(744,)"
Dask graph,1 chunks in 109 graph layers,1 chunks in 109 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Select variable and plot
- I am selecting Vertical Integral of temperature

In [31]:
test_ds['an.vinteg'].VIT

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.91 kiB,2.91 kiB
Shape,"(744,)","(744,)"
Dask graph,1 chunks in 109 graph layers,1 chunks in 109 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 2.91 kiB 2.91 kiB Shape (744,) (744,) Dask graph 1 chunks in 109 graph layers Data type int32 numpy.ndarray",744  1,

Unnamed: 0,Array,Chunk
Bytes,2.91 kiB,2.91 kiB
Shape,"(744,)","(744,)"
Dask graph,1 chunks in 109 graph layers,1 chunks in 109 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray


### Builder object for all files

In [32]:
b_vint = Builder(paths=[era5_path+'e5.oper.an.vinteg/'],depth=1,exclude_patterns=['*.grb'])
b_vint

Builder(paths=['/gpfs/csfs1/collections/rda/data/ds633.0/e5.oper.an.vinteg/'], storage_options={}, depth=1, exclude_patterns=['*.grb'], include_patterns=[], joblib_parallel_kwargs={})

In [None]:
%%time
b_vint.build(parsing_func= parse_era5)

In [None]:
b_vint.df

In [None]:
%%time
b_vint.save(
    name='era5_catalog_vint',
    path_column_name='path',
    variable_column_name='variable',
    data_format='netcdf',
    groupby_attrs=[
        'datatype',
        'level_type',
        'step_type'
    ],
    aggregations=[
        {'type': 'union', 'attribute_name': 'variable'},
        {
            'type': 'join_existing',
            'attribute_name': 'time_range',
            'options': {'dim': 'time', 'coords': 'minimal', 'compat': 'override'},
        },
    ],
    description = 'This is the NetCDF collection of vertical integrals in the ERA5 dataset ds633, which is a part of NCAR glade collection. ',
    directory = '/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/'
)

## Save and test catalog

In [5]:
# Open collection description file using intake
catalog_url = 'https://ncar-cesm-lens.s3-us-west-2.amazonaws.com/catalogs/aws-cesm1-le.json'
col         = intake.open_esm_datastore(catalog_url)
col

Unnamed: 0,unique
variable,78
long_name,75
component,5
experiment,4
frequency,6
vertical_levels,3
spatial_domain,5
units,25
start_time,12
end_time,13


### Merge the data

In [11]:
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.97:36433,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [12]:
cluster.scale(8)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.97:36433,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B
