# Modify intake catalog for CESM2-LENS disk access

- This notebook uses the intake catalog https://raw.githubusercontent.com/NCAR/cesm2-le-aws/main/intake-catalogs/aws-cesm2-le.json and modifies the path column to access the same data from glade using https access.

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# import pathlib
import intake
import numpy as np
#import pandas as pd
import xarray as xr
import intake_esm
import glob
from ecgtools import Builder
from ecgtools.builder import INVALID_ASSET, TRACEBACK
# import s3fs
# import seaborn as sns
import re
import pandas as pd
from pathlib import Path
import aiohttp

In [2]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [23]:
rda_scratch = '/gpfs/csfs1/collections/rda/scratch/harshah'
rda_data    = '/gpfs/csfs1/collections/rda/data/'
#
s3_link = 's3://ncar-cesm2-lens/'
#
rda_url     =  'https://data.rda.ucar.edu/'
cesm_lens  =  rda_url + 'chifan_AWS/'
#This maps to /glade/campaign/collections/rda/transfer/
# rda_zarr    = rda_url + 'harshah/pelican_test/tas_zarr/'

In [5]:
# Create a PBS cluster object
cluster = PBSCluster(
    job_name = 'dask-wk24-hpc',
    cores = 1,
    memory = '4GiB',
    processes = 1,
    local_directory = rda_scratch+'/dask/spill',
    resource_spec = 'select=1:ncpus=1:mem=4GB',
    queue = 'casper',
    walltime = '5:00:00',
    log_directory = rda_scratch+'/dask/logs',
    #interface = 'ib0'
    interface = 'ext'
)

In [6]:
cluster.scale(10)

In [7]:
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/35813/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.94:44751,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/35813/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Open catalogs

In [24]:
cat = intake.open_esm_datastore('/glade/u/home/harshah/intake_catalogs/aws-cesm2-le.json')
cat

Unnamed: 0,unique
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20
start_time,4


In [25]:
cat.df

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,FLNS,net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
1,FLNSC,clearsky net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
2,FLUT,upwelling longwave flux at top of model,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
3,FSNS,net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
4,FSNSC,clearsky net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
...,...,...,...,...,...,...,...,...,...,...,...,...
317,WVEL,vertical velocity,ocn,ssp370,smbb,monthly,60.0,global_ocean,centimeter/s,2015-01-16 12:00:00,2100-12-16 12:00:00,s3://ncar-cesm2-lens/ocn/monthly/cesm2LE-ssp37...
318,,,ocn,historical,smbb,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr
319,,,ocn,historical,cmip6,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr
320,,,ocn,ssp370,smbb,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr


In [26]:
df =  cat.df
df['path']

0      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
1      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
2      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
3      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
4      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
                             ...                        
317    s3://ncar-cesm2-lens/ocn/monthly/cesm2LE-ssp37...
318            s3://ncar-cesm2-lens/ocn/static/grid.zarr
319            s3://ncar-cesm2-lens/ocn/static/grid.zarr
320            s3://ncar-cesm2-lens/ocn/static/grid.zarr
321            s3://ncar-cesm2-lens/ocn/static/grid.zarr
Name: path, Length: 322, dtype: object

In [27]:
df['path'] = df['path'].str.replace(s3_link, '')
# df1['path'] = rda_url + df1['path'] + '#mode=bytes'
df['path'] = cesm_lens + df['path'] 
df

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,FLNS,net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,https://data.rda.ucar.edu/chifan_AWS/atm/daily...
1,FLNSC,clearsky net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,https://data.rda.ucar.edu/chifan_AWS/atm/daily...
2,FLUT,upwelling longwave flux at top of model,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,https://data.rda.ucar.edu/chifan_AWS/atm/daily...
3,FSNS,net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,https://data.rda.ucar.edu/chifan_AWS/atm/daily...
4,FSNSC,clearsky net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,https://data.rda.ucar.edu/chifan_AWS/atm/daily...
...,...,...,...,...,...,...,...,...,...,...,...,...
317,WVEL,vertical velocity,ocn,ssp370,smbb,monthly,60.0,global_ocean,centimeter/s,2015-01-16 12:00:00,2100-12-16 12:00:00,https://data.rda.ucar.edu/chifan_AWS/ocn/month...
318,,,ocn,historical,smbb,static,,global_ocean,,,,https://data.rda.ucar.edu/chifan_AWS/ocn/stati...
319,,,ocn,historical,cmip6,static,,global_ocean,,,,https://data.rda.ucar.edu/chifan_AWS/ocn/stati...
320,,,ocn,ssp370,smbb,static,,global_ocean,,,,https://data.rda.ucar.edu/chifan_AWS/ocn/stati...


In [36]:
# %%time
# df.to_csv(rda_scratch +'/intake_catalogs/aws-cesm2-le.csv')

CPU times: user 699 µs, sys: 2.89 ms, total: 3.59 ms
Wall time: 8.93 ms


In [39]:
cat1 = intake.open_esm_datastore('/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/aws-cesm2-le.json')
cat1

Unnamed: 0,unique
Unnamed: 0,322
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20


In [43]:
cat_temp = cat1.search(variable ='TREFHTMX', frequency ='daily')
cat_temp

Unnamed: 0,unique
Unnamed: 0,4
variable,1
long_name,1
component,1
experiment,2
forcing_variant,2
frequency,1
vertical_levels,1
spatial_domain,1
units,1


In [None]:
#########################

In [None]:
b.save(
    # File path - could save as .csv (uncompressed csv) or .csv.gz (compressed csv)
    "/glade/work/mgrover/cesm-hist-test.csv",
    # Column name including filepath
    path_column_name='path',
    # Column name including variables
    variable_column_name='variables',
    # Data file format - could be netcdf or zarr (in this case, netcdf)
    data_format="netcdf",
    # Which attributes to groupby when reading in variables using intake-esm
    groupby_attrs=["component", "stream", "case"],
    # Aggregations which are fed into xarray when reading in data using intake
    aggregations=[
        {
            "type": "join_existing",
            "attribute_name": "date",
            "options": {"dim": "time", "coords": "minimal", "compat": "override"},
        }
    ],
)

## Open the saved catalog and test its properties

In [189]:
test_catalog = intake.open_esm_datastore('/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/era5_catalog_test.json')
test_catalog

Unnamed: 0,unique
Unnamed: 0,36
era_id,1
datatype,1
level_type,0
step_type,1
table_code,1
param_code,36
variable,36
long_name,36
units,7


In [190]:
test_catalog.to_dataset_dict(xarray_open_kwargs={""})


--> The keys in the returned dictionary of datasets are constructed as follows:
	'datatype.step_type'


ESMDataSourceError: Failed to load dataset with key='an.vinteg'
                 You can use `cat['an.vinteg'].df` to inspect the assets/files for this key.
                 

In [140]:
test_ds = test_catalog.to_dataset_dict(xarray_open_kwargs={"engine":"netcdf4"})


--> The keys in the returned dictionary of datasets are constructed as follows:
	'datatype.step_type'


ESMDataSourceError: Failed to load dataset with key='an.vinteg'
                 You can use `cat['an.vinteg'].df` to inspect the assets/files for this key.
                 

### Inspect the keys

In [26]:
test_ds.keys()

dict_keys(['an.vinteg'])

In [27]:
test_ds['an.vinteg']

Unnamed: 0,Array,Chunk
Bytes,2.91 kiB,2.91 kiB
Shape,"(744,)","(744,)"
Dask graph,1 chunks in 109 graph layers,1 chunks in 109 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 2.91 kiB 2.91 kiB Shape (744,) (744,) Dask graph 1 chunks in 109 graph layers Data type int32 numpy.ndarray",744  1,

Unnamed: 0,Array,Chunk
Bytes,2.91 kiB,2.91 kiB
Shape,"(744,)","(744,)"
Dask graph,1 chunks in 109 graph layers,1 chunks in 109 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Select variable and plot
- I am selecting Vertical Integral of temperature

In [None]:
test_ds['an.vinteg'].VIT

## Spin up cluster

In [17]:
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/40579/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/40579/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.97:33191,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/40579/status,Total threads: 0
Started: 6 minutes ago,Total memory: 0 B


In [18]:
cluster.scale(15)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/40579/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.97:33191,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/40579/status,Total threads: 0
Started: 6 minutes ago,Total memory: 0 B


### Builder object for all files

In [None]:
#b_an = Builder(paths=[era5_path+'e5.oper.an.*/'],depth=1,exclude_patterns=['*.grb'])
# b_era = Builder(paths=[era5_path],depth=2,exclude_patterns=['*.grb','.html'],joblib_parallel_kwargs = {
#          'n_jobs': 15,  # Utilize all 10 cores
#          'backend': 'loky',  # 'loky' is good for managing processes, especially if you're not using Dask integration
#         })
# b_era

In [None]:
# %%time
# b_era.build(parsing_func= parse_era5)

In [None]:
b_era_df = b_era.df
b_era_df

# Inspect the catalog
- We observe that there are several rows which are NaN, we should drop them before saving our catalog
- We also see that not all files have a `level_type'. So, we should probably not use this column as a groupby attribute

In [None]:
# Check for NaN values in the 'datatype' column
print(b_era_df['datatype'].isnull().value_counts())

In [None]:
# Replace NaN values in the 'datatype' column with the string 'NA'
b_era_df = b_era_df.dropna()
b_era_df

In [None]:
b_era_df['level_type'] = b_era_df['level_type'].replace('NaN', 'NA')
b_era.df = b_era_df  # Update the builder's DataFrame with the modified one
# Check for NaN values in the 'level_type' column
print(b_era_df['level_type'].isnull().value_counts())

In [None]:
b_era_df['level_type']

- Check to see which files were not parsed by calling .invalid_assets

In [None]:
b_era.invalid_assets

In [None]:
# %%time
# b_era.save(
#     name='era5_catalog',
#     path_column_name='path',
#     variable_column_name='variable',
#     data_format='netcdf',
#     groupby_attrs=[
#         'datatype',
#         #'level_type',
#         'step_type'
#     ],
#     aggregations=[
#         {'type': 'union', 'attribute_name': 'variable'},
#         {
#             'type': 'join_existing',
#             'attribute_name': 'time_range',
#             'options': {'dim': 'time', 'coords': 'minimal', 'compat': 'override'},
#         },
#     ],
#     description = 'This is the NetCDF collection of the publicly accessible ERA5 dataset, which is a part of the NCAR glade collection. ',
#     directory = '/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs'
# )

## Test the catalog, load some data and plot

In [6]:
# test = pd.read_json('/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/era5_catalog.json')
# test

In [127]:
col.df

Unnamed: 0.1,Unnamed: 0,era_id,datatype,level_type,step_type,table_code,param_code,variable,long_name,units,year,month,format,frequency,path
0,0,e5,an,,vinteg,162,53,VIMA,Vertical integral of mass of atmosphere,kg m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
1,1,e5,an,,vinteg,162,54,VIT,Vertical integral of temperature,K kg m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
2,2,e5,an,,vinteg,162,59,VIKE,Vertical integral of kinetic energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
3,3,e5,an,,vinteg,162,60,VITHE,Vertical integral of thermal energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
4,4,e5,an,,vinteg,162,61,VIPIE,Vertical integral of potential+internal energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
5,5,e5,an,,vinteg,162,62,VIPILE,Vertical integral of potential+internal+latent...,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
6,6,e5,an,,vinteg,162,63,VITOE,Vertical integral of total energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
7,7,e5,an,,vinteg,162,64,VIEC,Vertical integral of energy conversion,W m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
8,8,e5,an,,vinteg,162,65,VIMAE,Vertical integral of eastward mass flux,kg m**-1 s**-1,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
9,9,e5,an,,vinteg,162,66,VIMAN,Vertical integral of northward mass flux,kg m**-1 s**-1,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...


In [126]:
col = intake.open_esm_datastore('/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/era5_catalog_test.json')
col

Unnamed: 0,unique
Unnamed: 0,36
era_id,1
datatype,1
level_type,0
step_type,1
table_code,1
param_code,36
variable,36
long_name,36
units,7


In [98]:
col.df

Unnamed: 0,era_id,datatype,level_type,step_type,table_code,param_code,variable,long_name,units,year,month,format,frequency,path
0,e5,an,,vinteg,162,53,VIMA,Vertical integral of mass of atmosphere,kg m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
1,e5,an,,vinteg,162,54,VIT,Vertical integral of temperature,K kg m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
2,e5,an,,vinteg,162,59,VIKE,Vertical integral of kinetic energy,J m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
3,e5,an,,vinteg,162,60,VITHE,Vertical integral of thermal energy,J m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
4,e5,an,,vinteg,162,61,VIPIE,Vertical integral of potential+internal energy,J m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
5,e5,an,,vinteg,162,62,VIPILE,Vertical integral of potential+internal+latent...,J m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
6,e5,an,,vinteg,162,63,VITOE,Vertical integral of total energy,J m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
7,e5,an,,vinteg,162,64,VIEC,Vertical integral of energy conversion,W m**-2,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
8,e5,an,,vinteg,162,65,VIMAE,Vertical integral of eastward mass flux,kg m**-1 s**-1,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...
9,e5,an,,vinteg,162,66,VIMAN,Vertical integral of northward mass flux,kg m**-1 s**-1,1940,1,nc,hourly,/gpfs/csfs1/collections/rda/data/ds633.0/e5.op...


In [99]:
df1 = col.df

In [100]:
df1['path']= df1['path'].str.replace(rda_data, '')
df1['path'] = rda_url + df1['path'] + '#mode=bytes'
df1

Unnamed: 0,era_id,datatype,level_type,step_type,table_code,param_code,variable,long_name,units,year,month,format,frequency,path
0,e5,an,,vinteg,162,53,VIMA,Vertical integral of mass of atmosphere,kg m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
1,e5,an,,vinteg,162,54,VIT,Vertical integral of temperature,K kg m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
2,e5,an,,vinteg,162,59,VIKE,Vertical integral of kinetic energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
3,e5,an,,vinteg,162,60,VITHE,Vertical integral of thermal energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
4,e5,an,,vinteg,162,61,VIPIE,Vertical integral of potential+internal energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
5,e5,an,,vinteg,162,62,VIPILE,Vertical integral of potential+internal+latent...,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
6,e5,an,,vinteg,162,63,VITOE,Vertical integral of total energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
7,e5,an,,vinteg,162,64,VIEC,Vertical integral of energy conversion,W m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
8,e5,an,,vinteg,162,65,VIMAE,Vertical integral of eastward mass flux,kg m**-1 s**-1,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
9,e5,an,,vinteg,162,66,VIMAN,Vertical integral of northward mass flux,kg m**-1 s**-1,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...


In [83]:
# # Drop the column named 'Unnamed: 0' if it exists
# df1 = df1.loc[:, ~df1.columns.str.contains('^Unnamed')]
# df1

In [84]:
# df1.to_csv('/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/era5_catalog.csv',index=False)

In [85]:
# test_path = df1['path'][0] + '#mode=bytes'

In [101]:
df1.to_csv("/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/era5_catalog_test.csv")

In [102]:
col = intake.open_esm_datastore('/gpfs/csfs1/collections/rda/scratch/harshah/intake_catalogs/era5_catalog_test.json')
col.df

Unnamed: 0.1,Unnamed: 0,era_id,datatype,level_type,step_type,table_code,param_code,variable,long_name,units,year,month,format,frequency,path
0,0,e5,an,,vinteg,162,53,VIMA,Vertical integral of mass of atmosphere,kg m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
1,1,e5,an,,vinteg,162,54,VIT,Vertical integral of temperature,K kg m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
2,2,e5,an,,vinteg,162,59,VIKE,Vertical integral of kinetic energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
3,3,e5,an,,vinteg,162,60,VITHE,Vertical integral of thermal energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
4,4,e5,an,,vinteg,162,61,VIPIE,Vertical integral of potential+internal energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
5,5,e5,an,,vinteg,162,62,VIPILE,Vertical integral of potential+internal+latent...,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
6,6,e5,an,,vinteg,162,63,VITOE,Vertical integral of total energy,J m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
7,7,e5,an,,vinteg,162,64,VIEC,Vertical integral of energy conversion,W m**-2,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
8,8,e5,an,,vinteg,162,65,VIMAE,Vertical integral of eastward mass flux,kg m**-1 s**-1,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...
9,9,e5,an,,vinteg,162,66,VIMAN,Vertical integral of northward mass flux,kg m**-1 s**-1,1940,1,nc,hourly,https://data.rda.ucar.edu/ds633.0/e5.oper.an.v...


In [69]:
%%time
test = xr.open_dataset(col.df['path'][0],engine='netcdf4')
test = test.PV.isel(latitude=0,longitude =1,level=0)

AttributeError: 'Dataset' object has no attribute 'PV'

In [104]:
xr.open_mfdataset(col.df.path.values[0:2], engine='netcdf4')

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.91 kiB,2.91 kiB
Shape,"(744,)","(744,)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 2.91 kiB 2.91 kiB Shape (744,) (744,) Dask graph 1 chunks in 5 graph layers Data type int32 numpy.ndarray",744  1,

Unnamed: 0,Array,Chunk
Bytes,2.91 kiB,2.91 kiB
Shape,"(744,)","(744,)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.88 GiB 3.97 MiB Shape (744, 721, 1440) (27, 139, 277) Dask graph 1008 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  744,

Unnamed: 0,Array,Chunk
Bytes,2.88 GiB,3.97 MiB
Shape,"(744, 721, 1440)","(27, 139, 277)"
Dask graph,1008 chunks in 2 graph layers,1008 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [105]:
dsets = col.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'datatype.step_type'


ESMDataSourceError: Failed to load dataset with key='an.vinteg'
                 You can use `cat['an.vinteg'].df` to inspect the assets/files for this key.
                 

In [79]:
ds = xr.open_mfdataset(col['an.vinteg'].df.path.values)

KeyboardInterrupt: 

In [54]:
cat = col.search(variable='MN2T',frequency='hourly', year=1940)
cat

Unnamed: 0,unique
era_id,0
datatype,0
level_type,0
step_type,0
table_code,0
param_code,0
variable,0
long_name,0
units,0
year,0


In [55]:
dsets = cat.to_dataset_dict(aggregate=False)

- Inspect keys

In [57]:
cat.df

Unnamed: 0,era_id,datatype,level_type,step_type,table_code,param_code,variable,long_name,units,year,month,format,frequency,path


In [10]:
%%time
test2 = xr.open_dataset(cat.df['path'][0],engine='netcdf4')
test2

CPU times: user 41 ms, sys: 1.52 ms, total: 42.5 ms
Wall time: 1.11 s


In [11]:
print(cat.df['path'][0])

https://data.rda.ucar.edu/ds633.0/e5.oper.fc.sfc.minmax/194001/e5.oper.fc.sfc.minmax.128_202_mn2t.ll025sc.1940010106_1940011606.nc#mode=bytes


In [None]:
dsets.keys()

In [None]:
ds = dsets['fc.minmax']
ds

In [None]:
%%time
ds.MN2T.isel(forecast_initial_time=0,forecast_hour=0).plot()