# Modify intake catalog for CESM2-LENS disk access

- This notebook starts from the intake catalog https://raw.githubusercontent.com/NCAR/cesm2-le-aws/main/intake-catalogs/aws-cesm2-le.json and rewrites its `path` column so that the same data can be read from NCAR's RDA copy (POSIX paths on GLADE, HTTPS, or OSDF) instead of from the AWS S3 bucket.

In [2]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# import pathlib
import intake
import numpy as np
#import pandas as pd
import xarray as xr
import intake_esm
import glob
# from ecgtools import Builder
# from ecgtools.builder import INVALID_ASSET, TRACEBACK
# import s3fs
# import seaborn as sns
import re
import pandas as pd
from pathlib import Path
import aiohttp

In [3]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [4]:
# --- Locations on GLADE (POSIX) ---
rda_data = '/glade/campaign/collections/rda/data/'
rda_scratch = '/glade/campaign/collections/rda/scratch/harshah'
# Transfer area; defined for reference, not used by the cells shown here.
rda_transfer = '/glade/campaign/collections/rda/transfer/'

# --- Remote prefixes ---
# s3_link is the prefix found in the source catalog's `path` column;
# osdf_link and cesm_lens are candidate replacements for it.
s3_link = 's3://ncar-cesm2-lens/'
osdf_link = 'osdf:///ncar/rda/d010092/'
rda_url = 'https://data.rda.ucar.edu/'

# --- Dataset d010092 roots: HTTPS and POSIX variants ---
cesm_lens = rda_url + 'd010092/'
cesm_posix = rda_data + 'd010092/'

print(cesm_posix)

/glade/campaign/collections/rda/data/d010092/


In [5]:
# Build a Dask cluster whose workers are submitted as PBS jobs.
# Each job asks for 1 CPU / 4 GB and runs one single-core worker process.
cluster = PBSCluster(
    # Scheduler-side job settings
    job_name = 'dask-wk24-hpc',
    queue = 'casper',
    walltime = '5:00:00',
    resource_spec = 'select=1:ncpus=1:mem=4GB',
    # Worker sizing
    cores = 1,
    processes = 1,
    memory = '4GiB',
    # Spill and log files go under the scratch area
    local_directory = rda_scratch+'/dask/spill',
    log_directory = rda_scratch+'/dask/logs',
    # Network interface used for Dask communication (alternative: 'ib0')
    interface = 'ext'
)

In [6]:
# Ask the cluster to scale to 3 workers
cluster.scale(3)

In [7]:
# Show the cluster summary (dashboard link, worker count, threads, memory)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/32899/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.93:45313,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/32899/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Open catalogs

In [17]:
# Open the published AWS catalog for CESM2-LE; its `path` column points at S3.
cat = intake.open_esm_datastore(
    'https://raw.githubusercontent.com/NCAR/cesm2-le-aws/main/'
    'intake-catalogs/aws-cesm2-le.json'
)
# Display the catalog summary (unique value counts per column)
cat

Unnamed: 0,unique
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20
start_time,4


In [18]:
# Inspect the catalog's underlying DataFrame (one row per catalog entry)
cat.df

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,FLNS,net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
1,FLNSC,clearsky net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
2,FLUT,upwelling longwave flux at top of model,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
3,FSNS,net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
4,FSNSC,clearsky net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
...,...,...,...,...,...,...,...,...,...,...,...,...
317,WVEL,vertical velocity,ocn,ssp370,smbb,monthly,60.0,global_ocean,centimeter/s,2015-01-16 12:00:00,2100-12-16 12:00:00,s3://ncar-cesm2-lens/ocn/monthly/cesm2LE-ssp37...
318,,,ocn,historical,smbb,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr
319,,,ocn,historical,cmip6,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr
320,,,ocn,ssp370,smbb,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr


In [19]:
# Work on a copy: `cat.df` is the catalog's own DataFrame, so assigning to
# df['path'] on an alias would silently rewrite the catalog held by `cat`
# and make the `cat.df` output displayed earlier stale.
df = cat.df.copy()
# Peek at the first few original (S3) paths
df['path'].head().values

array(['s3://ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FLNS.zarr',
       's3://ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FLNSC.zarr',
       's3://ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FLUT.zarr',
       's3://ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FSNS.zarr',
       's3://ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-FSNSC.zarr'],
      dtype=object)

In [None]:
#### Replace Amazon link ########
# NOTE: disabled earlier variant kept for reference. It strips the S3 prefix
# and then prepends a new base in two steps; the following cell swaps the
# prefix with a single str.replace call instead.
# df['path'] = df['path'].str.replace(s3_link, '')
# # df1['path'] = rda_url + df1['path'] + '#mode=bytes'
# # df['path'] = cesm_lens + df['path'] 
# df['path'] = cesm_posix + df['path'] 
# df

In [20]:
# Swap the S3 bucket prefix for the desired access prefix.
# Alternatives (enable exactly one):
#   POSIX : df['path'] = df['path'].str.replace(s3_link, cesm_posix, regex=False)
#   HTTPS : df['path'] = df['path'].str.replace(s3_link, cesm_lens, regex=False)
# regex=False forces a literal substring replacement; without it the result
# depends on the pandas version, because the default of `regex` changed
# between pandas 1.x and 2.x.
df['path'] = df['path'].str.replace(s3_link, osdf_link, regex=False)
df

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,FLNS,net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
1,FLNSC,clearsky net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
2,FLUT,upwelling longwave flux at top of model,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
3,FSNS,net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
4,FSNSC,clearsky net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
...,...,...,...,...,...,...,...,...,...,...,...,...
317,WVEL,vertical velocity,ocn,ssp370,smbb,monthly,60.0,global_ocean,centimeter/s,2015-01-16 12:00:00,2100-12-16 12:00:00,osdf:///ncar/rda/d010092/ocn/monthly/cesm2LE-s...
318,,,ocn,historical,smbb,static,,global_ocean,,,,osdf:///ncar/rda/d010092/ocn/static/grid.zarr
319,,,ocn,historical,cmip6,static,,global_ocean,,,,osdf:///ncar/rda/d010092/ocn/static/grid.zarr
320,,,ocn,ssp370,smbb,static,,global_ocean,,,,osdf:///ncar/rda/d010092/ocn/static/grid.zarr


In [21]:
# %%time
# Write the re-pathed table next to the matching catalog JSON (enable one line).
# NOTE: `index` must be the boolean False. The string 'False' is truthy, so
# pandas writes the index anyway, and it reappears as an extra 'Unnamed: 0'
# column when the saved catalog is opened (visible in the cat3 summary below).
# df.to_csv(rda_data +'harshah/intake_catalogs/posix/aws-cesm2-le.csv',index=False)
# df.to_csv(rda_data +'harshah/intake_catalogs/https/aws-cesm2-le.csv',index=False)
# df.to_csv(rda_data +'harshah/intake_catalogs/osdf/cesm2-lens-aws-osdf/aws-cesm2-le.csv',index=False)

In [22]:
# Open the saved, re-pathed catalogs:
#   cat1 -> POSIX paths, cat2 -> HTTPS paths, cat3 -> OSDF paths.
# All three are opened because later cells display cat1, cat2 and cat3;
# leaving any of them undefined raises NameError on Restart & Run All.
cat1 = intake.open_esm_datastore(rda_data + 'harshah/intake_catalogs/posix/aws-cesm2-le.json')
cat2 = intake.open_esm_datastore(rda_data + 'harshah/intake_catalogs/https/aws-cesm2-le.json')
cat3 = intake.open_esm_datastore(rda_data + 'harshah/intake_catalogs/osdf/cesm2-lens-aws-osdf/aws-cesm2-le.json')
cat3

Unnamed: 0,unique
Unnamed: 0,322
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20


In [None]:
# Show the path column of the POSIX catalog.
# Requires `cat1` to have been opened in the catalog-loading cell above.
cat1.df['path']

In [52]:
# Show the path column of the HTTPS catalog.
# Requires `cat2` to have been opened in the catalog-loading cell above.
cat2.df['path']

0      https://data.rda.ucar.edu/d010092/atm/daily/ce...
1      https://data.rda.ucar.edu/d010092/atm/daily/ce...
2      https://data.rda.ucar.edu/d010092/atm/daily/ce...
3      https://data.rda.ucar.edu/d010092/atm/daily/ce...
4      https://data.rda.ucar.edu/d010092/atm/daily/ce...
                             ...                        
317    https://data.rda.ucar.edu/d010092/ocn/monthly/...
318    https://data.rda.ucar.edu/d010092/ocn/static/g...
319    https://data.rda.ucar.edu/d010092/ocn/static/g...
320    https://data.rda.ucar.edu/d010092/ocn/static/g...
321    https://data.rda.ucar.edu/d010092/ocn/static/g...
Name: path, Length: 322, dtype: object

In [23]:
# Show the path column of the OSDF catalog (cat3)
cat3.df['path']

0      osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
1      osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
2      osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
3      osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
4      osdf:///ncar/rda/d010092/atm/daily/cesm2LE-his...
                             ...                        
317    osdf:///ncar/rda/d010092/ocn/monthly/cesm2LE-s...
318        osdf:///ncar/rda/d010092/ocn/static/grid.zarr
319        osdf:///ncar/rda/d010092/ocn/static/grid.zarr
320        osdf:///ncar/rda/d010092/ocn/static/grid.zarr
321        osdf:///ncar/rda/d010092/ocn/static/grid.zarr
Name: path, Length: 322, dtype: object

## Open the saved catalog, load some data, and plot

In [24]:
# Restrict the catalog to daily TREFHTMX entries.
# Active: OSDF catalog (cat3). Disabled alternative against cat1:
# cat_temp = cat1.search(variable ='TREFHTMX', frequency ='daily')
cat_temp = cat3.search(variable ='TREFHTMX', frequency ='daily')
cat_temp

Unnamed: 0,unique
Unnamed: 0,4
variable,1
long_name,1
component,1
experiment,2
forcing_variant,2
frequency,1
vertical_levels,1
spatial_domain,1
units,1


In [25]:
# Load every entry matched by the search into a dictionary of datasets,
# keyed as 'component.experiment.frequency.forcing_variant' (see output below).
# NOTE(review): reading osdf:// paths presumably relies on an fsspec backend
# for that protocol being installed in the environment — confirm.
dsets = cat_temp.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency.forcing_variant'


## Select variable and plot
- I am selecting Vertical Integral of temperature