# Modify intake catalog for CESM2-LENS disk access

- This notebook starts from the intake catalog https://raw.githubusercontent.com/NCAR/cesm2-le-aws/main/intake-catalogs/aws-cesm2-le.json and rewrites its `path` column so that the same data can be read from GLADE instead of from AWS S3, either through POSIX paths or over HTTPS via https://data.rda.ucar.edu/.

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# import pathlib
import intake
import numpy as np
#import pandas as pd
import xarray as xr
import intake_esm
import glob
from ecgtools import Builder
from ecgtools.builder import INVALID_ASSET, TRACEBACK
# import s3fs
# import seaborn as sns
import re
import pandas as pd
from pathlib import Path
import aiohttp

In [2]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [22]:
# Scratch and data roots on the RDA collections filesystem (hard-coded, user-specific).
rda_scratch = '/gpfs/csfs1/collections/rda/scratch/harshah'
rda_data = '/gpfs/csfs1/collections/rda/data/'

# S3 prefix that the `path` entries of the original AWS catalog start with.
s3_link = 's3://ncar-cesm2-lens/'

# HTTPS endpoint and the CESM2-LENS directory served beneath it.
rda_url = 'https://data.rda.ucar.edu/'
cesm_lens = f'{rda_url}chifan_AWS/ncar-cesm2-lens/'

# POSIX location of the same directory tree. The original note says the location above
# "maps to /glade/campaign/collections/rda/transfer/" -- presumably the HTTPS root; confirm.
rda_transfer = '/glade/campaign/collections/rda/transfer/'
cesm_posix = f'{rda_transfer}chifan_AWS/ncar-cesm2-lens/'

In [4]:
# Create a PBS cluster object
# Build the PBS-backed Dask cluster; every job hosts a single one-core worker process.
cluster = PBSCluster(
    # PBS job settings
    job_name='dask-wk24-hpc',
    queue='casper',
    walltime='5:00:00',
    resource_spec='select=1:ncpus=1:mem=4GB',
    # Resources of each worker
    cores=1,
    processes=1,
    memory='4GiB',
    # Network interface used by the workers ('ib0' was the alternative the author disabled)
    interface='ext',
    # Spill and log directories, both under the scratch area
    local_directory=f'{rda_scratch}/dask/spill',
    log_directory=f'{rda_scratch}/dask/logs',
)

In [5]:
# Request 10 workers from the PBS cluster (they start once the scheduler grants the jobs).
cluster.scale(10)

In [6]:
# Display the cluster summary (dashboard link, worker count, total memory).
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.96:44003,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Open catalogs

In [37]:
# Open a local copy of the AWS CESM2-LENS catalog (hard-coded, user-specific path).
cat = intake.open_esm_datastore('/glade/u/home/harshah/intake_catalogs/aws-cesm2-le.json')
# Show the catalog summary (unique values per column).
cat

Unnamed: 0,unique
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20
start_time,4


In [38]:
# Show the catalog's underlying table; the last column, `path`, holds each asset's location.
cat.df

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,FLNS,net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
1,FLNSC,clearsky net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
2,FLUT,upwelling longwave flux at top of model,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
3,FSNS,net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
4,FSNSC,clearsky net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
...,...,...,...,...,...,...,...,...,...,...,...,...
317,WVEL,vertical velocity,ocn,ssp370,smbb,monthly,60.0,global_ocean,centimeter/s,2015-01-16 12:00:00,2100-12-16 12:00:00,s3://ncar-cesm2-lens/ocn/monthly/cesm2LE-ssp37...
318,,,ocn,historical,smbb,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr
319,,,ocn,historical,cmip6,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr
320,,,ocn,ssp370,smbb,static,,global_ocean,,,,s3://ncar-cesm2-lens/ocn/static/grid.zarr


In [39]:
# NOTE(review): `df` is bound to `cat.df` directly, not to a copy -- in-place edits to `df`
# presumably also change the table held by `cat`; confirm before relying on `cat` afterwards.
df =  cat.df
# Inspect the asset locations; at this point they are still S3 URLs.
df['path']

0      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
1      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
2      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
3      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
4      s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
                             ...                        
317    s3://ncar-cesm2-lens/ocn/monthly/cesm2LE-ssp37...
318            s3://ncar-cesm2-lens/ocn/static/grid.zarr
319            s3://ncar-cesm2-lens/ocn/static/grid.zarr
320            s3://ncar-cesm2-lens/ocn/static/grid.zarr
321            s3://ncar-cesm2-lens/ocn/static/grid.zarr
Name: path, Length: 322, dtype: object

In [40]:
# NOTE(review): every statement in this cell is commented out, yet the table displayed as its
# output shows /glade/campaign/... paths, i.e. it stems from an earlier run with the
# `cesm_posix` line active. A fresh Restart & Run All will not reproduce that output.
#
# Intended steps: strip the S3 prefix (`s3_link`) from every path, then prepend the new root --
# `cesm_posix` for POSIX access, or `cesm_lens` (the disabled alternative) for HTTPS access.
# Caution: the prepend step is not idempotent; running it twice prefixes the paths twice.
# df['path'] = df['path'].str.replace(s3_link, '')
# # df1['path'] = rda_url + df1['path'] + '#mode=bytes'
# # df['path'] = cesm_lens + df['path'] 
# df['path'] = cesm_posix + df['path'] 
# df

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,FLNS,net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
1,FLNSC,clearsky net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
2,FLUT,upwelling longwave flux at top of model,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
3,FSNS,net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
4,FSNSC,clearsky net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
...,...,...,...,...,...,...,...,...,...,...,...,...
317,WVEL,vertical velocity,ocn,ssp370,smbb,monthly,60.0,global_ocean,centimeter/s,2015-01-16 12:00:00,2100-12-16 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
318,,,ocn,historical,smbb,static,,global_ocean,,,,/glade/campaign/collections/rda/transfer/chifa...
319,,,ocn,historical,cmip6,static,,global_ocean,,,,/glade/campaign/collections/rda/transfer/chifa...
320,,,ocn,ssp370,smbb,static,,global_ocean,,,,/glade/campaign/collections/rda/transfer/chifa...


In [41]:
# %%time
# NOTE(review): the export is disabled; the timing printed as this cell's output comes from an
# earlier run. Both lines below write the same `df`, so enable only the one whose target
# directory (posix vs. https) matches the path prefix currently stored in `df`.
# NOTE(review): `to_csv` writes the index by default, which is presumably why the re-opened
# catalog lists an extra "Unnamed: 0" column; passing index=False would avoid it -- confirm.
# df.to_csv(rda_scratch +'/intake_catalogs/posix/aws-cesm2-le.csv')
# df.to_csv(rda_scratch +'/intake_catalogs/https/aws-cesm2-le.csv')

CPU times: user 4.32 ms, sys: 0 ns, total: 4.32 ms
Wall time: 12 ms


In [42]:
# Load the catalog stored in the `posix` folder of the scratch area and show its summary.
cat1 = intake.open_esm_datastore(
    f'{rda_scratch}/intake_catalogs/posix/aws-cesm2-le.json'
)
cat1

Unnamed: 0,unique
Unnamed: 0,322
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20


In [43]:
# Check the path column of the re-opened catalog; it should list /glade/campaign/... locations.
cat1.df['path']

0      /glade/campaign/collections/rda/transfer/chifa...
1      /glade/campaign/collections/rda/transfer/chifa...
2      /glade/campaign/collections/rda/transfer/chifa...
3      /glade/campaign/collections/rda/transfer/chifa...
4      /glade/campaign/collections/rda/transfer/chifa...
                             ...                        
317    /glade/campaign/collections/rda/transfer/chifa...
318    /glade/campaign/collections/rda/transfer/chifa...
319    /glade/campaign/collections/rda/transfer/chifa...
320    /glade/campaign/collections/rda/transfer/chifa...
321    /glade/campaign/collections/rda/transfer/chifa...
Name: path, Length: 322, dtype: object

## Open the saved catalog, load some data, and plot

In [44]:
# Narrow the catalog to the daily-frequency entries of variable TREFHTMX and show the summary.
cat_temp = cat1.search(
    frequency='daily',
    variable='TREFHTMX',
)
cat_temp

Unnamed: 0,unique
Unnamed: 0,4
variable,1
long_name,1
component,1
experiment,2
forcing_variant,2
frequency,1
vertical_levels,1
spatial_domain,1
units,1


In [45]:
# Load all entries of the filtered catalog into a dict of datasets. As the message printed by
# this call states, keys follow 'component.experiment.frequency.forcing_variant'.
dsets = cat_temp.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency.forcing_variant'


In [47]:
# cat['atm.ssp370.daily.cmip6'].df

In [48]:
# Pull the four datasets out of `dsets`, grouped by experiment; each pair holds the
# smbb and cmip6 forcing variants.
historical_smbb, historical_cmip6 = (
    dsets['atm.historical.daily.smbb'],
    dsets['atm.historical.daily.cmip6'],
)
future_smbb, future_cmip6 = (
    dsets['atm.ssp370.daily.smbb'],
    dsets['atm.ssp370.daily.cmip6'],
)

In [None]:
# merge_ds_smbb = xr.concat([historical_smbb, future_smbb], dim='time')
# merge_ds_smbb = merge_ds_smbb.dropna(dim='member_id')

# merge_ds_cmip6= xr.concat([historical_cmip6, future_cmip6], dim='time')
# merge_ds_cmip6 = merge_ds_cmip6.dropna(dim='member_id')

In [None]:
#########################

## Select variable and plot
- I am selecting Vertical Integral of temperature