# Modify intake catalog for CESM2-LENS disk access

- This notebook starts from the intake catalog https://raw.githubusercontent.com/NCAR/cesm2-le-aws/main/intake-catalogs/aws-cesm2-le.json and modifies its `path` column so that the same data can be read from GLADE, either directly from disk (POSIX paths) or over HTTPS.

In [2]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# import pathlib
import intake
import numpy as np
#import pandas as pd
import xarray as xr
import intake_esm
import glob
# from ecgtools import Builder
# from ecgtools.builder import INVALID_ASSET, TRACEBACK
# import s3fs
# import seaborn as sns
import re
import pandas as pd
from pathlib import Path
import aiohttp

In [3]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [4]:
# --- Locations on GLADE (POSIX paths) ---
# Note: rda_scratch has no trailing slash, while the other roots below do.
rda_scratch = '/glade/campaign/collections/rda/scratch/harshah'
rda_data = '/glade/campaign/collections/rda/data/'
# NOTE(review): the original comment here read "This maps to
# /glade/campaign/collections/rda/transfer/" without saying what maps there — confirm.
rda_transfer = '/glade/campaign/collections/rda/transfer/'
# Dataset d010092 under the GLADE data root
cesm_posix = f'{rda_data}d010092/'

# --- Remote locations ---
# S3 bucket prefix (used by the path-rewrite cell further down)
s3_link = 's3://ncar-cesm2-lens/'
# HTTPS root and the d010092 dataset beneath it
rda_url = 'https://data.rda.ucar.edu/'
cesm_lens = f'{rda_url}d010092/'

In [5]:
# Create a PBS cluster object
# Launch dask workers as PBS batch jobs. Each job requests one CPU and 4 GB
# and hosts a single one-core worker process.
cluster = PBSCluster(
    # PBS job settings
    job_name='dask-wk24-hpc',
    queue='casper',
    walltime='5:00:00',
    resource_spec='select=1:ncpus=1:mem=4GB',
    # Worker sizing inside each job
    cores=1,
    processes=1,
    memory='4GiB',
    # Worker spill files and job logs both live under the scratch area
    local_directory=f'{rda_scratch}/dask/spill',
    log_directory=f'{rda_scratch}/dask/logs',
    # Network interface used for dask communication
    # (the author left 'ib0' as a commented-out alternative)
    interface='ext',
)

In [6]:
# Ask for three workers (one PBS job each, since processes=1 in the cluster spec).
cluster.scale(3)

# Connect a client to the cluster. Without this, dask-backed work such as
# to_dataset_dict() and later computations runs on the default local
# scheduler and the PBS workers stay idle.
# NOTE(review): no Client is created anywhere in the visible cells even though
# Client is imported; drop this line if one is created elsewhere in the notebook.
client = Client(cluster)

In [7]:
# Display the cluster summary (dashboard link, worker count, threads, memory).
# Workers may still show as 0 right after scaling, until the PBS jobs start.
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/39377/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.93:33991,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/39377/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Open catalogs

In [10]:
# Open the POSIX-path catalog stored under the data root and show its summary
cat = intake.open_esm_datastore(
    f'{rda_data}harshah/intake_catalogs/posix/aws-cesm2-le.json'
)
cat

Unnamed: 0,unique
Unnamed: 0,322
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20


In [11]:
# Show the catalog's underlying table (one row per catalog entry)
cat.df

Unnamed: 0.1,Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,0,FLNS,net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
1,1,FLNSC,clearsky net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
2,2,FLUT,upwelling longwave flux at top of model,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
3,3,FSNS,net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
4,4,FSNSC,clearsky net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
317,317,WVEL,vertical velocity,ocn,ssp370,smbb,monthly,60.0,global_ocean,centimeter/s,2015-01-16 12:00:00,2100-12-16 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
318,318,,,ocn,historical,smbb,static,,global_ocean,,,,/glade/campaign/collections/rda/transfer/chifa...
319,319,,,ocn,historical,cmip6,static,,global_ocean,,,,/glade/campaign/collections/rda/transfer/chifa...
320,320,,,ocn,ssp370,smbb,static,,global_ocean,,,,/glade/campaign/collections/rda/transfer/chifa...


In [12]:
# Keep a handle on the catalog table and preview its path column.
# Note: df is the same object as cat.df, not a copy.
df = cat.df
df.loc[:, 'path']

0      /glade/campaign/collections/rda/transfer/chifa...
1      /glade/campaign/collections/rda/transfer/chifa...
2      /glade/campaign/collections/rda/transfer/chifa...
3      /glade/campaign/collections/rda/transfer/chifa...
4      /glade/campaign/collections/rda/transfer/chifa...
                             ...                        
317    /glade/campaign/collections/rda/transfer/chifa...
318    /glade/campaign/collections/rda/transfer/chifa...
319    /glade/campaign/collections/rda/transfer/chifa...
320    /glade/campaign/collections/rda/transfer/chifa...
321    /glade/campaign/collections/rda/transfer/chifa...
Name: path, Length: 322, dtype: object

In [None]:
# Disabled path rewrite, kept for reference: strip the S3 prefix from each path,
# then prepend either the HTTPS root (cesm_lens) or the POSIX root (cesm_posix).
# NOTE(review): the paths displayed above already start with /glade/..., so this
# looks like it was written against the original S3 catalog — confirm the input
# before re-enabling. The df1 line refers to a name not defined in the visible cells.
# df['path'] = df['path'].str.replace(s3_link, '')
# # df1['path'] = rda_url + df1['path'] + '#mode=bytes'
# # df['path'] = cesm_lens + df['path'] 
# df['path'] = cesm_posix + df['path'] 
# df

In [None]:
# %%time
# Disabled: write the modified table out as the catalog CSV (POSIX or HTTPS variant).
# NOTE(review): DataFrame.to_csv writes the index by default; that is the likely
# source of the 'Unnamed: 0' / 'Unnamed: 0.1' columns in the catalog shown above.
# Passing index=False when re-enabling would avoid adding another such column.
# df.to_csv(rda_scratch +'/intake_catalogs/posix/aws-cesm2-le.csv')
# df.to_csv(rda_scratch +'/intake_catalogs/https/aws-cesm2-le.csv')

In [None]:
# Open the POSIX-path catalog saved under the scratch area and show its summary
cat1 = intake.open_esm_datastore(
    f'{rda_scratch}/intake_catalogs/posix/aws-cesm2-le.json'
)
cat1

In [None]:
# Preview the path column of the catalog opened from scratch
cat1.df['path']

## Open the saved catalog, load some data and plot

In [None]:
# Narrow the catalog to the daily TREFHTMX entries and display the matching subset
cat_temp = cat1.search(
    variable='TREFHTMX',
    frequency='daily',
)
cat_temp

In [None]:
# Load every asset in the filtered catalog. The result is a dict of datasets
# keyed by strings such as 'atm.historical.daily.smbb' (used in the lookups below).
dsets = cat_temp.to_dataset_dict()

In [None]:
# cat['atm.ssp370.daily.cmip6'].df

In [None]:
# Pull the four daily atmosphere datasets out of the dict, grouped by experiment.

# Historical experiment, one dataset per forcing variant
historical_smbb = dsets['atm.historical.daily.smbb']
historical_cmip6 = dsets['atm.historical.daily.cmip6']

# SSP3-7.0 experiment, one dataset per forcing variant
future_smbb = dsets['atm.ssp370.daily.smbb']
future_cmip6 = dsets['atm.ssp370.daily.cmip6']

In [None]:
# merge_ds_smbb = xr.concat([historical_smbb, future_smbb], dim='time')
# merge_ds_smbb = merge_ds_smbb.dropna(dim='member_id')

# merge_ds_cmip6= xr.concat([historical_cmip6, future_cmip6], dim='time')
# merge_ds_cmip6 = merge_ds_cmip6.dropna(dim='member_id')

In [None]:
#########################

## Select variable and plot
- I am selecting Vertical Integral of temperature