# Modify intake catalog for NA-CORDEX data access

- This notebook starts from the intake catalog https://ncar-na-cordex.s3-us-west-2.amazonaws.com/catalogs/aws-na-cordex.json and rewrites its `path` column so that the same data can be accessed from glade via HTTPS or OSDF instead of S3.

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# import pathlib
import intake
import numpy as np
#import pandas as pd
import xarray as xr
import intake_esm
import glob
# from ecgtools import Builder
# from ecgtools.builder import INVALID_ASSET, TRACEBACK
# import s3fs
# import seaborn as sns
import re
import pandas as pd
from pathlib import Path
import aiohttp

In [2]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [3]:
# Locations on glade: the RDA data root and a per-user scratch area
rda_data    = '/glade/campaign/collections/rda/data/'
rda_scratch = '/glade/campaign/collections/rda/scratch/harshah'

# Path prefix used by the AWS catalog, and the OSDF prefix that replaces it
s3_link   = 's3://ncar-na-cordex/'
osdf_link = 'osdf:///ncar/rda/d316009/'

# The same dataset (d316009) reached over HTTPS and through the POSIX filesystem
rda_url        = 'https://data.rda.ucar.edu/'
nacordex_https = f'{rda_url}d316009/'
nacordex_posix = f'{rda_data}d316009/'

print(nacordex_https)

https://data.rda.ucar.edu/d316009/


In [4]:
# Describe the PBS jobs that will host the dask workers: each job runs one
# single-core worker process with 4 GiB of memory on the 'casper' queue.
# Spill files and worker logs both go under the user's scratch area.
dask_scratch = f'{rda_scratch}/dask'

cluster = PBSCluster(
    job_name='dask-wk24-hpc',
    queue='casper',
    walltime='5:00:00',
    resource_spec='select=1:ncpus=1:mem=4GB',
    cores=1,
    processes=1,
    memory='4GiB',
    local_directory=f'{dask_scratch}/spill',
    log_directory=f'{dask_scratch}/logs',
    interface='ext',  # alternative network interface: 'ib0'
)

In [5]:
# Ask the scheduler for three worker jobs. They start asynchronously, so the
# cluster summary may still report zero workers right after this call.
cluster.scale(3)

# Attach a client to the cluster. Without one, dask computations fall back to
# the default local scheduler and the PBS workers requested above stay idle.
client = Client(cluster)

In [6]:
# Display the cluster summary (dashboard link, worker count, threads, memory)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/34507/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.97:38479,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/34507/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Open catalogs

In [7]:
# Open the public NA-CORDEX catalog hosted on AWS; its `path` column holds s3:// URLs
aws_catalog_url = 'https://ncar-na-cordex.s3-us-west-2.amazonaws.com/catalogs/aws-na-cordex.json'
cat = intake.open_esm_datastore(aws_catalog_url)
cat

Unnamed: 0,unique
variable,15
standard_name,10
long_name,18
units,10
spatial_domain,1
grid,2
spatial_resolution,2
scenario,6
start_time,3
end_time,4


In [8]:
# Display the catalog's table of entries (the `path` column is the one modified below)
cat.df

Unnamed: 0,variable,standard_name,long_name,units,spatial_domain,grid,spatial_resolution,scenario,start_time,end_time,frequency,vertical_levels,bias_correction,na-cordex-models,path
0,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-22i,0.25 deg,eval,1979-01-01T12:00:00,2014-12-31T12:00:00,day,1,raw,"['ERA-Int.CRCM5-UQAM', 'ERA-Int.CRCM5-OUR', 'E...",s3://ncar-na-cordex/day/hurs.eval.day.NAM-22i....
1,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-44i,0.50 deg,eval,1979-01-01T12:00:00,2015-12-31T12:00:00,day,1,raw,"['ERA-Int.CRCM5-UQAM', 'ERA-Int.RegCM4', 'ERA-...",s3://ncar-na-cordex/day/hurs.eval.day.NAM-44i....
2,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-22i,0.25 deg,hist-rcp45,1949-01-01T12:00:00,2100-12-31T12:00:00,day,1,mbcn-Daymet,['CanESM2.CanRCM4'],s3://ncar-na-cordex/day/hurs.hist-rcp45.day.NA...
3,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-22i,0.25 deg,hist-rcp45,1949-01-01T12:00:00,2100-12-31T12:00:00,day,1,mbcn-gridMET,['CanESM2.CanRCM4'],s3://ncar-na-cordex/day/hurs.hist-rcp45.day.NA...
4,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-22i,0.25 deg,hist-rcp45,1949-01-01T12:00:00,2100-12-31T12:00:00,day,1,raw,"['GFDL-ESM2M.CRCM5-OUR', 'CanESM2.CRCM5-OUR', ...",s3://ncar-na-cordex/day/hurs.hist-rcp45.day.NA...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,vas,northward_wind,Northward Near-Surface Wind,m s-1,north_america,NAM-44i,0.50 deg,rcp45,2006-01-01T12:00:00,2100-12-31T12:00:00,day,1,raw,"['MPI-ESM-LR.CRCM5-UQAM', 'CanESM2.CRCM5-UQAM'...",s3://ncar-na-cordex/day/vas.rcp45.day.NAM-44i....
326,vas,northward_wind,Northward Near-Surface Wind (Bias-Adjusted),m s-1,north_america,NAM-22i,0.25 deg,rcp85,2006-01-01T12:00:00,2100-12-31T12:00:00,day,1,mbcn-gridMET,"['MPI-ESM-MR.CRCM5-UQAM', 'GEMatm-Can.CRCM5-UQ...",s3://ncar-na-cordex/day/vas.rcp85.day.NAM-22i....
327,vas,northward_wind,Northward Near-Surface Wind,m s-1,north_america,NAM-22i,0.25 deg,rcp85,2006-01-01T12:00:00,2100-12-31T12:00:00,day,1,raw,"['MPI-ESM-MR.CRCM5-UQAM', 'GEMatm-Can.CRCM5-UQ...",s3://ncar-na-cordex/day/vas.rcp85.day.NAM-22i....
328,vas,northward_wind,Northward Near-Surface Wind (Bias-Adjusted),m s-1,north_america,NAM-44i,0.50 deg,rcp85,2006-01-01T12:00:00,2100-12-31T12:00:00,day,1,mbcn-gridMET,"['MPI-ESM-MR.CRCM5-UQAM', 'GEMatm-Can.CRCM5-UQ...",s3://ncar-na-cordex/day/vas.rcp85.day.NAM-44i....


In [9]:
# Work on a private copy of the catalog table. `cat.df` hands back the
# catalog's own DataFrame, so editing it directly would silently change `cat`
# and make the earlier `cat.df` display stale.
df = cat.df.copy()

# Peek at the first few paths before they are rewritten
df['path'].head().values

array(['s3://ncar-na-cordex/day/hurs.eval.day.NAM-22i.raw.zarr',
       's3://ncar-na-cordex/day/hurs.eval.day.NAM-44i.raw.zarr',
       's3://ncar-na-cordex/day/hurs.hist-rcp45.day.NAM-22i.mbcn-Daymet.zarr',
       's3://ncar-na-cordex/day/hurs.hist-rcp45.day.NAM-22i.mbcn-gridMET.zarr',
       's3://ncar-na-cordex/day/hurs.hist-rcp45.day.NAM-22i.raw.zarr'],
      dtype=object)

In [10]:
# Point every catalog entry at the OSDF copy of the data by swapping the S3
# prefix for the OSDF one. For POSIX or HTTPS access, substitute
# `nacordex_posix` or `nacordex_https` for `osdf_link`.
#
# regex=False: the prefix is a literal string, and older pandas versions treat
# the pattern as a regular expression unless told otherwise.
# assign() returns a new frame, so the table this one was taken from is not
# modified and the cell can be re-run safely.
df = df.assign(path=df['path'].str.replace(s3_link, osdf_link, regex=False))
df

Unnamed: 0,variable,standard_name,long_name,units,spatial_domain,grid,spatial_resolution,scenario,start_time,end_time,frequency,vertical_levels,bias_correction,na-cordex-models,path
0,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-22i,0.25 deg,eval,1979-01-01T12:00:00,2014-12-31T12:00:00,day,1,raw,"['ERA-Int.CRCM5-UQAM', 'ERA-Int.CRCM5-OUR', 'E...",osdf:///ncar/rda/d316009/day/hurs.eval.day.NAM...
1,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-44i,0.50 deg,eval,1979-01-01T12:00:00,2015-12-31T12:00:00,day,1,raw,"['ERA-Int.CRCM5-UQAM', 'ERA-Int.RegCM4', 'ERA-...",osdf:///ncar/rda/d316009/day/hurs.eval.day.NAM...
2,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-22i,0.25 deg,hist-rcp45,1949-01-01T12:00:00,2100-12-31T12:00:00,day,1,mbcn-Daymet,['CanESM2.CanRCM4'],osdf:///ncar/rda/d316009/day/hurs.hist-rcp45.d...
3,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-22i,0.25 deg,hist-rcp45,1949-01-01T12:00:00,2100-12-31T12:00:00,day,1,mbcn-gridMET,['CanESM2.CanRCM4'],osdf:///ncar/rda/d316009/day/hurs.hist-rcp45.d...
4,hurs,relative_humidity,Near-Surface Relative Humidity,%,north_america,NAM-22i,0.25 deg,hist-rcp45,1949-01-01T12:00:00,2100-12-31T12:00:00,day,1,raw,"['GFDL-ESM2M.CRCM5-OUR', 'CanESM2.CRCM5-OUR', ...",osdf:///ncar/rda/d316009/day/hurs.hist-rcp45.d...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,vas,northward_wind,Northward Near-Surface Wind,m s-1,north_america,NAM-44i,0.50 deg,rcp45,2006-01-01T12:00:00,2100-12-31T12:00:00,day,1,raw,"['MPI-ESM-LR.CRCM5-UQAM', 'CanESM2.CRCM5-UQAM'...",osdf:///ncar/rda/d316009/day/vas.rcp45.day.NAM...
326,vas,northward_wind,Northward Near-Surface Wind (Bias-Adjusted),m s-1,north_america,NAM-22i,0.25 deg,rcp85,2006-01-01T12:00:00,2100-12-31T12:00:00,day,1,mbcn-gridMET,"['MPI-ESM-MR.CRCM5-UQAM', 'GEMatm-Can.CRCM5-UQ...",osdf:///ncar/rda/d316009/day/vas.rcp85.day.NAM...
327,vas,northward_wind,Northward Near-Surface Wind,m s-1,north_america,NAM-22i,0.25 deg,rcp85,2006-01-01T12:00:00,2100-12-31T12:00:00,day,1,raw,"['MPI-ESM-MR.CRCM5-UQAM', 'GEMatm-Can.CRCM5-UQ...",osdf:///ncar/rda/d316009/day/vas.rcp85.day.NAM...
328,vas,northward_wind,Northward Near-Surface Wind (Bias-Adjusted),m s-1,north_america,NAM-44i,0.50 deg,rcp85,2006-01-01T12:00:00,2100-12-31T12:00:00,day,1,mbcn-gridMET,"['MPI-ESM-MR.CRCM5-UQAM', 'GEMatm-Can.CRCM5-UQ...",osdf:///ncar/rda/d316009/day/vas.rcp85.day.NAM...


In [11]:
# %%time
# Write the modified catalog table to disk (disabled). Enable only the line
# that matches the prefix substituted into `path` above.
# NOTE: pass the boolean index=False. The string 'False' is truthy, so the row
# index is still written and comes back as an "Unnamed: 0" column when the CSV
# is read by the catalog.
# df.to_csv(rda_data +'harshah/intake_catalogs/posix/na-cordex.csv',index=False)
# df.to_csv(rda_data +'harshah/intake_catalogs/https/na-cordex.csv',index=False)
# df.to_csv(rda_data +'harshah/intake_catalogs/osdf/na-cordex/na-cordex.csv',index=False)

In [13]:
# Only the OSDF flavour of the saved catalog is opened here; the POSIX and
# HTTPS variants are kept (disabled) for reference.
# cat1 = intake.open_esm_datastore(rda_data + 'harshah/intake_catalogs/posix/na-cordex.json')
# cat2 = intake.open_esm_datastore(rda_data + 'harshah/intake_catalogs/https/na-cordex.json')
osdf_catalog_json = rda_url + 'harshah/intake_catalogs/osdf/na-cordex/na-cordex.json'
cat3 = intake.open_esm_datastore(osdf_catalog_json)
cat3

Unnamed: 0,unique
Unnamed: 0,330
variable,15
standard_name,10
long_name,18
units,10
spatial_domain,1
grid,2
spatial_resolution,2
scenario,6
start_time,3


In [None]:
# Disabled: `cat1` (the POSIX-path catalog) is never opened in this notebook
# because its open_esm_datastore call is commented out, so this line would
# raise NameError on a fresh kernel. Re-enable both lines together.
# cat1.df['path']

In [None]:
# Disabled: `cat2` (the HTTPS-path catalog) is never opened in this notebook
# because its open_esm_datastore call is commented out, so this line would
# raise NameError on a fresh kernel. Re-enable both lines together.
# cat2.df['path']

In [14]:
# Check the paths stored in the saved catalog (expected to start with osdf://)
cat3.df['path']

0      osdf:///ncar/rda/d316009/day/hurs.eval.day.NAM...
1      osdf:///ncar/rda/d316009/day/hurs.eval.day.NAM...
2      osdf:///ncar/rda/d316009/day/hurs.hist-rcp45.d...
3      osdf:///ncar/rda/d316009/day/hurs.hist-rcp45.d...
4      osdf:///ncar/rda/d316009/day/hurs.hist-rcp45.d...
                             ...                        
325    osdf:///ncar/rda/d316009/day/vas.rcp45.day.NAM...
326    osdf:///ncar/rda/d316009/day/vas.rcp85.day.NAM...
327    osdf:///ncar/rda/d316009/day/vas.rcp85.day.NAM...
328    osdf:///ncar/rda/d316009/day/vas.rcp85.day.NAM...
329    osdf:///ncar/rda/d316009/day/vas.rcp85.day.NAM...
Name: path, Length: 330, dtype: object

## Open the saved catalog, load some data and plot

In [15]:
# Narrow the saved catalog to daily near-surface relative humidity (`hurs`)
hurs_daily_query = {'variable': 'hurs', 'frequency': 'day'}
cat_rh = cat3.search(**hurs_daily_query)
cat_rh

Unnamed: 0,unique
Unnamed: 0,32
variable,1
standard_name,1
long_name,1
units,1
spatial_domain,1
grid,2
spatial_resolution,2
scenario,6
start_time,3


In [16]:
# Open every entry of the `hurs` subset as a dictionary of datasets; the
# message printed below describes how the dictionary keys are constructed.
dsets = cat_rh.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'variable.frequency.scenario.grid.bias_correction'
