In [2]:
import glob
import pathlib
import traceback
from datetime import datetime

import xarray as xr

from ecgtools import Builder
from ecgtools.parsers.cesm import parse_cesm_timeseries
import intake

In [14]:
# Open collection description file
# (an intake-esm datastore described by the JSON document at `catalog_url`).
catalog_url = "https://ncar-cesm-lens.s3-us-west-2.amazonaws.com/catalogs/aws-cesm1-le.json"
col = intake.open_esm_datastore(catalog_url)
# Keep only the catalog entries for variable TREFHT at daily frequency.
dsets = col.search(variable='TREFHT', frequency='daily')

Unnamed: 0,unique
variable,77
long_name,74
component,5
experiment,4
frequency,6
vertical_levels,3
spatial_domain,5
units,25
start_time,12
end_time,13


In [2]:
# Set up the ecgtools catalog builder on the CESM2-LE timeseries directory.
# njobs is the number of parallel workers used while building (see the joblib log below).
b = Builder('/glade/campaign/cgd/cesm/CESM2-LE/timeseries/',
            depth=0, njobs=144)

In [3]:
# Build the catalog using the CESM timeseries parser; later cells read the result from `b.df`.
b.build(parse_cesm_timeseries)

[Parallel(n_jobs=144)]: Using backend LokyBackend with 144 concurrent workers.
[Parallel(n_jobs=144)]: Done   3 out of   6 | elapsed:   16.8s remaining:   16.8s
[Parallel(n_jobs=144)]: Done   6 out of   6 | elapsed:  1.5min finished
[Parallel(n_jobs=144)]: Using backend LokyBackend with 144 concurrent workers.
[Parallel(n_jobs=144)]: Done 162 tasks      | elapsed:   19.3s
[Parallel(n_jobs=144)]: Done 360 tasks      | elapsed:   22.4s
[Parallel(n_jobs=144)]: Done 594 tasks      | elapsed:   25.5s
[Parallel(n_jobs=144)]: Done 864 tasks      | elapsed:   29.8s
[Parallel(n_jobs=144)]: Done 1170 tasks      | elapsed:   34.0s
[Parallel(n_jobs=144)]: Done 1512 tasks      | elapsed:   38.2s
[Parallel(n_jobs=144)]: Done 1890 tasks      | elapsed:   42.6s
[Parallel(n_jobs=144)]: Done 2304 tasks      | elapsed:   47.7s
[Parallel(n_jobs=144)]: Done 2754 tasks      | elapsed:   52.5s
[Parallel(n_jobs=144)]: Done 3240 tasks      | elapsed:   57.8s
[Parallel(n_jobs=144)]: Done 3762 tasks      | elaps

Builder(root_path=PosixPath('/glade/campaign/cgd/cesm/CESM2-LE/timeseries'), extension='.nc', depth=0, exclude_patterns=None, njobs=144)

In [4]:
# Sorted list of the distinct case names present in the built catalog.
cases = sorted(b.df.case.unique())

In [5]:
# Display the case names.
cases

['b.e21.BHISTcmip6.f09_g17.LE2-1001.001',
 'b.e21.BHISTcmip6.f09_g17.LE2-1021.002',
 'b.e21.BHISTcmip6.f09_g17.LE2-1041.003',
 'b.e21.BHISTcmip6.f09_g17.LE2-1061.004',
 'b.e21.BHISTcmip6.f09_g17.LE2-1081.005',
 'b.e21.BHISTcmip6.f09_g17.LE2-1101.006',
 'b.e21.BHISTcmip6.f09_g17.LE2-1121.007',
 'b.e21.BHISTcmip6.f09_g17.LE2-1141.008',
 'b.e21.BHISTcmip6.f09_g17.LE2-1161.009',
 'b.e21.BHISTcmip6.f09_g17.LE2-1181.010',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.001',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.002',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.003',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.004',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.005',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.006',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.007',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.008',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.009',
 'b.e21.BHISTcmip6.f09_g17.LE2-1231.010',
 'b.e21.BHISTcmip6.f09_g17.LE2-1251.001',
 'b.e21.BHISTcmip6.f09_g17.LE2-1251.002',
 'b.e21.BHISTcmip6.f09_g17.LE2-1251.003',
 'b.e21.BHISTcmip6.f09_g17.LE2-125

In [4]:
import pandas as pd
# Load the existing CESM2-LE catalog table from the shared intake-esm datastore.
# NOTE(review): the recorded output for this cell appears to be the tail of a warning
# (text truncated) — confirm what pandas reported for this read.
df = pd.read_csv('/glade/collections/cmip/catalog/intake-esm-datastore/catalogs/glade-cesm2-le.csv.gz')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [5]:
# Distinct case names in the loaded catalog table (replaces the earlier `cases`).
cases = df.case.unique()

In [6]:
# Keep only the cases whose name does not contain 'Forcing'.
good_case_list = [case_name for case_name in cases if 'Forcing' not in case_name]

In [7]:
# Lookup from the compset token of a case name (e.g. 'BHISTcmip6') to the
# experiment and forcing_variant fields recorded in the catalog.
experiment_mapping = {
    'BHISTcmip6': dict(experiment='historical', forcing_variant='cmip6'),
    'BHISTsmbb': dict(experiment='historical', forcing_variant='smbb'),
    'BSSP370cmip6': dict(experiment='ssp370', forcing_variant='cmip6'),
    'BSSP370smbb': dict(experiment='ssp370', forcing_variant='smbb'),
}

In [8]:
def _build_cmip_member_id_mapping():
    """Build the lookup from CESM member ids to CMIP-style member ids.

    Keys have the form '<year>.<NNN>' and values the form
    'r<N>i<year>p1f<F>', where N is the member number without zero padding.
    The table contains exactly 100 entries:

    * years 1001, 1021, ..., 1181 with one member each (.001 to .010), suffix f1
    * years 1011, 1031, ..., 1191 with one member each (.001 to .010), suffix f2
    * years 1231, 1251, 1281 and 1301 with members .001 to .020 each;
      members .001-.010 get suffix f1 and members .011-.020 get suffix f2
    """
    mapping = {}

    # One member per start year: the member number advances with the year (step of 20).
    for forcing_index, first_year in ((1, 1001), (2, 1011)):
        for realization in range(1, 11):
            year = first_year + 20 * (realization - 1)
            mapping[f'{year}.{realization:03d}'] = f'r{realization}i{year}p1f{forcing_index}'

    # Twenty members per start year: first ten are f1, last ten are f2.
    for year in (1231, 1251, 1281, 1301):
        for realization in range(1, 21):
            forcing_index = 1 if realization <= 10 else 2
            mapping[f'{year}.{realization:03d}'] = f'r{realization}i{year}p1f{forcing_index}'

    return mapping


# Built once at definition time; the function below is applied to every catalog row,
# so rebuilding the table on each call would be wasted work.
_CMIP_MEMBER_ID_MAPPING = _build_cmip_member_id_mapping()


def add_cmip_member_id(member_id):
    """Return the CMIP-style member id for a CESM member id.

    Parameters
    ----------
    member_id : str
        CESM member id of the form '<year>.<NNN>', e.g. '1231.011'.

    Returns
    -------
    str
        The matching id of the form 'r<N>i<year>p1f<F>', e.g. 'r11i1231p1f2'.

    Raises
    ------
    KeyError
        If `member_id` is not one of the known member ids.
    """
    try:
        return _CMIP_MEMBER_ID_MAPPING[member_id]
    except KeyError:
        raise KeyError(f'Unknown CESM member id: {member_id!r}') from None

In [8]:
import pandas as pd
# Load the catalog table from the working copy under /glade/work.
df = pd.read_csv('/glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.csv.gz')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [9]:
# Work on the builder's catalog table from here on. This binds `df` to the object
# returned by `b.df`; no copy is made by this statement.
df = b.df

In [9]:
# Spot-check the member id lookup for one member.
add_cmip_member_id('1231.011')

'r11i1231p1f2'

In [10]:
def parse_case_name(case_name):
    """Derive additional catalog fields from a case name.

    The case name is expected to look like
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001': the third dot-separated token
    selects an entry of `experiment_mapping`, and the text after the hyphen
    is the CESM member id ('<year>.<NNN>').

    Returns a dict with the keys 'experiment', 'forcing_variant',
    'cesm_member_id', 'control_branch_year' (int), 'member_id' and
    'cmip_experiment_id', in that order.
    """
    compset = case_name.split('.')[2]
    # Copy the experiment/forcing fields so the shared mapping is never modified.
    parsed = dict(experiment_mapping[compset])

    cesm_member = case_name.split('-')[1]
    branch_year = cesm_member.split('.')[0]
    cmip_member = add_cmip_member_id(cesm_member)

    parsed['cesm_member_id'] = cesm_member
    parsed['control_branch_year'] = int(branch_year)
    parsed['member_id'] = cmip_member
    parsed['cmip_experiment_id'] = f"CESM2_{parsed['experiment']}_{cmip_member}"
    return parsed

In [11]:
# Parse every row's case name; the result is a Series holding one dict per row.
new_fields = df.case.apply(parse_case_name)

In [12]:
import pandas as pd
# Expand the per-row dicts into a DataFrame with one column per parsed field.
new_fields_df = pd.DataFrame(new_fields.tolist())

In [13]:
# Attach each parsed field to the catalog table as its own column.
# Column assignment aligns on index labels, so this relies on `df` and
# `new_fields_df` sharing the same index (presumably a default RangeIndex on both — verify).
for column in new_fields_df.columns:
    df[column] = new_fields_df[column]

In [14]:
# Second name for the table with the new columns (an alias, not a copy).
df_revised = df

In [15]:
# Sanity check: both names should report the same number of rows.
print(len(df), len(df_revised))

5359843 5359843


In [18]:
# List the distinct cases that have historical pop.h TEMP entries
# for control branch year 1301.
df_revised.loc[(df_revised.control_branch_year == 1301) &
               (df_revised.stream == 'pop.h') &
               (df_revised.variable == 'TEMP') &
               (df_revised.experiment == 'historical')].case.unique()

array(['b.e21.BHISTcmip6.f09_g17.LE2-1301.001',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.002',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.003',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.004',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.005',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.006',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.007',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.008',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.009',
       'b.e21.BHISTcmip6.f09_g17.LE2-1301.010',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.011',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.012',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.013',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.014',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.015',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.016',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.017',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.018',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.019',
       'b.e21.BHISTsmbb.f09_g17.LE2-1301.020'], dtype=object)

In [20]:
# Rebind `df` to the revised table.
df = df_revised

In [21]:
# Make the dtypes explicit: CESM member id as string, branch year as integer.
df['cesm_member_id'] = df.cesm_member_id.astype(str)
df['control_branch_year'] = df.control_branch_year.astype(int)

In [23]:
# Display the catalog table with the added columns.
df

Unnamed: 0,component,stream,case,member_id,variable,start_time,end_time,time_range,long_name,units,vertical_levels,frequency,path,experiment,forcing_variant,cesm_member_id,control_branch_year,cmip_experiment_id
0,atm,cam.h1,b.e21.BHISTcmip6.f09_g17.LE2-1001.001,r1i1001p1f1,ACTNI,1850-01-01,1859-12-31,18500101-18591231,Average Cloud Top ice number,m-3,1.0,day_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/a...,historical,cmip6,1001.001,1001,CESM2_historical_r1i1001p1f1
1,atm,cam.h1,b.e21.BHISTcmip6.f09_g17.LE2-1001.001,r1i1001p1f1,ACTNI,1860-01-01,1869-12-31,18600101-18691231,Average Cloud Top ice number,m-3,1.0,day_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/a...,historical,cmip6,1001.001,1001,CESM2_historical_r1i1001p1f1
2,atm,cam.h1,b.e21.BHISTcmip6.f09_g17.LE2-1001.001,r1i1001p1f1,ACTNI,1870-01-01,1879-12-31,18700101-18791231,Average Cloud Top ice number,m-3,1.0,day_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/a...,historical,cmip6,1001.001,1001,CESM2_historical_r1i1001p1f1
3,atm,cam.h1,b.e21.BHISTcmip6.f09_g17.LE2-1001.001,r1i1001p1f1,ACTNI,1880-01-01,1889-12-31,18800101-18891231,Average Cloud Top ice number,m-3,1.0,day_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/a...,historical,cmip6,1001.001,1001,CESM2_historical_r1i1001p1f1
4,atm,cam.h1,b.e21.BHISTcmip6.f09_g17.LE2-1001.001,r1i1001p1f1,ACTNI,1890-01-01,1899-12-31,18900101-18991231,Average Cloud Top ice number,m-3,1.0,day_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/a...,historical,cmip6,1001.001,1001,CESM2_historical_r1i1001p1f1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5359838,rof,mosart.h0,b.e21.BSSP370smbb.f09_g17.LE2-1301.018,r18i1301p1f2,TOTAL_DISCHARGE_TO_OCEAN_LIQ,2075-01,2084-12,207501-208412,MOSART total discharge into ocean: LIQ,m3/s,1.0,month_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/r...,ssp370,smbb,1301.018,1301,CESM2_ssp370_r18i1301p1f2
5359839,rof,mosart.h0,b.e21.BSSP370smbb.f09_g17.LE2-1301.018,r18i1301p1f2,TOTAL_DISCHARGE_TO_OCEAN_LIQ,2085-01,2094-12,208501-209412,MOSART total discharge into ocean: LIQ,m3/s,1.0,month_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/r...,ssp370,smbb,1301.018,1301,CESM2_ssp370_r18i1301p1f2
5359840,rof,mosart.h0,b.e21.BSSP370smbb.f09_g17.LE2-1301.018,r18i1301p1f2,TOTAL_DISCHARGE_TO_OCEAN_LIQ,2095-01,2100-12,209501-210012,MOSART total discharge into ocean: LIQ,m3/s,1.0,month_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/r...,ssp370,smbb,1301.018,1301,CESM2_ssp370_r18i1301p1f2
5359841,rof,mosart.h0,b.e21.BSSP370smbb.f09_g17.LE2-1301.019,r19i1301p1f2,TOTAL_DISCHARGE_TO_OCEAN_LIQ,2015-01,2024-12,201501-202412,MOSART total discharge into ocean: LIQ,m3/s,1.0,month_1,/glade/campaign/cgd/cesm/CESM2-LE/timeseries/r...,ssp370,smbb,1301.019,1301,CESM2_ssp370_r19i1301p1f2


In [24]:
# Write the revised catalog table to the working-copy CSV, leaving out the index column.
df.to_csv('/glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.csv.gz', index=False)

In [25]:
from ncar_jobqueue import NCARCluster
from distributed import Client
# Create a cluster, scale it to 10, and connect a client to it.
cluster = NCARCluster()
cluster.scale(10)
client = Client(cluster)
# Display the client summary (dashboard link, worker counts).
client

  from distributed.utils import format_bytes, parse_bytes, tmpfile
  from distributed.utils import format_bytes, parse_bytes, tmpfile
  from distributed.utils import format_bytes, parse_bytes, tmpfile
  from distributed.utils import parse_bytes


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.54:45565,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [26]:
# Open the working-copy catalog through its JSON description file and display its summary.
catalog = intake.open_esm_datastore('/glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.json')
catalog

  self._df, self.catalog_file = _fetch_catalog(self.esmcol_data, esmcol_obj, csv_kwargs)


Unnamed: 0,unique
component,6
stream,26
case,200
member_id,100
variable,1906
start_time,157
end_time,180
time_range,163
long_name,1800
units,184


In [33]:
# Subset: monthly TEMP from the ocean component, for the historical and ssp370 experiments.
oceandata = catalog.search(component='ocn',
                       variable=['TEMP'],
                       frequency='month_1',
                       experiment=['historical','ssp370'])

In [34]:
# Distinct cases in that subset that are historical with cmip6 forcing.
oceandata.search(experiment='historical',
               forcing_variant='cmip6').df.case.unique()

array(['b.e21.BHISTcmip6.f09_g17.LE2-1001.001',
       'b.e21.BHISTcmip6.f09_g17.LE2-1021.002',
       'b.e21.BHISTcmip6.f09_g17.LE2-1041.003',
       'b.e21.BHISTcmip6.f09_g17.LE2-1061.004',
       'b.e21.BHISTcmip6.f09_g17.LE2-1081.005',
       'b.e21.BHISTcmip6.f09_g17.LE2-1101.006',
       'b.e21.BHISTcmip6.f09_g17.LE2-1121.007',
       'b.e21.BHISTcmip6.f09_g17.LE2-1141.008',
       'b.e21.BHISTcmip6.f09_g17.LE2-1161.009',
       'b.e21.BHISTcmip6.f09_g17.LE2-1181.010',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.001',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.002',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.003',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.004',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.005',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.006',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.007',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.008',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.009',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.010',
       'b.e21.BHISTcmip6.f09_g17.LE2-125

In [29]:
# Same check against the full catalog, restricted to the pop.h stream.
catalog.search(experiment='historical',
               stream='pop.h',
               forcing_variant='cmip6').df.case.unique()

array(['b.e21.BHISTcmip6.f09_g17.LE2-1001.001',
       'b.e21.BHISTcmip6.f09_g17.LE2-1021.002',
       'b.e21.BHISTcmip6.f09_g17.LE2-1041.003',
       'b.e21.BHISTcmip6.f09_g17.LE2-1061.004',
       'b.e21.BHISTcmip6.f09_g17.LE2-1081.005',
       'b.e21.BHISTcmip6.f09_g17.LE2-1101.006',
       'b.e21.BHISTcmip6.f09_g17.LE2-1121.007',
       'b.e21.BHISTcmip6.f09_g17.LE2-1141.008',
       'b.e21.BHISTcmip6.f09_g17.LE2-1161.009',
       'b.e21.BHISTcmip6.f09_g17.LE2-1181.010',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.001',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.002',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.003',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.004',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.005',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.006',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.007',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.008',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.009',
       'b.e21.BHISTcmip6.f09_g17.LE2-1231.010',
       'b.e21.BHISTcmip6.f09_g17.LE2-125

In [37]:
# Save the builder's catalog table together with its JSON description.
b.save(
    '/glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.csv.gz',
    # Name of the column that holds each file's path
    path_column_name='path',
    # Name of the column that holds the variable names
    variable_column_name='variable',
    # Format of the data files - netcdf or zarr (netcdf here)
    data_format="netcdf",
    # Attributes intake-esm groups by when reading in variables
    groupby_attrs=["component", "experiment", "stream", "forcing_variant", "variable"],
    # Aggregations passed on to xarray when intake reads in the data
    aggregations=[
        {'type': 'union', 'attribute_name': 'variable'},
        {
            "type": "join_existing",
            "attribute_name": "time_range",
            "options": {"dim": "time", "coords": "minimal", "compat": "override"},
        },
        {
            "type": "join_new",
            "attribute_name": "member_id",
            "options": {"coords": "minimal", "compat": "override"},
        },
    ],
)

Saved catalog location: /glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.csv.json and /glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.csv.gz


  b.save(


In [193]:
# Re-open the working-copy catalog through its JSON description file.
catalog = intake.open_esm_datastore('/glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.json')

In [195]:
# Subset: monthly ocean TEMP entries for control branch year 1251.
# NOTE(review): the call originally ended with an unfinished positional `mem`
# after the keyword arguments, which is a SyntaxError; it has been dropped.
# If a member filter was intended, add it here as a keyword argument.
catalog_subset = catalog.search(component='ocn',
                                frequency='month_1',
                                variable='TEMP',
                                control_branch_year=1251)

In [200]:
# Reload the working-copy catalog table from disk.
df = pd.read_csv('/glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.csv.gz')

In [204]:
# Make sure the CMIP-style member id column is stored as strings.
df['member_id'] = df.member_id.astype(str)

In [205]:
# Write the table back to the same file it was read from, leaving out the index column.
df.to_csv('/glade/work/mgrover/intake-esm-catalogs/glade-cesm2-le.csv.gz', index=False)

In [170]:
# File paths of the entries in the subset.
catalog_subset.df.path.values

array(['/glade/campaign/cgd/cesm/CESM2-LE/.data_management/incorrect_data/b.e21.BHISTsmbb.f09_g17.LE2-1251.013.pop.h.TEMP.190001-190912.nc',
       '/glade/campaign/cgd/cesm/CESM2-LE/.data_management/incorrect_data/b.e21.BHISTsmbb.f09_g17.LE2-1251.013.pop.h.TEMP.191001-191912.nc',
       '/glade/campaign/cgd/cesm/CESM2-LE/ocn/proc/tseries/month_1/TEMP/b.e21.BHISTsmbb.f09_g17.LE2-1251.013.pop.h.TEMP.185001-185912.nc',
       '/glade/campaign/cgd/cesm/CESM2-LE/ocn/proc/tseries/month_1/TEMP/b.e21.BHISTsmbb.f09_g17.LE2-1251.013.pop.h.TEMP.186001-186912.nc',
       '/glade/campaign/cgd/cesm/CESM2-LE/ocn/proc/tseries/month_1/TEMP/b.e21.BHISTsmbb.f09_g17.LE2-1251.013.pop.h.TEMP.187001-187912.nc',
       '/glade/campaign/cgd/cesm/CESM2-LE/ocn/proc/tseries/month_1/TEMP/b.e21.BHISTsmbb.f09_g17.LE2-1251.013.pop.h.TEMP.188001-188912.nc',
       '/glade/campaign/cgd/cesm/CESM2-LE/ocn/proc/tseries/month_1/TEMP/b.e21.BHISTsmbb.f09_g17.LE2-1251.013.pop.h.TEMP.189001-189912.nc',
       '/glade/campaign

In [43]:
# Open the catalog from the intake-esm-datastore git checkout and display its summary.
data_catalog = intake.open_esm_datastore('/glade/u/home/mgrover/projects/git_repos/intake-esm-datastore/catalogs/glade-cesm2-le.json')
data_catalog

  self._df, self.catalog_file = _fetch_catalog(self.esmcol_data, esmcol_obj, csv_kwargs)


Unnamed: 0,unique
component,6
stream,26
case,200
member_id,100
variable,1906
start_time,157
end_time,180
time_range,163
long_name,1800
units,184


In [30]:
from ncar_jobqueue import NCARCluster
from distributed import Client

# Create a cluster, scale it to 10, and connect a client to it.
# NOTE(review): this creates a new cluster each time it runs; the recorded output
# ("Perhaps you already have a cluster running?", alternate port) suggests one was
# already up when this cell executed — confirm a second cluster is intended.
cluster = NCARCluster()
cluster.scale(10)
client = Client(cluster)

  from distributed.utils import format_bytes, parse_bytes, tmpfile
  from distributed.utils import format_bytes, parse_bytes, tmpfile
  from distributed.utils import format_bytes, parse_bytes, tmpfile
  from distributed.utils import parse_bytes
Perhaps you already have a cluster running?
Hosting the HTTP server on port 38456 instead


In [31]:
# Display the client summary (dashboard link, worker counts).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/38456/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/38456/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.50:33777,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/38456/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [46]:
# Monthly `aice` entries from the ice component; the displayed summary describes the matching subset.
data_catalog.search(component='ice', variable='aice', frequency='month_1')

Unnamed: 0,unique
component,1
stream,1
case,200
member_id,100
variable,1
start_time,26
end_time,26
time_range,26
long_name,1
units,1
