# Data availability

In [33]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
from scipy.signal import detrend
from matplotlib import pyplot as plt
from scipy import signal
import pandas as pd
import xarray as xr
import intake
import pprint 
import util 

In [34]:
# Pick the CMIP6 catalog description that matches the machine we run on:
# the glade catalog when util.is_ncar_host() is true, the pangeo one otherwise.
catalog_path = (
    "../catalogs/glade-cmip6.json"
    if util.is_ncar_host()
    else "../catalogs/pangeo-cmip6.json"
)
col = intake.open_esm_datastore(catalog_path)
col;  # trailing ';' keeps the notebook from echoing the catalog repr

In [97]:
# Build a table (one row per model) describing the availability of monthly
# surface temperature ('ts', table 'Amon') for each experiment. Two columns
# are filled per experiment:
#   '<experiment> (ens.mem.)' - number of ensemble members (ds.member_id.size),
#                               or the string 'data problem' for excluded models
#   '<experiment> (yrs)'      - run length in years, derived from the first and
#                               last time stamps of the dataset
exp_list = ['piControl', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'abrupt-4xCO2']

# Models that cause errors in .to_dataset_dict and must be excluded, per experiment.
problem_models = {
    'ssp126': ['EC-Earth3'],  # EC-Earth3_ssp126_r15i1p1f1 is missing many years
    'ssp245': ['UKESM1-0-LL', 'EC-Earth3'],  # UKESM1-0-LL_ssp245_r8i1p1f2 causes error that I don't understand
}

df = pd.DataFrame()
for chosen_exp in exp_list:
    print(chosen_exp)
    cat = col.search(experiment_id=chosen_exp, variable_id='ts', table_id='Amon')

    # All models the catalog lists for this experiment.
    uni_dict = cat.unique(['source_id'])
    models = uni_dict['source_id']['values']

    # Drop the problematic models by filtering rather than list.remove(), so a
    # catalog that does not contain one of them no longer raises ValueError.
    remove_models = problem_models.get(chosen_exp, [])
    source_ids = [mod for mod in models if mod not in remove_models]
    for mod in remove_models:
        if mod in models:
            # Only flag models that really are in the catalog; models that are
            # absent simply keep an empty (NaN) entry.
            df.loc[mod, chosen_exp + ' (ens.mem.)'] = 'data problem'

    # load new table without the problematic models:
    cat = col.search(source_id=source_ids, experiment_id=chosen_exp, variable_id='ts', table_id='Amon')

    dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})
    keys = list(dset_dict)

    for key in keys:
        ds = dset_dict[key]
        model = ds.source_id
        # The first 7 characters of the stringified time stamp are kept as
        # 'YYYY-MM'; only the leading 4 (the year) enter the run length.
        start_time = str(ds['time'][0].values)[:7]
        end_time = str(ds['time'][-1].values)[:7]
        # Count both the first and the last calendar year.
        run_length = int(end_time[:4]) + 1 - int(start_time[:4])
        df.loc[model, chosen_exp + ' (yrs)'] = run_length
        df.loc[model, chosen_exp + ' (ens.mem.)'] = ds.member_id.size  # number of ensemble members

    

piControl

xarray will load netCDF datasets with dask using a single chunk for all arrays.
For effective chunking, please provide chunks in cdf_kwargs.
For example: cdf_kwargs={'chunks': {'time': 36}}

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 28 group(s)
historical

xarray will load netCDF datasets with dask using a single chunk for all arrays.
For effective chunking, please provide chunks in cdf_kwargs.
For example: cdf_kwargs={'chunks': {'time': 36}}

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 26 group(s)
ssp126

xarray will load netCDF datasets with dask using a single chunk for all arrays.
For effective chunking, please provide chunks in cdf_kwargs.
For example: cdf_kwargs={'chunks': {'time': 36}}

--> The keys in the return

In [119]:
# Reorder the availability table: every ensemble-member column first, then
# every run-length column. Columns are recognised by their name suffix.
column_names = list(df.columns)
ens_cols = [name for name in column_names if name.endswith('(ens.mem.)')]
yr_cols = [name for name in column_names if name.endswith('(yrs)')]
df2 = df[ens_cols + yr_cols]
df2
# Optional export of the table:
#df2.to_csv('available_data.txt', sep='\t')

Unnamed: 0,piControl (ens.mem.),historical (ens.mem.),ssp126 (ens.mem.),ssp245 (ens.mem.),ssp370 (ens.mem.),ssp585 (ens.mem.),abrupt-4xCO2 (ens.mem.),piControl (yrs),historical (yrs),ssp126 (yrs),ssp245 (yrs),ssp370 (yrs),ssp585 (yrs),abrupt-4xCO2 (yrs)
BCC-CSM2-MR,1.0,3.0,,1,,1.0,1.0,600.0,165.0,,86.0,,86.0,151.0
BCC-ESM1,1.0,3.0,,,3.0,,1.0,451.0,165.0,,,41.0,,151.0
CAMS-CSM1-0,1.0,2.0,1,1,1.0,1.0,2.0,250.0,165.0,85.0,85.0,85.0,85.0,155.0
FGOALS-g3,1.0,3.0,1,1,1.0,1.0,,670.0,167.0,86.0,80.0,81.0,81.0,
CanESM5,2.0,44.0,34,38,43.0,42.0,2.0,1000.0,165.0,286.0,86.0,86.0,166.0,151.0
CNRM-CM6-1,1.0,14.0,4,5,6.0,5.0,6.0,500.0,165.0,86.0,86.0,86.0,86.0,10.0
CNRM-ESM2-1,1.0,5.0,5,3,3.0,3.0,3.0,500.0,165.0,86.0,86.0,86.0,86.0,150.0
E3SM-1-0,1.0,5.0,,,,,1.0,500.0,165.0,,,,,125.0
EC-Earth3,1.0,12.0,data problem,data problem,7.0,6.0,,501.0,166.0,,,86.0,86.0,
EC-Earth3-Veg,1.0,4.0,3,3,3.0,3.0,1.0,496.0,165.0,86.0,86.0,86.0,86.0,151.0
