In [1]:
%matplotlib inline 
import xarray as xr
import os 
import pandas as pd
import numpy as np

import json

Load the CMIP6 source ID file, pulled from the WCRP-CMIP `CMIP6_CVs` GitHub repo: https://github.com/WCRP-CMIP/CMIP6_CVs

In [2]:
# Location of the CMIP6 source_id JSON (a local copy of the CMIP6_CVs repo).
# The default is the original hard-coded path; set the CMIP6_SOURCE_ID_FILE
# environment variable to point at a copy stored elsewhere.
cmip6_file = os.environ.get('CMIP6_SOURCE_ID_FILE',
                            '/home/jovyan/CMIP6_CVs/CMIP6_source_id.json')

In [3]:
# Parse the source_id JSON file into `data`.
# The encoding is given explicitly so decoding does not depend on the
# platform's default text encoding.
with open(cmip6_file, encoding='utf-8') as f:
    data = json.load(f)

In [128]:
# data['source_id']['FGOALS-g3']

In [104]:
def parse_grid_description_str(grid_desc_str):
    '''
    Pick the grid field out of an atmosphere model description string.

    The text following the first "(" is split on ";" and one field is
    selected according to how many fields the split produced:

    - 2 fields: the second field
    - 3 fields: the first field
    - 4 fields: the second field

    The selected field is returned exactly as it appears in the input
    (surrounding whitespace and any trailing ")" are kept).

    Parameters
    ----------
    grid_desc_str : str
        Description text to parse.

    Returns
    -------
    str or None
        The selected field, or None when the string contains no "(" or
        the number of ";"-separated fields is not 2, 3 or 4.
    '''
    # partition() never raises: `paren` is empty when there is no "(".
    _, paren, inner = grid_desc_str.partition('(')
    if not paren:
        return None

    fields = inner.split(';')
    # Maps the number of fields to the index of the field to return.
    field_index = {2: 1, 3: 0, 4: 1}.get(len(fields))
    if field_index is None:
        return None
    return fields[field_index]

Create a DataFrame with resolution, grid info, modeling center, and whether or not the model is currently available in the CMIP6 Google Cloud archive.

In [152]:
import intake

# Open the CMIP6 catalog from its JSON descriptor.
col = intake.open_esm_datastore(
    "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
)

# Narrow the catalog to the experiments, table and variables of interest.
cat = col.search(
    experiment_id=['historical', 'ssp126', 'ssp245', 'ssp370'],
    table_id='day',
    variable_id=['tasmax', 'tasmin', 'pr'],
)

In [153]:
# Distinct source_id values present in the filtered catalog.
models_in_cmip6_archive = cat.df["source_id"].unique()

In [158]:
# One record per model that lists ScenarioMIP in its activity participation,
# keyed by the model's source_id.
grid_info = {}

for model, entry in data['source_id'].items():
    # Skip models that do not list ScenarioMIP.
    if 'ScenarioMIP' not in entry['activity_participation']:
        continue

    center = entry['institution_id'][0]  # first listed institution only
    atmos = entry['model_component']['atmos']
    description = parse_grid_description_str(atmos['description'])
    # Flag whether the model shows up in the filtered catalog.
    in_archive = 'yes' if model in models_in_cmip6_archive else 'no'

    grid_info[model] = {
        'modeling_center': center,
        'GCS archive?': in_archive,
        'resolution': atmos['native_nominal_resolution'],
        'grid_description': description,
    }

# Rows are models; columns are the record keys above.
df_grid = pd.DataFrame.from_dict(grid_info, orient='index')

In [162]:
# Preview the first five rows of the table.
df_grid.head(n=5)

Unnamed: 0,modeling_center,GCS archive?,resolution,grid_description
ACCESS-CM2,CSIRO-ARCCSS,yes,250 km,192 x 144 longitude/latitude
ACCESS-ESM1-5,CSIRO,yes,250 km,192 x 145 longitude/latitude
AWI-CM-1-1-LR,AWI,no,250 km,192 x 96 longitude/latitude
AWI-CM-1-1-MR,AWI,yes,100 km,384 x 192 longitude/latitude
BCC-CSM2-MR,BCC,yes,100 km,320 x 160 longitude/latitude


In [160]:
# Write the table to a CSV file in the current working directory.
df_grid.to_csv(path_or_buf='CMIP6_grid_info.csv')

look at grid labels for our MVP variables/experiments

In [None]:
# Same experiment/table/variable filter as before, additionally limited to
# the CMIP and ScenarioMIP activities.
cat_nogrids = col.search(
    activity_id=['CMIP', 'ScenarioMIP'],
    experiment_id=['historical', 'ssp126', 'ssp245', 'ssp370'],
    table_id='day',
    variable_id=['tasmax', 'tasmin', 'pr'],
)

In [None]:
# For each grid label, count the distinct experiments, members and models.
(
    cat_nogrids.df
    .groupby("grid_label")[["experiment_id", "member_id", "source_id"]]
    .nunique()
)