In [1]:
import intake
import xarray as xr
import os 
import pandas as pd
import numpy as np

In [2]:
import warnings
warnings.filterwarnings("ignore")

Get some CMIP6 data from Google Cloud Storage (GCS)

Here we're going to get daily maximum near-surface air temperature (`tasmax`) from the `IPSL-CM6A-LR` model for the historical and SSP370 runs. The ensemble member `r1i1p1f1` isn't available in GCS, so we're using `r4i1p1f1` instead.

Note that the `activity_id` for historical runs is `CMIP`, not `ScenarioMIP` as it is for the ssp-rcp scenarios. 

In [3]:
# Facets identifying the future-scenario dataset to look up in the CMIP6 catalog.
# The historical run is selected separately: its activity is `CMIP`, not `ScenarioMIP`.
activity_id = 'ScenarioMIP'
experiment_id = 'ssp370'
table_id = 'day'          # daily output table
variable_id = 'tasmax'
source_id = 'IPSL-CM6A-LR'
# The catalog lists IPSL-CM6A-LR under institution `IPSL`; the previous value
# 'NCAR' did not correspond to this model.
institution_id = 'IPSL'
# r1i1p1f1 is not available in GCS for this model, so r4i1p1f1 is used instead.
member_id = 'r4i1p1f1'

First, we'll take a look at what our options are.

In [4]:
# Read the CSV listing of CMIP6 zarr stores hosted on Google Cloud Storage.
# `version` is read as text rather than being parsed as a number.
df_cmip6 = pd.read_csv(
    'https://cmip6.storage.googleapis.com/cmip6-zarr-consolidated-stores-noQC.csv',
    dtype={'version': 'unicode'},
)

# Number of stores listed in the catalog.
len(df_cmip6)

351331

In [5]:
# Boolean mask selecting catalog rows that match every configured facet
# of the future-scenario run.
is_future_run = (
    df_cmip6['activity_id'].eq(activity_id)
    & df_cmip6['experiment_id'].eq(experiment_id)
    & df_cmip6['table_id'].eq(table_id)
    & df_cmip6['variable_id'].eq(variable_id)
    & df_cmip6['source_id'].eq(source_id)
    & df_cmip6['member_id'].eq(member_id)
)
df_subset_future = df_cmip6.loc[is_future_run]

In [6]:
# Display the catalog row(s) matching the future-scenario facets.
df_subset_future 

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version,status,severity,issue_url
319665,ScenarioMIP,IPSL,IPSL-CM6A-LR,ssp370,r4i1p1f1,day,tasmax,gr,gs://cmip6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp37...,,20190614,good,none,none


In [7]:
# Boolean mask selecting the historical run for the same table, variable,
# model and ensemble member as the future-scenario query.
is_hist_run = (
    df_cmip6['experiment_id'].eq('historical')
    & df_cmip6['table_id'].eq(table_id)
    & df_cmip6['variable_id'].eq(variable_id)
    & df_cmip6['source_id'].eq(source_id)
    & df_cmip6['member_id'].eq(member_id)
)
df_subset_hist = df_cmip6.loc[is_hist_run]

In [8]:
# Display the catalog row(s) matching the historical-run facets.
df_subset_hist

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version,status,severity,issue_url
53406,CMIP,IPSL,IPSL-CM6A-LR,historical,r4i1p1f1,day,tasmax,gr,gs://cmip6/CMIP/IPSL/IPSL-CM6A-LR/historical/r...,,20190614,good,none,none


Now let's actually pull the data.

In [9]:
# Open the Pangeo CMIP6 catalog as an intake-esm datastore.
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

# Search for both the historical run (activity `CMIP`) and the configured
# scenario run, for the chosen table, variable, model and ensemble member.
search_query = dict(
    activity_id=['CMIP', activity_id],
    experiment_id=['historical', experiment_id],
    table_id=table_id,
    variable_id=variable_id,
    source_id=source_id,
    member_id=member_id,
)
cat = col.search(**search_query)

In [32]:
# Container for the model datasets, keyed by experiment name.
ds_model = {}

# Open the historical run as a dask-backed dataset, take the first (only
# requested) ensemble member, squeeze out any length-1 dimensions, and remove
# the `member_id`, `height` and `time_bounds` variables.
# `drop_vars` is used instead of `Dataset.drop`: xarray keeps `drop` only as a
# backward-compatibility shim and encourages the explicit method.
ds_model['historical'] = (
    cat['CMIP.IPSL.IPSL-CM6A-LR.historical.day.gr']
    .to_dask()
    .isel(member_id=0)
    .squeeze(drop=True)
    .drop_vars(['member_id', 'height', 'time_bounds'])
)

In [33]:
# Open the SSP3-7.0 run as a dask-backed dataset, take the first (only
# requested) ensemble member, squeeze out any length-1 dimensions, and remove
# the `member_id`, `height` and `time_bounds` variables.
# `drop_vars` is used instead of `Dataset.drop`: xarray keeps `drop` only as a
# backward-compatibility shim and encourages the explicit method.
ds_model['ssp370'] = (
    cat['ScenarioMIP.IPSL.IPSL-CM6A-LR.ssp370.day.gr']
    .to_dask()
    .isel(member_id=0)
    .squeeze(drop=True)
    .drop_vars(['member_id', 'height', 'time_bounds'])
)