# Load CMIP6 Data from Pangeo

The commented code boxes below recreate the data loaded in the previous line.

It has been commented out as this code is NOT NECESSARY for this tutorial to run. Please DO NOT run it straightaway: the code takes quite a long time to run and uses significant computing resources. However, the code is provided to give a better idea of how data can be obtained through [Pangeo](https://pangeo.io/) - which is an immensely valuable resource we have introduced previously. 

In this way you can access large amounts of climate model output that has been stored in the cloud. This is very useful to get easy access to such information. Feel free to modify the code to access different data and address your own questions.

In [None]:
# import intake 
from xmip.preprocessing import combined_preprocessing
from xarrayutils.plotting import shaded_line_plot

from xmip.utils import google_cmip_col
# Open the CMIP6 catalog through xmip's `google_cmip_col` helper; the
# resulting `col` object is what gets queried with `.search(...)` below.
# (We could do all of this with pure pandas on the underlying csv file.)
col = google_cmip_col()

In [None]:
# Narrow the catalog to two models, two variables, one ensemble member,
# daily output on the native grid, for the historical run plus three SSP
# scenarios. `require_all_on` is asked to enforce completeness across
# experiments and variables.
cat = col.search(
    source_id=['MPI-ESM1-2-HR', 'MIROC6'],
    variable_id=['pr', 'tas'],
    member_id='r1i1p1f1',
    table_id='day',
    grid_label='gn',
    experiment_id=['historical', 'ssp126', 'ssp245', 'ssp585'],
    require_all_on=['experiment_id', 'variable_id'],
)

# NOTE(review): `kwargs` is assembled here but is NOT passed to
# `to_dataset_dict` below, so neither `combined_preprocessing` nor
# `use_cftime=True` takes effect. Confirm whether that is intentional before
# wiring it in -- the later `.sel(lon=..., lat=..., method='nearest')` and
# `pd.to_datetime(...)` steps presumably rely on the un-preprocessed layout.
kwargs = {
    'preprocess': combined_preprocessing,
    'xarray_open_kwargs': {'use_cftime': True},
}

# Open every matching store; the result is a dict keyed by dataset id.
ds_dict = cat.to_dataset_dict(zarr_kwargs=dict(consolidated=True))

Define one or more locations

In [1]:
# Each location is a dict of selection coordinates (`lon`, `lat`).
# Longitudes are written in degrees east on a 0-360 range, so a western
# longitude of W degrees is expressed as `360 - W`.
sel_Hamburg = dict(lon=10, lat=53.5)
# Madrid lies at roughly 3.7 degrees W; `360 - 42` would put the point in the
# middle of the North Atlantic.
sel_Madrid = dict(lon=360 - 3.7, lat=40.5)
sel_Delhi = dict(lon=77, lat=28.5)
sel_Kinshasa = dict(lon=15, lat=-4)
sel_Phoenix = dict(lon=360 - 112, lat=33.5)
sel_Sydney = dict(lon=151, lat=-33.85)

# City name -> selection dict; iteration order defines the order of cities.
sels = dict(
    Hamburg=sel_Hamburg,
    Madrid=sel_Madrid,
    Delhi=sel_Delhi,
    Kinshasa=sel_Kinshasa,
    Phoenix=sel_Phoenix,
    Sydney=sel_Sydney,
)

Assemble your data

In [None]:
# Build a lookup '<model>.<scenario>' -> full dataset key. The dataset keys
# are dot-separated; fields 2 and 3 are read as the model and the scenario.
model_dict = {}
for k in ds_dict:
    string = k.split('.')
    model, scenario = string[2], string[3]
    print(string, model, scenario)
    model_dict['.'.join((model, scenario))] = k

# Explicit lists fix the order along the 'model' and 'scenario' dimensions.
models = ['MPI-ESM1-2-HR', 'MIROC6']
scenarios = ['ssp126', 'ssp245', 'ssp585']

# For every model / city / scenario: take the grid point nearest to the city,
# join the historical run (up to 2014) with the scenario run (up to 2100)
# along time, then stack along 'scenario', 'city' and finally 'model'.
outs = []
for model in models:
    outsm = []
    for city in sels:
        # The historical segment does not depend on the scenario, so it is
        # selected once per (model, city) rather than once per scenario.
        historical = (
            ds_dict[model_dict['.'.join((model, 'historical'))]]
            .sel(sels[city], method='nearest')
            .sel(time=slice(None, '2014'))
        )
        outis = []
        for scenario in scenarios:
            future = (
                ds_dict[model_dict['.'.join((model, scenario))]]
                .sel(time=slice(None, '2100'))
                .sel(sels[city], method='nearest')
            )
            timeseries = xr.concat([historical, future], 'time')
            # Label the series so the later concats have coordinates to use.
            timeseries = timeseries.assign_coords(
                city=city, scenario=scenario, model=model
            ).squeeze()
            outis.append(timeseries)
        outsm.append(xr.concat(outis, 'scenario'))
    outs.append(xr.concat(outsm, 'city'))
data = xr.concat(outs, 'model')

In [None]:
# Report the size of `data` in MB. The division is parenthesised because `%`
# and `/` share the same precedence and associate left to right: without the
# parentheses the already-formatted string would be divided by 1e6, which
# raises a TypeError.
print('The data has %.3f MB' % (data.nbytes / 1e6))
print('The next line triggers the computation!')
#with ProgressBar():
#     data.load()

Fix time axis
# Replace the time coordinate of `data` with pandas datetimes.
data = data.assign_coords(time=pd.to_datetime(data.time))
# Convert precip data to mm/day (86400 = seconds per day; presumably the
# source values are per-second rates -- verify against the 'units' attribute).
# The original attributes are read before the multiplication and re-attached
# to the scaled result, with 'units' overridden.
data['pr'] = (data['pr'] * 86400).assign_attrs(data['pr'].attrs, units='mm/day')