# CMIP6 download and preprocess 

We will use the `xmip` package to download and preprocess CMIP6 data, and `xesmf` to regrid it, following these steps:

1. Download CMIP6 data
2. Merge CMIP6 data
3. Regrid CMIP6 data
4. Save CMIP6 data
5. Read and check the CMIP6 data

This notebook is adapted from the Pangeo CMIP6 tutorial: https://gallery.pangeo.io/repos/pangeo-gallery/cmip6/. Please refer to the tutorial for more details.

## 1. Download CMIP6 data

In [1]:
# Load the necessary packages
from xmip.utils import google_cmip_col
from xmip.preprocessing import combined_preprocessing
from xmip.postprocessing import merge_variables
import xarray as xr
import numpy as np
import xesmf as xe
import matplotlib.pyplot as plt
import cartopy.crs as crrs

In [2]:
# Open the CMIP6 catalogue through xmip's helper
col = google_cmip_col()
experiment_id = 'historical'

# Options that are later unpacked into `to_dataset_dict`
kwargs = dict(
    zarr_kwargs=dict(consolidated=True, use_cftime=True),
    aggregate=False,
    preprocess=combined_preprocessing,
)

# First pass: search without a source_id filter to see which models match
cat_data = col.search(
    experiment_id=experiment_id,
    grid_label='gn',
    table_id='Amon',
    variable_id=['ts', 'hfls', 'pr', 'clivi', 'clt', 'wap', 'cl'],
    member_id='r1i1p1f1',
)
cat_data.df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,ts,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
1,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,hfls,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
2,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,cl,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
3,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,clivi,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
4,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,clt,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
...,...,...,...,...,...,...,...,...,...,...,...
232,CMIP,CMCC,CMCC-ESM2,historical,r1i1p1f1,Amon,hfls,gn,gs://cmip6/CMIP6/CMIP/CMCC/CMCC-ESM2/historica...,,20210114
233,CMIP,MPI-M,ICON-ESM-LR,historical,r1i1p1f1,Amon,clivi,gn,gs://cmip6/CMIP6/CMIP/MPI-M/ICON-ESM-LR/histor...,,20210215
234,CMIP,MPI-M,ICON-ESM-LR,historical,r1i1p1f1,Amon,pr,gn,gs://cmip6/CMIP6/CMIP/MPI-M/ICON-ESM-LR/histor...,,20210215
235,CMIP,MPI-M,ICON-ESM-LR,historical,r1i1p1f1,Amon,ts,gn,gs://cmip6/CMIP6/CMIP/MPI-M/ICON-ESM-LR/histor...,,20210215


In [3]:
# Collect the distinct model names returned by the search, leaving out ICON-ESM-LR.
# A set difference (instead of set.remove) keeps this cell re-runnable: it does not
# raise KeyError when ICON-ESM-LR is absent, e.g. once `cat_data` has been
# overwritten by the filtered search further down.
# NOTE(review): the reason for excluding ICON-ESM-LR is not recorded here — presumably
# its native grid; confirm.
source_id = set(cat_data.df.source_id) - {'ICON-ESM-LR'}

In [4]:
# Display the model names that remain after the exclusion
set(source_id)

{'ACCESS-CM2',
 'ACCESS-ESM1-5',
 'AWI-CM-1-1-MR',
 'AWI-ESM-1-1-LR',
 'BCC-CSM2-MR',
 'BCC-ESM1',
 'CAMS-CSM1-0',
 'CAS-ESM2-0',
 'CESM2',
 'CESM2-FV2',
 'CESM2-WACCM',
 'CESM2-WACCM-FV2',
 'CMCC-CM2-HR4',
 'CMCC-CM2-SR5',
 'CMCC-ESM2',
 'CanESM5',
 'FGOALS-g3',
 'FIO-ESM-2-0',
 'GISS-E2-1-G',
 'GISS-E2-1-G-CC',
 'GISS-E2-1-H',
 'GISS-E2-2-H',
 'IITM-ESM',
 'MCM-UA-1-0',
 'MIROC6',
 'MPI-ESM-1-2-HAM',
 'MPI-ESM1-2-HR',
 'MPI-ESM1-2-LR',
 'MRI-ESM2-0',
 'NESM3',
 'NorCPM1',
 'NorESM2-LM',
 'NorESM2-MM',
 'SAM0-UNICON',
 'TaiESM1'}

In [5]:

# Repeat the search, this time restricted to the models kept in `source_id`
search_query = {
    'source_id': source_id,
    'experiment_id': experiment_id,
    'grid_label': 'gn',
    'table_id': 'Amon',
    'variable_id': ['ts', 'hfls', 'pr', 'clivi', 'clt', 'wap', 'cl'],
    'member_id': 'r1i1p1f1',
}
cat_data = col.search(**search_query)

cat_data.df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,ts,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
1,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,hfls,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
2,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,cl,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
3,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,clivi,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
4,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,clt,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
...,...,...,...,...,...,...,...,...,...,...,...
228,CMIP,CMCC,CMCC-ESM2,historical,r1i1p1f1,Amon,clt,gn,gs://cmip6/CMIP6/CMIP/CMCC/CMCC-ESM2/historica...,,20210114
229,CMIP,CMCC,CMCC-ESM2,historical,r1i1p1f1,Amon,ts,gn,gs://cmip6/CMIP6/CMIP/CMCC/CMCC-ESM2/historica...,,20210114
230,CMIP,CMCC,CMCC-ESM2,historical,r1i1p1f1,Amon,wap,gn,gs://cmip6/CMIP6/CMIP/CMCC/CMCC-ESM2/historica...,,20210114
231,CMIP,CMCC,CMCC-ESM2,historical,r1i1p1f1,Amon,pr,gn,gs://cmip6/CMIP6/CMIP/CMCC/CMCC-ESM2/historica...,,20210114


In [6]:
# Load every catalogue entry into a dictionary of datasets, using the options in `kwargs`
ddict = cat_data.to_dataset_dict(**kwargs)



--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.zstore.dcpp_init_year.version'


    incompatible units for variable 'lev': Cannot convert from 'dimensionless' (dimensionless) to 'meter' ([length])
    incompatible units for variable 'lev': Cannot convert from 'dimensionless' (dimensionless) to 'meter' ([length])
    incompatible units for variable 'lev': Cannot convert from 'dimensionless' (dimensionless) to 'meter' ([length])
    incompatible units for variable 'lev': Cannot convert from 'dimensionless' (dimensionless) to 'meter' ([length])
    incompatible units for variable 'lev': Cannot convert from 'hectopascal' ([mass] / [length] / [time] ** 2) to 'meter' ([length])
    incompatible units for variable 'lev': Cannot convert from 'dimensionless' (dimensionless) to 'meter' ([length])
    incompatible units for variable 'lev': Cannot convert from 'dimensionless' (dimensionless) to 'meter' ([length])
    incompatible units for variable 'lev': Cannot convert from 'dimensionless' (dimensionless) to 'meter' ([length])
    incompatible units for variable 'lev': Canno

### Keep only AOGCM models

In [7]:
# Keep only the datasets whose `source_type` attribute mentions AOGCM
aogcm_dict = {
    name: dataset
    for name, dataset in ddict.items()
    if 'AOGCM' in dataset.attrs['source_type']
}
# Print the keys that were kept
for name in aogcm_dict:
    print(name)

CMIP.NCAR.CESM2-WACCM.historical.r1i1p1f1.Amon.hfls.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/Amon/hfls/gn/v20190227/.20190227
CMIP.NCAR.CESM2-WACCM-FV2.historical.r1i1p1f1.Amon.clt.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/clt/gn/v20191120/.20191120
CMIP.NCAR.CESM2-WACCM.historical.r1i1p1f1.Amon.ts.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/Amon/ts/gn/v20190227/.20190227
CMIP.NCAR.CESM2-FV2.historical.r1i1p1f1.Amon.wap.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/wap/gn/v20191120/.20191120
CMIP.NCAR.CESM2-FV2.historical.r1i1p1f1.Amon.hfls.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/hfls/gn/v20191120/.20191120
CMIP.NCAR.CESM2-WACCM.historical.r1i1p1f1.Amon.clt.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/Amon/clt/gn/v20190227/.20190227
CMIP.NCAR.CESM2.historical.r1i1p1f1.Amon.cl.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/cl/gn/v20190308/.201903

## 2. Merge CMIP6 data


In [8]:

# Merge the per-variable datasets so that each model ends up with a single dataset
ddict_merged = merge_variables(aogcm_dict)
[*ddict_merged]



['CESM2-WACCM.gn.historical.Amon.r1i1p1f1',
 'CESM2-WACCM-FV2.gn.historical.Amon.r1i1p1f1',
 'CESM2-FV2.gn.historical.Amon.r1i1p1f1',
 'CESM2.gn.historical.Amon.r1i1p1f1',
 'TaiESM1.gn.historical.Amon.r1i1p1f1',
 'GISS-E2-1-H.gn.historical.Amon.r1i1p1f1',
 'FIO-ESM-2-0.gn.historical.Amon.r1i1p1f1',
 'BCC-ESM1.gn.historical.Amon.r1i1p1f1',
 'NESM3.gn.historical.Amon.r1i1p1f1',
 'GISS-E2-1-G-CC.gn.historical.Amon.r1i1p1f1',
 'NorCPM1.gn.historical.Amon.r1i1p1f1',
 'CanESM5.gn.historical.Amon.r1i1p1f1',
 'GISS-E2-2-H.gn.historical.Amon.r1i1p1f1',
 'BCC-CSM2-MR.gn.historical.Amon.r1i1p1f1',
 'GISS-E2-1-G.gn.historical.Amon.r1i1p1f1',
 'AWI-CM-1-1-MR.gn.historical.Amon.r1i1p1f1',
 'CAS-ESM2-0.gn.historical.Amon.r1i1p1f1',
 'MPI-ESM-1-2-HAM.gn.historical.Amon.r1i1p1f1',
 'MPI-ESM1-2-LR.gn.historical.Amon.r1i1p1f1',
 'NorESM2-MM.gn.historical.Amon.r1i1p1f1',
 'IITM-ESM.gn.historical.Amon.r1i1p1f1',
 'MRI-ESM2-0.gn.historical.Amon.r1i1p1f1',
 'MCM-UA-1-0.gn.historical.Amon.r1i1p1f1',
 'CMCC-CM

In [9]:
# Variables every model must provide to be kept
required_vars = {'ts', 'hfls', 'pr', 'clivi', 'clt', 'wap', 'cl'}
merged_vars_dict = {}

for k, ds in ddict_merged.items():
    # Names of the variables available for this model
    cmip_vars = set(ds.keys())
    print(k, ':', cmip_vars)
    # Skip models that lack at least one required variable
    if not cmip_vars.issuperset(required_vars):
        continue
    merged_vars_dict[k] = ds


CESM2-WACCM.gn.historical.Amon.r1i1p1f1 : {'a', 'b', 'p0', 'wap', 'ps', 'cl', 'ts', 'pr', 'clivi', 'clt', 'hfls'}
CESM2-WACCM-FV2.gn.historical.Amon.r1i1p1f1 : {'a', 'b', 'p0', 'wap', 'ps', 'cl', 'hfls', 'ts', 'clivi', 'clt', 'pr'}
CESM2-FV2.gn.historical.Amon.r1i1p1f1 : {'b', 'p0', 'wap', 'ps', 'cl', 'hfls', 'ts', 'pr', 'clivi', 'clt', 'a'}
CESM2.gn.historical.Amon.r1i1p1f1 : {'b', 'p0', 'a', 'wap', 'cl', 'hfls', 'ts', 'pr', 'clivi', 'clt', 'ps'}
TaiESM1.gn.historical.Amon.r1i1p1f1 : {'a', 'b', 'p0', 'wap', 'ps', 'cl', 'pr', 'ts', 'clivi', 'clt', 'hfls'}
GISS-E2-1-H.gn.historical.Amon.r1i1p1f1 : {'a', 'b', 'p0', 'wap', 'ps', 'cl', 'hfls', 'ts', 'clivi', 'clt', 'pr'}
FIO-ESM-2-0.gn.historical.Amon.r1i1p1f1 : {'wap', 'ts', 'pr', 'clivi', 'clt', 'hfls'}
BCC-ESM1.gn.historical.Amon.r1i1p1f1 : {'b', 'p0', 'ps', 'wap', 'cl', 'hfls', 'pr', 'ts', 'clivi', 'clt', 'a'}
NESM3.gn.historical.Amon.r1i1p1f1 : {'a', 'b', 'p0', 'wap', 'ps', 'cl', 'pr', 'ts', 'clivi', 'clt', 'hfls'}
GISS-E2-1-G-CC.gn.h

In [10]:
# check the number of models before and after filtering
# Number of models before and after the required-variable filter
len(ddict_merged), len(merged_vars_dict)

(33, 27)

## 3. Regrid CMIP6 data

In [11]:
# Target grid with 1-degree spacing: latitudes -89.5..89.5, longitudes 0.5..359.5
target_lat = np.arange(-89.5, 90.5, 1.0)
target_lon = np.arange(0.5, 360, 1.0)
ds_out = xr.Dataset({"lat": (["lat"], target_lat), "lon": (["lon"], target_lon)})

In [12]:
# Display the target grid
ds_out


In [13]:
# Regrid every selected model onto the common target grid `ds_out`.
ds_in_regrids = {}
for key, ds in merged_vars_dict.items():
    print(key)
    # periodic=True makes the bilinear weights wrap around the 0/360 longitude
    # seam. Without it, target cells lying outside the source longitude range
    # come back as missing values.
    # NOTE(review): assumes every source grid is global in longitude — confirm.
    regridder = xe.Regridder(ds, ds_out, 'bilinear', periodic=True)
    # The regridder is applied to the whole dataset (all variables) at once
    ds_in_regrid = regridder(ds, keep_attrs=True)
    # Keep the result in memory; nothing is written to disk in this cell
    ds_in_regrids[key] = ds_in_regrid

CESM2-WACCM.gn.historical.Amon.r1i1p1f1
CESM2-WACCM-FV2.gn.historical.Amon.r1i1p1f1
CESM2-FV2.gn.historical.Amon.r1i1p1f1
CESM2.gn.historical.Amon.r1i1p1f1
TaiESM1.gn.historical.Amon.r1i1p1f1
GISS-E2-1-H.gn.historical.Amon.r1i1p1f1
BCC-ESM1.gn.historical.Amon.r1i1p1f1


  intermediate = blockwise(
  intermediate = blockwise(
  intermediate = blockwise(


NESM3.gn.historical.Amon.r1i1p1f1
GISS-E2-1-G-CC.gn.historical.Amon.r1i1p1f1
CanESM5.gn.historical.Amon.r1i1p1f1


  intermediate = blockwise(
  intermediate = blockwise(
  intermediate = blockwise(


BCC-CSM2-MR.gn.historical.Amon.r1i1p1f1
GISS-E2-1-G.gn.historical.Amon.r1i1p1f1
MPI-ESM-1-2-HAM.gn.historical.Amon.r1i1p1f1
MPI-ESM1-2-LR.gn.historical.Amon.r1i1p1f1
NorESM2-MM.gn.historical.Amon.r1i1p1f1
IITM-ESM.gn.historical.Amon.r1i1p1f1
MRI-ESM2-0.gn.historical.Amon.r1i1p1f1
CMCC-CM2-SR5.gn.historical.Amon.r1i1p1f1
FGOALS-g3.gn.historical.Amon.r1i1p1f1
ACCESS-CM2.gn.historical.Amon.r1i1p1f1
MPI-ESM1-2-HR.gn.historical.Amon.r1i1p1f1
CAMS-CSM1-0.gn.historical.Amon.r1i1p1f1
AWI-ESM-1-1-LR.gn.historical.Amon.r1i1p1f1
CMCC-ESM2.gn.historical.Amon.r1i1p1f1
SAM0-UNICON.gn.historical.Amon.r1i1p1f1
MIROC6.gn.historical.Amon.r1i1p1f1
CMCC-CM2-HR4.gn.historical.Amon.r1i1p1f1


## 4. Save CMIP6 data

In [14]:
# Mask the regridded `ts` field of every model: values are kept only where the
# first time slice of the mask equals 1.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
# NOTE(review): `cmip6_ts` is not used by the save cells below — confirm intent.
dmask = xr.open_dataset('/global/homes/y/yanxia/ENSO-CLOUD/HardISST/lsmask.nc')
keep_cells = dmask.mask.isel(time=0) == 1
cmip6_ts = {k: ds['ts'].where(keep_cells) for k, ds in ds_in_regrids.items()}

In [17]:

out_path = '../data/raw/'

# Write each regridded dataset to `<out_path><key>.ts`.
# NOTE(review): the full dataset (all variables) is written even though the
# suffix is `.ts` — confirm this is intended.
for key, ds in ds_in_regrids.items():
    ds.to_netcdf(f"{out_path}{key}.ts")
    
    

In [18]:
# Save the dataset keys (one per line) in a text file for postprocessing.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(out_path + "source_id.txt", "w") as f:
    f.writelines(f"{key}\n" for key in ds_in_regrids)


## 5. Read and check the CMIP6 data

In [19]:
# Path of one of the files written by the save step, used as a spot check
ds_path = '../data/raw/BCC-CSM2-MR.gn.historical.Amon.r1i1p1f1.ts'

In [20]:
# Read the saved file back in
ds = xr.open_dataset(ds_path)

In [21]:
# Display the dataset that was read back
ds

In [5]:
# Read the model names from the txt file 
import os

def read_cmip6_models(file_path):
    """
    Load CMIP6 model names from a text file with one entry per line.

    Each non-blank line is stripped of surrounding whitespace and only the
    part before the first '.' is kept as the model name.

    Args:
    file_path (str): Location of the text file to read.

    Returns:
    list: Model names in file order. Empty when the file does not exist;
    if reading fails, the names collected up to that point are returned.
    """
    # Guard clause: report a missing file and return an empty list
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return []

    models = []
    try:
        with open(file_path, 'r') as handle:
            for entry in map(str.strip, handle):
                # Blank lines carry no model name
                if not entry:
                    continue
                # Text before the first '.' (the whole entry when there is no '.')
                models.append(entry.partition('.')[0])
    except IOError as e:
        print(f"Error reading file: {e}")

    return models

# Text file holding the saved dataset keys
CMIP6_MODELS = '../data/raw/source_id.txt'

# Load the model names from that file
cmip6_models = read_cmip6_models(CMIP6_MODELS)

# Report what was found
if not cmip6_models:
    print("No models found or there was an error reading the file.")
else:
    print(f"Found {len(cmip6_models)} CMIP6 models:")
    print("\n".join(cmip6_models))


Found 27 CMIP6 models:
CESM2-WACCM
CESM2-WACCM-FV2
CESM2-FV2
CESM2
TaiESM1
GISS-E2-1-H
BCC-ESM1
NESM3
GISS-E2-1-G-CC
CanESM5
BCC-CSM2-MR
GISS-E2-1-G
MPI-ESM-1-2-HAM
MPI-ESM1-2-LR
NorESM2-MM
IITM-ESM
MRI-ESM2-0
CMCC-CM2-SR5
FGOALS-g3
ACCESS-CM2
MPI-ESM1-2-HR
CAMS-CSM1-0
AWI-ESM-1-1-LR
CMCC-ESM2
SAM0-UNICON
MIROC6
CMCC-CM2-HR4


# Download Additional variables: PSL and Geopotential Height

Using the same search criteria, restricted to the models saved above, we download two additional variables: sea-level pressure (`psl`) and geopotential height (`zg`).

In [10]:
# Re-open the catalogue and rebuild the loading options for this second download
col = google_cmip_col()
experiment_id = 'historical'

# Options that are later unpacked into `to_dataset_dict`
kwargs = dict(
    zarr_kwargs=dict(consolidated=True, use_cftime=True),
    aggregate=False,
    preprocess=combined_preprocessing,
)

# Search for `zg` and `psl`, limited to the model names read from the text file
cat_data = col.search(
    source_id=cmip6_models,
    experiment_id=experiment_id,
    grid_label='gn',
    table_id='Amon',
    variable_id=['zg', 'psl'],
    member_id='r1i1p1f1',
)
cat_data.df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
1,CMIP,NASA-GISS,GISS-E2-1-G,historical,r1i1p1f1,Amon,zg,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/hi...,,20180827
2,CMIP,BCC,BCC-CSM2-MR,historical,r1i1p1f1,Amon,zg,gn,gs://cmip6/CMIP6/CMIP/BCC/BCC-CSM2-MR/historic...,,20181126
3,CMIP,BCC,BCC-CSM2-MR,historical,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/CMIP/BCC/BCC-CSM2-MR/historic...,,20181126
4,CMIP,MIROC,MIROC6,historical,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/CMIP/MIROC/MIROC6/historical/...,,20181212
5,CMIP,BCC,BCC-ESM1,historical,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/CMIP/BCC/BCC-ESM1/historical/...,,20181214
6,CMIP,BCC,BCC-ESM1,historical,r1i1p1f1,Amon,zg,gn,gs://cmip6/CMIP6/CMIP/BCC/BCC-ESM1/historical/...,,20181217
7,CMIP,MRI,MRI-ESM2-0,historical,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/CMIP/MRI/MRI-ESM2-0/historica...,,20190222
8,CMIP,NCAR,CESM2-WACCM,historical,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM/histori...,,20190227
9,CMIP,NCAR,CESM2-WACCM,historical,r1i1p1f1,Amon,zg,gn,gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM/histori...,,20190227


In [11]:
# Load every catalogue entry into a dictionary of datasets, using the options in `kwargs`
ddict = cat_data.to_dataset_dict(**kwargs)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.zstore.dcpp_init_year.version'


In [12]:
# Keep only the datasets whose `source_type` attribute mentions AOGCM
aogcm_dict = {}
for k, ds in ddict.items():
    if 'AOGCM' not in ds.attrs['source_type']:
        continue
    print(k)
    aogcm_dict[k] = ds

CMIP.NCAR.CESM2-FV2.historical.r1i1p1f1.Amon.psl.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/psl/gn/v20191120/.20191120
CMIP.NCAR.CESM2-WACCM.historical.r1i1p1f1.Amon.psl.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/Amon/psl/gn/v20190227/.20190227
CMIP.NCAR.CESM2-WACCM-FV2.historical.r1i1p1f1.Amon.psl.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/psl/gn/v20191120/.20191120
CMIP.NCAR.CESM2-FV2.historical.r1i1p1f1.Amon.zg.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/zg/gn/v20191120/.20191120
CMIP.NCAR.CESM2.historical.r1i1p1f1.Amon.psl.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/psl/gn/v20190308/.20190308
CMIP.NCAR.CESM2-WACCM-FV2.historical.r1i1p1f1.Amon.zg.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/zg/gn/v20191120/.20191120
CMIP.NCAR.CESM2.historical.r1i1p1f1.Amon.zg.gn.gs://cmip6/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/zg/gn/v20190308/.20190308
CMIP.NC

In [13]:
# Merge the per-variable datasets so that each model ends up with a single dataset
ddict_merged = merge_variables(aogcm_dict)
list(ddict_merged)

['CESM2-FV2.gn.historical.Amon.r1i1p1f1',
 'CESM2-WACCM.gn.historical.Amon.r1i1p1f1',
 'CESM2-WACCM-FV2.gn.historical.Amon.r1i1p1f1',
 'CESM2.gn.historical.Amon.r1i1p1f1',
 'TaiESM1.gn.historical.Amon.r1i1p1f1',
 'CMCC-ESM2.gn.historical.Amon.r1i1p1f1',
 'IITM-ESM.gn.historical.Amon.r1i1p1f1',
 'CMCC-CM2-SR5.gn.historical.Amon.r1i1p1f1',
 'BCC-ESM1.gn.historical.Amon.r1i1p1f1',
 'ACCESS-CM2.gn.historical.Amon.r1i1p1f1',
 'CMCC-CM2-HR4.gn.historical.Amon.r1i1p1f1',
 'CanESM5.gn.historical.Amon.r1i1p1f1',
 'MPI-ESM1-2-LR.gn.historical.Amon.r1i1p1f1',
 'MIROC6.gn.historical.Amon.r1i1p1f1',
 'GISS-E2-1-G-CC.gn.historical.Amon.r1i1p1f1',
 'SAM0-UNICON.gn.historical.Amon.r1i1p1f1',
 'MPI-ESM1-2-HR.gn.historical.Amon.r1i1p1f1',
 'MPI-ESM-1-2-HAM.gn.historical.Amon.r1i1p1f1',
 'GISS-E2-1-H.gn.historical.Amon.r1i1p1f1',
 'MRI-ESM2-0.gn.historical.Amon.r1i1p1f1',
 'NorESM2-MM.gn.historical.Amon.r1i1p1f1',
 'NESM3.gn.historical.Amon.r1i1p1f1',
 'AWI-ESM-1-1-LR.gn.historical.Amon.r1i1p1f1',
 'GISS

In [14]:
# Keep only the models that provide both `psl` and `zg`
required_vars = {'psl', 'zg'}
merged_vars_dict = {}
for k, ds in ddict_merged.items():
    cmip_vars = set(ds.keys())
    print(k, ':', cmip_vars)
    if not required_vars.issubset(cmip_vars):
        continue
    merged_vars_dict[k] = ds


CESM2-FV2.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
CESM2-WACCM.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
CESM2-WACCM-FV2.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
CESM2.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
TaiESM1.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
CMCC-ESM2.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
IITM-ESM.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
CMCC-CM2-SR5.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
BCC-ESM1.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
ACCESS-CM2.gn.historical.Amon.r1i1p1f1 : {'psl', 'plev_bnds', 'zg'}
CMCC-CM2-HR4.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
CanESM5.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
MPI-ESM1-2-LR.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
MIROC6.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
GISS-E2-1-G-CC.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
SAM0-UNICON.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
MPI-ESM1-2-HR.gn.historical.Amon.r1i1p1f1 : {'psl', 'zg'}
MPI-ESM-1-2-HAM.gn.historical.Amon.r1i1p1f1 : {'psl'

In [15]:

# Target grid with 1-degree spacing: latitudes -89.5..89.5, longitudes 0.5..359.5
ds_out = xr.Dataset(
    {
        "lat": (["lat"], np.arange(-89.5, 90.5, 1.0)),
        "lon": (["lon"], np.arange(0.5, 360, 1.0)),
    }
)

In [16]:
# Regrid every selected model onto the common target grid `ds_out`.
ds_in_regrids = {}
for key, ds in merged_vars_dict.items():
    print(key)
    # periodic=True makes the bilinear weights wrap around the 0/360 longitude
    # seam. Without it, target cells lying outside the source longitude range
    # come back as missing values.
    # NOTE(review): assumes every source grid is global in longitude — confirm.
    regridder = xe.Regridder(ds, ds_out, 'bilinear', periodic=True)
    # The regridder is applied to the whole dataset (all variables) at once
    ds_in_regrid = regridder(ds, keep_attrs=True)
    # Keep the result in memory; nothing is written to disk in this cell
    ds_in_regrids[key] = ds_in_regrid

CESM2-FV2.gn.historical.Amon.r1i1p1f1
CESM2-WACCM.gn.historical.Amon.r1i1p1f1
CESM2-WACCM-FV2.gn.historical.Amon.r1i1p1f1
CESM2.gn.historical.Amon.r1i1p1f1
TaiESM1.gn.historical.Amon.r1i1p1f1
CMCC-ESM2.gn.historical.Amon.r1i1p1f1
IITM-ESM.gn.historical.Amon.r1i1p1f1
CMCC-CM2-SR5.gn.historical.Amon.r1i1p1f1
BCC-ESM1.gn.historical.Amon.r1i1p1f1


  intermediate = blockwise(


ACCESS-CM2.gn.historical.Amon.r1i1p1f1
CMCC-CM2-HR4.gn.historical.Amon.r1i1p1f1
CanESM5.gn.historical.Amon.r1i1p1f1


  intermediate = blockwise(


MPI-ESM1-2-LR.gn.historical.Amon.r1i1p1f1
MIROC6.gn.historical.Amon.r1i1p1f1
GISS-E2-1-G-CC.gn.historical.Amon.r1i1p1f1
SAM0-UNICON.gn.historical.Amon.r1i1p1f1
MPI-ESM1-2-HR.gn.historical.Amon.r1i1p1f1
MPI-ESM-1-2-HAM.gn.historical.Amon.r1i1p1f1
GISS-E2-1-H.gn.historical.Amon.r1i1p1f1
MRI-ESM2-0.gn.historical.Amon.r1i1p1f1
NorESM2-MM.gn.historical.Amon.r1i1p1f1
NESM3.gn.historical.Amon.r1i1p1f1
AWI-ESM-1-1-LR.gn.historical.Amon.r1i1p1f1
GISS-E2-1-G.gn.historical.Amon.r1i1p1f1
FGOALS-g3.gn.historical.Amon.r1i1p1f1
BCC-CSM2-MR.gn.historical.Amon.r1i1p1f1
CAMS-CSM1-0.gn.historical.Amon.r1i1p1f1


In [18]:
out_path = '../data/raw/'

# Write each regridded dataset to `<out_path><key>.psl-zg`, printing the key as it goes
for key, ds in ds_in_regrids.items():
    out_name = key
    print(out_name)
    ds.to_netcdf(f"{out_path}{out_name}.psl-zg")

CESM2-FV2.gn.historical.Amon.r1i1p1f1
CESM2-WACCM.gn.historical.Amon.r1i1p1f1
CESM2-WACCM-FV2.gn.historical.Amon.r1i1p1f1
CESM2.gn.historical.Amon.r1i1p1f1
TaiESM1.gn.historical.Amon.r1i1p1f1
CMCC-ESM2.gn.historical.Amon.r1i1p1f1
IITM-ESM.gn.historical.Amon.r1i1p1f1
CMCC-CM2-SR5.gn.historical.Amon.r1i1p1f1
BCC-ESM1.gn.historical.Amon.r1i1p1f1
ACCESS-CM2.gn.historical.Amon.r1i1p1f1
CMCC-CM2-HR4.gn.historical.Amon.r1i1p1f1
CanESM5.gn.historical.Amon.r1i1p1f1
MPI-ESM1-2-LR.gn.historical.Amon.r1i1p1f1
MIROC6.gn.historical.Amon.r1i1p1f1
GISS-E2-1-G-CC.gn.historical.Amon.r1i1p1f1
SAM0-UNICON.gn.historical.Amon.r1i1p1f1
MPI-ESM1-2-HR.gn.historical.Amon.r1i1p1f1
MPI-ESM-1-2-HAM.gn.historical.Amon.r1i1p1f1
GISS-E2-1-H.gn.historical.Amon.r1i1p1f1
MRI-ESM2-0.gn.historical.Amon.r1i1p1f1
NorESM2-MM.gn.historical.Amon.r1i1p1f1
NESM3.gn.historical.Amon.r1i1p1f1
AWI-ESM-1-1-LR.gn.historical.Amon.r1i1p1f1
GISS-E2-1-G.gn.historical.Amon.r1i1p1f1
FGOALS-g3.gn.historical.Amon.r1i1p1f1
BCC-CSM2-MR.gn.histo