# This script downloads CMIP6 data using the intake-esm library

Read more at the link below:

https://intake-esm.readthedocs.io/en/stable/tutorials/loading-cmip6-data.html

How to use intake to download GCM data


In [1]:
import intake
import os
import dask

### Specify the download location for CMIP6 data

In [None]:
# Root directory for the downloaded files; edit this path if necessary.
# The download needs more than 300 GB of free disk space.
# Keep the trailing '/' so the prefix can be joined with sub-directory names.
download_dir = 'Download/' 

In [3]:
# URL of the catalog description listing all CMIP6 files that can be
# downloaded through the intake-esm data store.
url = "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"
# Open the catalog. Despite its name, `dataframe` is the object returned by
# intake.open_esm_datastore(); later cells call `.search()` on it.
dataframe = intake.open_esm_datastore(url)
# Uncomment the lines below to inspect the catalog's tabular view (`.df`).
#dataframe.df.columns
#df = dataframe.df

In [4]:
'''
natural_sort (below) orders member IDs such as r1i1p1f1, whose names are
built from the following components:
    r1: Realization (initial condition run)
    i1: Initialization method
    p1: Physical parameters
    f1: External forcings
'''

import re
def natural_sort(l):
    """Return the strings in *l* sorted in natural (human) order.

    Runs of ASCII digits are compared as integers and all other text is
    compared case-insensitively, so ``'r2i1p1f1'`` sorts before
    ``'r10i1p1f1'``.

    Parameters
    ----------
    l : iterable of str
        Names to sort (the parameter name is kept for backward compatibility).

    Returns
    -------
    list of str
        A new sorted list; the input is not modified.
    """
    def _natural_key(name):
        # re.split with a capturing group alternates text / digit-run pieces,
        # always starting with a (possibly empty) text piece, so every odd
        # index is a run of ASCII digits. Deciding by position instead of
        # str.isdigit() avoids int() failures and int-vs-str comparisons on
        # non-ASCII digit characters such as '²'.
        pieces = re.split('([0-9]+)', name)
        return [int(piece) if index % 2 else piece.lower()
                for index, piece in enumerate(pieces)]

    return sorted(l, key=_natural_key)

In [None]:
# Download one ensemble member per (model, experiment, variable) combination
# and write each resulting dataset to a NetCDF file under `download_dir`.
#
# The trailing [:1] restricts the run to the first model in the list; widen or
# remove the slice to fetch the other models as well.
for sid in ['EC-Earth3', 'MIROC6', 'MRI-ESM2-0', 'ACCESS-CM2', 'IPSL-CM6A-LR', 'MPI-ESM1-2-HR'][:1]:

    for exp in ['historical', 'ssp585', 'ssp126', 'ssp370', 'ssp245']:

        for var in ['tas', 'ta', 'ua', 'va', 'hur', 'zg', 'ts']:
            # Search the catalog for every entry matching this combination.
            # Several members (r*i*p*f* variants) may match.
            models = dataframe.search(experiment_id=exp,
                                      table_id='Amon',
                                      variable_id=var,
                                      source_id=sid)

            print(var, exp, sid, len(models.df))

            # Nothing in the catalog: report it and move on. Without this
            # `continue`, indexing the empty member list below would raise
            # IndexError and abort the whole run.
            if len(models.df) == 0:
                print('*** \n Erorrrr \n')
                continue

            # Keep only the first member in natural order (e.g. r1i1p1f1
            # before r10i1p1f1) and search again restricted to that member.
            mem = natural_sort(models.df.member_id.values)[0]
            model_s = dataframe.search(experiment_id=exp,
                                       table_id='Amon',
                                       variable_id=var,
                                       source_id=sid,
                                       member_id=mem)

            n_selected = len(model_s.df)
            if n_selected == 0:
                print('*** \n Erorrrr \n')

            print(mem)

            if n_selected == 0:
                continue

            print('Download')

            try:
                # NOTE(review): newer intake-esm releases appear to take these
                # options via `xarray_open_kwargs` instead of `zarr_kwargs`;
                # switch the keyword if this call is rejected.
                datasets = model_s.to_dataset_dict(
                    zarr_kwargs={'consolidated': True,
                                 'decode_times': True,
                                 'use_cftime': True})

                # Output layout: <download_dir>/<model>/<experiment>/.
                # os.path.join tolerates a download_dir without a trailing '/'.
                odir = os.path.join(download_dir, sid, exp)

                for k, v in datasets.items():
                    os.makedirs(odir, exist_ok=True)
                    ofile = os.path.join(odir, var + '_' + k + '_' + mem + '.nc')
                    print('write to ', ofile)
                    dask.delayed(v.to_netcdf)(ofile).compute()
            except Exception as err:
                # Best effort: report why this combination failed and carry on
                # with the next one. KeyboardInterrupt is deliberately not
                # caught so a long download can still be stopped.
                print('fail', repr(err))
                            

tas historical MRI-ESM2-0 11
r1i1p1f1
Download

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
write to  Download/MRI-ESM2-0/historical/tas_CMIP.MRI.MRI-ESM2-0.historical.Amon.gn_r1i1p1f1.nc
ta historical MRI-ESM2-0 11
r1i1p1f1
Download

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
write to  Download/MRI-ESM2-0/historical/ta_CMIP.MRI.MRI-ESM2-0.historical.Amon.gn_r1i1p1f1.nc
ua historical MRI-ESM2-0 10
r1i1p1f1
Download

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
write to  Download/MRI-ESM2-0/historical/ua_CMIP.MRI.MRI-ESM2-0.historical.Amon.gn_r1i1p1f1.nc
