In [5]:
from pathlib import Path

import dask
import intake
import xarray as xr
# Tell dask not to split large chunks that result from slicing.
dask.config.set({'array.slicing.split_large_chunks': False})

# Open the intake-esm catalog that indexes the CMIP6 data.
url = "https://storage.googleapis.com/cmip6/cmip6-pgf-ingestion-test/catalog/catalog.json"
col = intake.open_esm_datastore(url)

# Search for MONTHLY tas (table_id='Amon') from MPI-ESM1-2-LR in both experiments.
hist = col.search(source_id='MPI-ESM1-2-LR', experiment_id='historical', table_id='Amon', variable_id='tas')
g6_lr = col.search(source_id='MPI-ESM1-2-LR', experiment_id='G6sulfur', table_id='Amon', variable_id='tas')

# Load datasets. Each call returns a dict of datasets keyed by
# 'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'.
hist_dict = hist.to_dataset_dict()
g6_lr_dict = g6_lr.to_dataset_dict()

# Print available keys so the hard-coded lookups below can be checked
# against what the catalog actually returned.
print("\nHistorical keys:", list(hist_dict.keys()))
print("G6sulfur keys:", list(g6_lr_dict.keys()))

# Extract historical tas (1940-2014, aligned with ERA5).
# Select a single ensemble member, then squeeze() drops any length-1 dimensions.
hist_tas = hist_dict['CMIP.MPI-M.MPI-ESM1-2-LR.historical.Amon.gn']['tas'].sel(
    variant_label='r1i1p1f1',
    time=slice('1940', '2014')
).squeeze()

# Extract G6sulfur LR tas.
# No time slice is applied here; the output file name below assumes the run
# spans 2015-2100 -- check the printed shape (~1032 months) to confirm.
g6_lr_tas = g6_lr_dict['GeoMIP.MPI-M.MPI-ESM1-2-LR.G6sulfur.Amon.gn']['tas'].sel(
    variant_label='r1i1p1f1'
).squeeze()

print(f"\nHist shape: {hist_tas.shape}")  # Should be ~900 months (75 years)
print(f"G6 shape: {g6_lr_tas.shape}")     # Should be ~1032 months (86 years)

# Save to netcdf.
# Writing fails if the target directory is missing, so create it first rather
# than losing the loaded data to an error on the last step of the cell.
out_dir = Path('data/cmip6')
out_dir.mkdir(parents=True, exist_ok=True)
hist_tas.to_netcdf(out_dir / 'hist_lr_tas_monthly_1940_2014.nc')
g6_lr_tas.to_netcdf(out_dir / 'g6_lr_tas_monthly_2015_2100.nc')

print("\nSaved!")


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'



--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'



Historical keys: ['CMIP.MPI-M.MPI-ESM1-2-LR.historical.Amon.gn']
G6sulfur keys: ['GeoMIP.MPI-M.MPI-ESM1-2-LR.G6sulfur.Amon.gn']

Hist shape: (900, 96, 192)
G6 shape: (1032, 96, 192)

Saved!
