In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import xesmf as xe
import dask
import intake
import fsspec
import os
from collections import defaultdict
from tqdm.autonotebook import tqdm  # Fancy progress bars for our loops!
from xmip.utils import google_cmip_col
from xmip.postprocessing import combine_datasets, _match_datasets,_concat_sorted_time

In [None]:
in_dir = '/home/jovyan/CMIP6cf/output/subsetted_pr/'

# Maps a key (the dataset's `original_key` attribute without its last '.'-separated field)
# to the dataset that concatenates all realizations of one source_id/experiment_id pair.
ddict = defaultdict(dict)

# Sorted so that the loading order (and hence the order of `ddict`) is reproducible between runs.
for source_id in sorted(s for s in os.listdir(in_dir) if not s.startswith('.')):
    source_dir = os.path.join(in_dir, source_id)
    if not os.path.isdir(source_dir):  # a stray file next to the model folders cannot be listed
        continue

    # Group the NetCDF files by experiment_id, i.e. the third '_'-separated field of the file name.
    # Matching the field exactly (instead of globbing '*<experiment_id>*.nc') keeps files of an
    # experiment whose id merely contains this one (e.g. 'ssp370' vs. 'ssp370-lowNTCF') apart.
    files_by_experiment = defaultdict(list)
    for fname in sorted(os.listdir(source_dir)):
        if fname.startswith('.') or not fname.endswith('.nc'):
            continue
        fields = fname.split('_')
        if len(fields) > 2:  # names without an experiment field cannot be assigned to an experiment
            files_by_experiment[fields[2]].append(os.path.join(source_dir, fname))

    for experiment_id, paths in files_by_experiment.items():
        # For each experiment_id, open the datasets, concatenating all realizations along 'member_id'.
        # TODO: still needs testing for a large number of realizations, like EC-Earth3.
        source_ds = xr.open_mfdataset(paths, join='outer', combine='nested',
                                      compat='override', coords='minimal', concat_dim='member_id')
        ddict[source_ds.original_key.rsplit('.', 1)[0]] = source_ds

In [None]:
# Scenario names: the third '.'-separated field of every key that contains 'ssp'.
ssps = {name.split('.')[2] for name in ddict if 'ssp' in name}

# Collects the combined historical+SSP datasets, keyed by '<combined key>.<ssp>'.
ddict_concat = defaultdict(dict)

for ssp in ssps:
    # Keep only the entries whose key contains this scenario name or 'historical'.
    ddict_ssp = defaultdict(dict)
    ddict_ssp.update((k, ddict[k]) for k in ddict if ssp in k or 'historical' in k)

    # Append the SSP to historical, only for realizations for which both experiments
    # are provided (join=inner).
    hist_ssp = combine_datasets(
        ddict_ssp,
        _concat_sorted_time,
        match_attrs=['source_id', 'grid_label', 'table_id'],
        combine_func_kwargs={'join': 'inner'},
    )

    # Put the combined datasets back together in one dictionary.
    ddict_concat.update({key + '.' + ssp: ds for key, ds in hist_ssp.items()})

Store the combined datasets. Open question: should they be stored per SSP or all together? Storing everything together is easier for later use, but may produce quite large files.

In [None]:
# Write every combined dataset to <pr_timeseries_dir>/<source_id>/<key with '.' replaced by '_'>.nc,
# overwriting an existing file of the same name (mode='w').
pr_timeseries_dir = '/home/jovyan/CMIP6cf/output/pr_timeseries/'

for key, ds in tqdm(ddict_concat.items()):
    model_path = os.path.join(pr_timeseries_dir, ds.source_id)
    # makedirs also creates a missing parent directory and does not fail if the folder already exists.
    os.makedirs(model_path, exist_ok=True)
    ds.to_netcdf(os.path.join(model_path, key.replace('.', '_') + '.nc'), mode='w')