In [1]:
import time as clocktime
import glob
import xcdat as xc
import xarray as xr
import numpy as np
import os

# principal component analysis
from eofs.xarray import Eof

# define a lambda function to perform natural sort
import re
natsort = lambda s: [int(t) if t.isdigit() else t.lower() for t in re.split("(\d+)", s)]

In [2]:
cmipTable = {
    "pr": "Amon",
    "psl": "Amon",
    "tas": "Amon",
    "zmta": "Amon",
    "tos": "Omon",
    "siconc": "OImon",
    "monmaxpr": "Aday",
    "monmaxtasmax": "Aday",
    "monmintasmin": "Aday",
}
cmipVar = {
    "pr": "pr",
    "psl": "psl",
    "tas": "tas",
    "zmta": "ta",
    "tos": "tos",
    "siconc": "siconc",
    "monmaxpr": "pr",
    "monmaxtasmax": "tasmax",
    "monmintasmin": "tasmin",
}
nlat = 72
nlon = 144

In [3]:
# User parameters
# root_dir = "/net/krypton/climdyn_nobackup/FTP/ForceSMIP/"  # path to forcesmip data (ETH)
root_dir = "/Volumes/R1/ForceSMIP/"  # path to forcesmip data (NCAR)
ncvar = "monmaxpr"  # variable to be used: pr, psl, tas, zmta, tos, siconc, monmaxpr, monmaxtasmax, monmintasmin
vid = cmipVar[ncvar]  # the variable id in the netcdf file differs – this maps to the standard CMIP variable name
reference_period = ("1950-01-01", "2023-01-01") # climatological period (for anomaly calculations)

# choose evaluation data
eval_tier = "Tier1"  # Tier1, Tier2, or Tier3
tv_time_period = ("1950-01-01", "2023-01-01")

In [4]:
# get training models
files = glob.glob(root_dir + '/Training/' + cmipTable[ncvar] + '/' + ncvar + '/*')
models = [p.split('/')[-1] for p in files]

# ensure directory exists to write out unforced time series
if not os.path.exists('data/'):
    os.mkdir('data')

# initialize a list to store model ensemble mean time series
model_mean_list = []

# loop over training models
for imodel, model in enumerate(models):
    # start timer
    stime = clocktime.time()
    # get model files
    mpath = root_dir + '/Training/' + cmipTable[ncvar] + '/' + ncvar + '/' + model
    mfiles = glob.glob(mpath + '/*')
    # parse file names to get list of model members
    # CESM2 has a non-CMIP naming convention
    if model == "CESM2":
        members = [p.split("ssp370_")[-1].split(".1880")[0] for p in mfiles]
    else:
        members = [p.split("_")[-1].split(".")[0] for p in mfiles]
    members.sort(key=natsort)
    # print progress
    print(str(imodel + 1) + " / " + str(len(models)) + ": " + model + " (" + str(len(members)) + " members)")
    # initialize model ensemble xarray dataset
    ds_model = None
    for im, member in enumerate(members):
        # print member progress
        print('.', end='')
        # get member filename
        fn = glob.glob(mpath + "/*_" + member + ".*.nc")
        # make sure filename is unique
        if len(fn) != 1:
            raise ValueError("Unexpected number of model members")
        else:
            fn = fn[0]
        # load data
        ds = xc.open_dataset(fn)
        ds = ds.bounds.add_missing_bounds(axes=['T'])
        # remove singletons / lon
        ds = ds.squeeze()
        #ds = ds.drop_vars('lon')
        # subset data to user-specified time period
        ds = ds.sel(time=slice(tv_time_period[0], tv_time_period[1]))
        # calculate departures (relative to user-specified reference time period)
        ds = ds.temporal.departures(vid, freq='month', reference_period=reference_period)
        #print(ds.tas.shape)
        # compute annual average temperature values
        ##ds = ds.temporal.group_average(vid, freq='year')
        ##print(ds.tas)
        # # This block is to ensure all models have the same time axis since
        # # we are concatenating models/members to the same xarray dataset.
        # # Even if time steps are the same, differing calendars can create problems
        # # during concatenation.
        # #nt
        # get reference time axis if it does not exist
        if 'ref_time' not in locals():
            ref_time = ds.time
            #print(ref_time)
        # validate model/member time is the same as reference time
        #ref_time = ds.time
        for i, t in enumerate(ds.time.values):
            m = t.month; y = t.year
            #print(m)
            #print(y)
            rt = ref_time.values[i]; rm = rt.month; ry = rt.year
            #print(rt)
            if ((ry != y) | (rm != m)):
                raise ValueError("model time and reference time do not match")
        # set model/member time to reference time
        #print(ref_time)
        #print(ds.time)
        ds["time"] = ref_time.copy()
        # add model realization to model ensemble dataset
        if ds_model is None:
            ds_model = ds
        else:
            ds_model = xr.concat((ds_model, ds), dim='member')
    # after looping over members, compute model ensemble mean time series
    ds_model_mean = ds_model.mean(dim='member', skipna=False)
    # calculate model anomaly values
    ###ds_model_anomaly = ds_model - ds_model_mean
    # save unforced anomaly values for "noise" calculations
    ###fnOut = 'data/' + ncvar + '_' + model + '_unforced.nc'
    ###ds_model_anomaly.to_netcdf(fnOut)
    # concatenate model ensemble mean to list
    model_mean_list.append(ds_model_mean.load())
    del ds_model, ds_model_mean #, ds_model_anomaly
    # print time elapse for model
    etime = clocktime.time()
    print()
    print("Time elapsed: " + str(etime - stime) + " seconds")
    print()

# concatenate model means together
# this will be used to produce the multimodel mean time series
ds_multi_model = xr.concat(model_mean_list, dim='model')



1 / 5: CESM2 (50 members)
..................................................
Time elapsed: 140.18196988105774 seconds

2 / 5: MPI-ESM1-2-LR (30 members)
..............................
Time elapsed: 66.66397190093994 seconds

3 / 5: MIROC6 (50 members)
..................................................
Time elapsed: 149.81667709350586 seconds

4 / 5: CanESM5 (25 members)
.........................
Time elapsed: 48.586920976638794 seconds

5 / 5: MIROC-ES2L (30 members)
..............................
Time elapsed: 62.857354164123535 seconds



In [5]:
# load mask file
if ncvar == "tas" or ncvar == "pr" or ncvar == "psl" or ncvar == "monmaxpr" or ncvar == "monmaxtasmax" or ncvar == "monmintasmin":
    maskfile = "nomask.nc"
    missing_data_maskx = xr.open_dataset(maskfile)
    missing_data = np.where(np.isnan(missing_data_maskx.tas.squeeze().transpose('lon', 'lat')), np.nan, 1)
else:
    maskfile = "maskland.nc"
    missing_data_maskx = xr.open_dataset(maskfile)
    missing_data = np.where(np.isnan(missing_data_maskx.tos.squeeze().transpose('lon', 'lat')), np.nan, 1)
del maskfile
#missing_data.shape

In [6]:
# multi-model mean and land mask
index_array = xr.DataArray([0, 1, 2, 3, 4], dims="model") # change the index to pick up models
ds_multi_model_mean5 = ds_multi_model.isel(model=index_array).mean(dim='model', skipna=False)
ds_multi_model_mean5 = ds_multi_model_mean5.bounds.add_missing_bounds()
lat_weights = ds_multi_model_mean5.spatial.get_weights(axis=['Y'])
ds_multi_model_mean5[ncvar] = ds_multi_model_mean5[vid].transpose('time', 'lon', 'lat')

masked = ds_multi_model_mean5[ncvar] * np.tile( np.expand_dims(missing_data, axis=0), (ds_multi_model_mean5[ncvar].shape[0], 1, 1) )


In [7]:
# EOF
solver = Eof(masked, weights=lat_weights) 
pcs = solver.pcs(npcs=5, pcscaling=0)
eofs = solver.eofs(neofs=5)

In [8]:
# get evaluation models
eval_dir = root_dir + '/Evaluation-' + eval_tier + '/' + cmipTable[ncvar] + '/' + ncvar
eval_models = glob.glob(eval_dir + '/*')
eval_models = [p.split('_')[-1].split('.')[0] for p in eval_models]
eval_models.sort(key=natsort)

# initialize output dataset
eval_model_climatology = None
# We also will define a reference plev (used to ensure all models are on the same plev axis)
ref_plev = None

# loop over evaluation models
for model in eval_models:
    print(model)
    # get file for model
    fn = glob.glob(eval_dir + '/*_' + model + '.*.nc')
    print(fn)
    # make sure filename is unique
    if len(fn) != 1:
        raise ValueError("Unexpected number of model members")
    else:
        fn = fn[0]
    # open dataset
    ds = xc.open_dataset(fn)
    ds = ds.bounds.add_missing_bounds(axes=['T'])
    # remove singletons
    ds = ds.squeeze()
    # compute annual average temperature values
    #ds = ds.temporal.group_average(vid, freq='year')
    # subset data to user-specified time period
    ds = ds.sel(time=slice(tv_time_period[0], tv_time_period[1]))
    #print(ds)
    ds = ds.temporal.departures(vid, freq='month', reference_period=reference_period)
    
    target = ds[vid].transpose('time', 'lon', 'lat') * np.tile( np.expand_dims(missing_data, axis=0), (ds[vid].shape[0], 1, 1) )
    
    pseudo_pcs = solver.projectField(target - target.mean(dim="time") ,neofs=1) #solver.projectField(data, neofs=4)
    print(pseudo_pcs)
    
    pseudo_pcs_new = pseudo_pcs.squeeze()
    pseudo_pcs_new
    print(pseudo_pcs_new)
    tmp1 = pseudo_pcs_new * eofs[0,:,:]
    tmp2 = tmp1 / lat_weights
    tmp3 = tmp2.transpose('time', 'lat', 'lon')
    
    tmp4 = tmp3 * np.tile( np.expand_dims(np.transpose(missing_data), axis=0), (tmp3.shape[0], 1, 1) )
    tmp4.attrs['_FillValue'] = 1e20
    
    output = xr.Dataset({vid: tmp4})  # xr.Dataset({"recon": tmp4, "pseudo_pcs": pseudo_pcs_new})
    output = output.fillna(1e20)
    output.to_netcdf(ncvar + '_' + model + '_' + eval_tier + '_AllFinger_TheFingerprinters.nc')
    

1A
['/Volumes/R1/ForceSMIP//Evaluation-Tier1/Aday/monmaxpr/monmaxpr_day_1A.195001-202212.nc']
<xarray.DataArray 'pseudo_pcs' (time: 876, mode: 1)>
array([[ 1.13607948e-02],
       [-1.79821905e-01],
       [-3.27717681e+00],
       [ 7.47501550e-01],
       [ 1.14237744e+00],
       [-4.02593903e-01],
       [ 7.39057231e-01],
       [ 2.12725948e+00],
       [ 1.89055615e+00],
       [ 3.91248722e-01],
       [-5.02296896e-02],
       [-1.99322332e-01],
       [ 8.82368359e-02],
       [-8.49098644e-01],
       [ 4.97702603e-01],
       [-5.92514182e-01],
       [ 1.18150093e+00],
       [-5.00832044e-01],
       [-8.03557281e-01],
       [ 3.77259925e-01],
...
       [ 1.77558384e-01],
       [-1.65953786e+00],
       [-3.58786586e-01],
       [-6.21895797e-01],
       [-2.19220867e+00],
       [ 6.04847509e-02],
       [-2.92313858e-01],
       [-1.88929069e-01],
       [-5.51065748e-01],
       [-1.15868086e+00],
       [-1.43188037e+00],
       [ 8.42965689e-01],
       [-1.543416

1C
['/Volumes/R1/ForceSMIP//Evaluation-Tier1/Aday/monmaxpr/monmaxpr_day_1C.195001-202212.nc']
<xarray.DataArray 'pseudo_pcs' (time: 876, mode: 1)>
array([[-3.69310055e+00],
       [-2.80678872e+00],
       [ 8.36137737e-01],
       [ 2.59054049e+00],
       [ 1.63207361e+00],
       [ 3.64348963e+00],
       [ 1.08802095e+00],
       [ 1.79152950e+00],
       [-1.58362563e+00],
       [ 2.84351566e+00],
       [-2.45977560e+00],
       [-7.60201117e-01],
       [-3.51175667e-01],
       [ 1.81044210e+00],
       [-2.12962458e+00],
       [ 5.42866356e-01],
       [ 2.94023892e+00],
       [ 4.19745436e+00],
       [ 1.38848391e+00],
       [ 2.96471653e-01],
...
       [-4.65130567e+00],
       [-3.50322943e+00],
       [-7.78572066e+00],
       [-4.59817000e+00],
       [-9.41974142e+00],
       [-6.28937750e+00],
       [-1.34629521e+00],
       [-1.72331683e+00],
       [ 7.10539803e-01],
       [-8.87689391e-02],
       [-2.47247184e+00],
       [-1.91400342e+00],
       [-2.502372

1E
['/Volumes/R1/ForceSMIP//Evaluation-Tier1/Aday/monmaxpr/monmaxpr_day_1E.195001-202212.nc']
<xarray.DataArray 'pseudo_pcs' (time: 876, mode: 1)>
array([[ 9.88625221e-01],
       [-2.60499706e+00],
       [-1.45069160e+00],
       [-3.30111877e+00],
       [ 1.94853781e+00],
       [ 2.38897096e+00],
       [ 9.21012927e-01],
       [ 7.05256747e+00],
       [-1.45796390e+00],
       [-2.04243120e-01],
       [-1.35023791e+00],
       [-5.30773912e-01],
       [-9.64455798e-01],
       [ 2.37807999e-01],
       [ 1.84978774e+00],
       [ 4.48113810e+00],
       [-9.54535883e-01],
       [ 4.54850846e+00],
       [ 1.40273572e+00],
       [ 3.52543760e+00],
...
       [-1.51852823e+00],
       [-4.43489551e+00],
       [-3.28625196e+00],
       [-6.66065249e+00],
       [-9.50256833e+00],
       [-5.97904943e+00],
       [-2.60135217e+00],
       [ 7.16081483e-01],
       [-2.45428802e+00],
       [-2.90517462e+00],
       [ 2.12836937e+00],
       [-2.22917854e+00],
       [ 5.087481

1G
['/Volumes/R1/ForceSMIP//Evaluation-Tier1/Aday/monmaxpr/monmaxpr_day_1G.195001-202212.nc']
<xarray.DataArray 'pseudo_pcs' (time: 876, mode: 1)>
array([[-1.11439115e+00],
       [-1.79928960e+00],
       [-1.88483380e+00],
       [-1.31701865e+00],
       [ 1.37777653e+00],
       [ 2.08354188e-01],
       [ 2.96913362e-01],
       [ 1.69638884e+00],
       [ 1.16507259e+00],
       [ 1.25755439e+00],
       [-2.48357359e+00],
       [ 4.20577348e-02],
       [-2.53880928e+00],
       [ 8.04803601e-01],
       [ 1.45951577e+00],
       [-1.91181522e+00],
       [-1.03892481e+00],
       [ 1.48768189e+00],
       [ 1.49753397e+00],
       [ 1.75641177e-02],
...
       [-2.49489803e+00],
       [-4.42634193e-02],
       [-6.21184155e+00],
       [-1.28119464e+00],
       [-2.94887862e-02],
       [-1.92670449e+00],
       [-2.69013645e+00],
       [-1.98769557e+00],
       [-2.31682949e+00],
       [-1.31344375e+00],
       [-1.85621562e+00],
       [-2.11112206e+00],
       [ 2.642364

1I
['/Volumes/R1/ForceSMIP//Evaluation-Tier1/Aday/monmaxpr/monmaxpr_day_1I.195001-202212.nc']
<xarray.DataArray 'pseudo_pcs' (time: 876, mode: 1)>
array([[ 2.23512339e+00],
       [ 4.46289464e-01],
       [-5.56585846e-01],
       [ 5.26590658e-01],
       [-4.76522652e-01],
       [ 2.45613666e+00],
       [-6.72753746e-01],
       [-8.23889769e-01],
       [ 1.04596660e+00],
       [ 1.27185512e+00],
       [ 7.37846557e-01],
       [-2.20747427e+00],
       [-6.83045757e-01],
       [-1.85606196e+00],
       [-2.56535570e+00],
       [ 1.46467470e-02],
       [-1.63440711e+00],
       [-4.30940294e-01],
       [ 1.09707389e+00],
       [ 2.88239797e-01],
...
       [-4.09170066e+00],
       [-6.87808613e-02],
       [-6.25982817e+00],
       [-2.99614409e+00],
       [-5.20005109e+00],
       [-3.77134908e+00],
       [-7.78934471e-01],
       [-1.95754346e+00],
       [-1.19863861e-01],
       [-1.63024696e-01],
       [-1.32843738e+00],
       [-2.39103918e+00],
       [-2.052222