In [1]:
import time as clocktime
import glob
import xcdat as xc
import xarray as xr
import numpy as np
import os

# principal component analysis
from eofs.xarray import Eof

# define a lambda function to perform natural sort
import re
natsort = lambda s: [int(t) if t.isdigit() else t.lower() for t in re.split("(\d+)", s)]

In [2]:
cmipTable = {
    "pr": "Amon",
    "psl": "Amon",
    "tas": "Amon",
    "zmta": "Amon",
    "tos": "Omon",
    "siconc": "OImon",
    "monmaxpr": "Aday",
    "monmaxtasmax": "Aday",
    "monmintasmin": "Aday",
}
cmipVar = {
    "pr": "pr",
    "psl": "psl",
    "tas": "tas",
    "zmta": "ta",
    "tos": "tos",
    "siconc": "siconc",
    "monmaxpr": "pr",
    "monmaxtasmax": "tasmax",
    "monmintasmin": "tasmin",
}
nlat = 72
nlon = 144

In [3]:
# User parameters
# root_dir = "/net/krypton/climdyn_nobackup/FTP/ForceSMIP/"  # path to forcesmip data (ETH)
root_dir = "/Volumes/R1/ForceSMIP/"  # path to forcesmip data (NCAR)
ncvar = "zmta"  # variable to be used: pr, psl, tas, zmta, tos, siconc, monmaxpr, monmaxtasmax, monmintasmin
vid = cmipVar[ncvar]  # the variable id in the netcdf file differs – this maps to the standard CMIP variable name
reference_period = ("1950-01-01", "2023-01-01") # climatological period (for anomaly calculations)

# choose evaluation data
eval_tier = "Tier1"  # Tier1, Tier2, or Tier3
tv_time_period = ("1950-01-01", "2023-01-01")

In [4]:
# get training models
files = glob.glob(root_dir + '/Training/' + cmipTable[ncvar] + '/' + ncvar + '/*')
models = [p.split('/')[-1] for p in files]

# ensure directory exists to write out unforced time series
if not os.path.exists('data/'):
    os.mkdir('data')

# initialize a list to store model ensemble mean time series
model_mean_list = []

# loop over training models
for imodel, model in enumerate(models):
    # start timer
    stime = clocktime.time()
    # get model files
    mpath = root_dir + '/Training/' + cmipTable[ncvar] + '/' + ncvar + '/' + model
    mfiles = glob.glob(mpath + '/*')
    # parse file names to get list of model members
    # CESM2 has a non-CMIP naming convention
    if model == "CESM2":
        members = [p.split("ssp370_")[-1].split(".1880")[0] for p in mfiles]
    else:
        members = [p.split("_")[-1].split(".")[0] for p in mfiles]
    members.sort(key=natsort)
    # print progress
    print(str(imodel + 1) + " / " + str(len(models)) + ": " + model + " (" + str(len(members)) + " members)")
    # initialize model ensemble xarray dataset
    ds_model = None
    for im, member in enumerate(members):
        # print member progress
        print('.', end='')
        # get member filename
        fn = glob.glob(mpath + "/*_" + member + ".*.nc")
        # make sure filename is unique
        if len(fn) != 1:
            raise ValueError("Unexpected number of model members")
        else:
            fn = fn[0]
        # load data
        ds = xc.open_dataset(fn)
        # remove singletons / lon
        ds = ds.squeeze()
        ds = ds.bounds.add_missing_bounds(axes=['T','Z'])
        
        
        #ds = ds.drop_vars('lon')
        # subset data to user-specified time period
        ds = ds.sel(time=slice(tv_time_period[0], tv_time_period[1]))
        # calculate departures (relative to user-specified reference time period)
        ds = ds.temporal.departures(vid, freq='month', reference_period=reference_period)
        #print(ds.tas.shape)
        # compute annual average temperature values
        ##ds = ds.temporal.group_average(vid, freq='year')
        ##print(ds.tas)
        # # This block is to ensure all models have the same time axis since
        # # we are concatenating models/members to the same xarray dataset.
        # # Even if time steps are the same, differing calendars can create problems
        # # during concatenation.
        # #nt
        # get reference time axis if it does not exist
        if 'ref_time' not in locals():
            ref_time = ds.time
            #print(ref_time)
        # validate model/member time is the same as reference time
        #ref_time = ds.time
        for i, t in enumerate(ds.time.values):
            m = t.month; y = t.year
            #print(m)
            #print(y)
            rt = ref_time.values[i]; rm = rt.month; ry = rt.year
            #print(rt)
            if ((ry != y) | (rm != m)):
                raise ValueError("model time and reference time do not match")
        # set model/member time to reference time
        #print(ref_time)
        #print(ds.time)
        ds["time"] = ref_time.copy()
        # add model realization to model ensemble dataset
        if ds_model is None:
            ds_model = ds
        else:
            ds_model = xr.concat((ds_model, ds), dim='member')
    # after looping over members, compute model ensemble mean time series
    ds_model_mean = ds_model.mean(dim='member', skipna=False)
    # calculate model anomaly values
    ###ds_model_anomaly = ds_model - ds_model_mean
    # save unforced anomaly values for "noise" calculations
    ###fnOut = 'data/' + ncvar + '_' + model + '_unforced.nc'
    ###ds_model_anomaly.to_netcdf(fnOut)
    # concatenate model ensemble mean to list
    model_mean_list.append(ds_model_mean.load())
    del ds_model, ds_model_mean #, ds_model_anomaly
    # print time elapse for model
    etime = clocktime.time()
    print()
    print("Time elapsed: " + str(etime - stime) + " seconds")
    print()

# concatenate model means together
# this will be used to produce the multimodel mean time series
ds_multi_model = xr.concat(model_mean_list, dim='model')



1 / 5: CESM2 (50 members)
.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.




Time elapsed: 35.67357635498047 seconds

2 / 5: MPI-ESM1-2-LR (30 members)
.



.



.



.



.



.



.



.



.



..



.



.



.



.



..



.



.



.



.



.



.



.



.



.



.



..



.
Time elapsed: 47.015783071517944 seconds

3 / 5: MIROC6 (50 members)
.



.



.



.



.



.



.



.



.



.



.



..



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



.



..



.



.



.



.




Time elapsed: 83.49214100837708 seconds

4 / 5: CanESM5 (25 members)
.



.



.



.



..



..



.



..



.



.



.



.



.



..



.



.



.



.



.



.



.




Time elapsed: 38.27353501319885 seconds

5 / 5: MIROC-ES2L (30 members)
.



.



.



..



.



.



.



.



.



.



.



..



.



.



.



.



.



.



.



.



..



.



..



.



.



.
Time elapsed: 45.685871839523315 seconds



In [5]:
#load mask file
if ncvar == "tas" or ncvar == "pr" or ncvar == "psl" or ncvar == "monmaxpr" or ncvar == "monmaxtasmax" or ncvar == "monmintasmin":
    maskfile = "nomask.nc"
    missing_data_maskx = xr.open_dataset(maskfile)
    missing_data = np.where(np.isnan(missing_data_maskx.tas.squeeze().transpose('lon', 'lat')), np.nan, 1)
elif ncvar == "tos":
    maskfile = "maskland.nc"
    missing_data_maskx = xr.open_dataset(maskfile)
    missing_data = np.where(np.isnan(missing_data_maskx.tos.squeeze().transpose('lon', 'lat')), np.nan, 1)
else:
    maskfile = "zonmean_mask.nc"
    missing_data_maskx = xr.open_dataset(maskfile)
    missing_data = np.where(np.isnan(missing_data_maskx.ta.squeeze().transpose('plev', 'lat')), np.nan, 1)
    
del maskfile
#missing_data.shape

In [6]:
# multi-model mean and land mask
index_array = xr.DataArray([0, 1, 2, 3, 4], dims="model")
ds_multi_model_mean5 = ds_multi_model.isel(model=index_array).mean(dim='model', skipna=False)
ds_multi_model_mean5 = ds_multi_model_mean5.bounds.add_missing_bounds()

lat_weights = ds_multi_model_mean5.spatial.get_weights(axis=['Y'])
lat_weights0 = np.expand_dims(lat_weights, axis=0)
lev_weights = ds_multi_model_mean5['plev_bnds'][:,0] - ds_multi_model_mean5['plev_bnds'][:,1]
lev_weights0 = np.expand_dims(lev_weights, axis=1)
weights = lat_weights0 * lev_weights0

ds_multi_model_mean5[ncvar] = ds_multi_model_mean5[vid].transpose('time', 'plev', 'lat')
#masked = ds_multi_model_mean5[ncvar] * np.tile( np.expand_dims(missing_data, axis=0), (ds_multi_model_mean5[ncvar].shape[0], 1, 1) )

masked = ds_multi_model_mean5[ncvar] * np.tile( np.expand_dims(missing_data, axis=0), (ds_multi_model_mean5[ncvar].shape[0], 1, 1) )
weights_ex = np.tile( np.expand_dims(weights, axis=0), (ds_multi_model_mean5[ncvar].shape[0], 1, 1) )

In [7]:
# EOF
solver = Eof(masked, weights=weights_ex) 
pcs = solver.pcs(npcs=5, pcscaling=0)
eofs = solver.eofs(neofs=5)

In [8]:
# get evaluation models
eval_dir = root_dir + '/Evaluation-' + eval_tier + '/' + cmipTable[ncvar] + '/' + ncvar
eval_models = glob.glob(eval_dir + '/*')
eval_models = [p.split('_')[-1].split('.')[0] for p in eval_models]
eval_models.sort(key=natsort)

# initialize output dataset
eval_model_climatology = None
# We also will define a reference plev (used to ensure all models are on the same plev axis)
ref_plev = None

# loop over evaluation models
for model in eval_models:
    print(model)
    # get file for model
    fn = glob.glob(eval_dir + '/*_' + model + '.*.nc')
    print(fn)
    # make sure filename is unique
    if len(fn) != 1:
        raise ValueError("Unexpected number of model members")
    else:
        fn = fn[0]
    # open dataset
    ds = xc.open_dataset(fn)
    #ds = ds.drop_vars('lon')
    ds = ds.squeeze()
    ds = ds.bounds.add_missing_bounds(axes=['T','Z'])
    # remove singletons

    # compute annual average temperature values
    #ds = ds.temporal.group_average(vid, freq='year')
    # subset data to user-specified time period
    ds = ds.sel(time=slice(tv_time_period[0], tv_time_period[1]))
    #print(ds)
    ds = ds.temporal.departures(vid, freq='month', reference_period=reference_period)
    
    target = ds[vid].transpose('time', 'plev', 'lat') * np.tile( np.expand_dims(missing_data, axis=0), (ds[vid].shape[0], 1, 1) )
    print(target)
    pseudo_pcs = solver.projectField(target - target.mean(dim="time") ,neofs=1) #solver.projectField(data, neofs=4)
    print(pseudo_pcs)
    
    pseudo_pcs_new = pseudo_pcs.squeeze()
    pseudo_pcs_new
    print(pseudo_pcs_new)
    tmp1 = pseudo_pcs_new * eofs[0,:,:]
    print(tmp1.size)
    tmp2 = tmp1 / weights
    tmp3 = tmp2.transpose('time', 'plev', 'lat')
    
    tmp4 = tmp3 * np.tile( np.expand_dims(missing_data, axis=0), (tmp3.shape[0], 1, 1) )
    tmp4.attrs['_FillValue'] = 1e20
    
    output = xr.Dataset({vid: tmp4}) #output = xr.Dataset({"recon": tmp4, "pseudo_pcs": -pseudo_pcs_new}) 
    output = output.fillna(1e20)
    output.to_netcdf(ncvar + '_' + model + '_' + eval_tier + '_AllFinger_TheFingerprinters.nc')
    

1A
['/Volumes/R1/ForceSMIP//Evaluation-Tier1/Amon/zmta/zmta_mon_1A.195001-202212.nc']
<xarray.DataArray 'ta' (time: 876, plev: 17, lat: 72)>
array([[[         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,  -0.14218822, ...,  -1.88824445,
          -1.9343132 ,  -1.56851896],
        [         nan,          nan,   0.0565193 , ...,  -1.69016221,
          -1.47338116,  -1.26608637],
        ...,
        [ -1.12028188,  -1.09047983,  -1.05526916, ...,  -3.64333826,
          -3.69822983,  -3.71864235],
        [ -1.43665567,  -1.42611542,  -1.36371971, ...,  -2.0720949 ,
          -2.02027425,  -1.96419239],
        [ -0.51104392,  -0.51436433,  -0.48581005, ...,   1.47700562,
           1.75699954,   1.92790443]],

       [[         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,   1.25430593, ...,   0.03987255,
           0.44078

<xarray.DataArray 'ta' (time: 876, plev: 17, lat: 72)>
array([[[            nan,             nan,             nan, ...,
                     nan,             nan,             nan],
        [            nan,             nan, -9.15187608e-01, ...,
          7.84162273e-01,  1.54797896e+00,  2.19056637e+00],
        [            nan,             nan, -8.80001458e-01, ...,
          8.07195872e-01,  1.19166280e+00,  1.49810510e+00],
        ...,
        [ 7.18388672e-02,  4.32231625e-02,  1.48758899e-02, ...,
         -4.19870072e-01, -5.80662246e-01, -7.04429048e-01],
        [-6.01659360e-02, -4.96663770e-02, -2.66380652e-02, ...,
         -1.72707590e-01, -1.31175152e-02,  8.29835526e-02],
        [ 7.61660700e-01,  7.70890811e-01,  7.80939932e-01, ...,
          8.11903828e-01,  1.34784340e+00,  1.64153613e+00]],

       [[            nan,             nan,             nan, ...,
                     nan,             nan,             nan],
        [            nan,             nan, -2.29

<xarray.DataArray 'ta' (time: 876, plev: 17, lat: 72)>
array([[[        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,  0.41942232, ..., -2.34005651,
         -2.43664698, -2.43033522],
        [        nan,         nan, -0.15848722, ..., -2.56934992,
         -2.6296602 , -2.8284757 ],
        ...,
        [ 0.40373368,  0.43534915,  0.48269438, ..., -0.36720776,
         -0.34578544, -0.33493012],
        [ 0.19180529,  0.20242941,  0.23367746, ...,  0.58154063,
          0.60141359,  0.58011353],
        [ 1.4189819 ,  1.39718513,  1.36483433, ...,  1.42543837,
          1.76249162,  1.97335583]],

       [[        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan, -0.85577044, ..., -0.15889488,
          0.38076736,  0.93516486],
        [        nan,         nan, -1.56446716, ...,  0.3789844 ,
          0.92141706,  1.2850366 ],
...
        

<xarray.DataArray 'ta' (time: 876, plev: 17, lat: 72)>
array([[[         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,  -0.76940559, ...,   0.86989234,
           1.07200797,   1.29186048],
        [         nan,          nan,  -0.83704763, ...,   0.63692104,
           0.77555383,   0.91920121],
        ...,
        [  0.88091694,   0.89902586,   0.92230955, ...,  14.64194888,
          15.15098574,  15.45432603],
        [  0.58503784,   0.60443058,   0.64302565, ...,  13.44416956,
          13.90841841,  14.20010729],
        [  1.32617142,   1.33044871,   1.35189025, ...,   8.02137976,
           8.39355236,   8.6106549 ]],

       [[         nan,          nan,          nan, ...,          nan,
                  nan,          nan],
        [         nan,          nan,  -0.2928603 , ...,  -2.5883451 ,
          -2.18312392,  -1.97670588],
        [         nan,          nan,  -0.09674317, ...,  -2.644380

<xarray.DataArray 'ta' (time: 876, plev: 17, lat: 72)>
array([[[            nan,             nan,             nan, ...,
                     nan,             nan,             nan],
        [            nan,             nan, -5.57744456e-01, ...,
         -1.37503447e+00, -1.74358331e+00, -2.06888404e+00],
        [            nan,             nan, -4.05868493e-01, ...,
         -6.27898690e-01, -7.93865441e-01, -8.20337584e-01],
        ...,
        [ 1.02195993e+00,  1.03893399e+00,  1.07084048e+00, ...,
         -4.59310597e+00, -4.88105148e+00, -4.99256011e+00],
        [ 1.93346648e+00,  1.86002827e+00,  1.81519939e+00, ...,
         -8.31896621e+00, -8.59089992e+00, -8.72874722e+00],
        [ 1.97893808e+00,  2.03663363e+00,  2.03356287e+00, ...,
         -9.51142045e+00, -9.08594232e+00, -8.88325042e+00]],

       [[            nan,             nan,             nan, ...,
                     nan,             nan,             nan],
        [            nan,             nan, -1.80