# Multi-model mean scaling

This notebook takes the multimodel mean forced climate change (from the training dataset) and scales it to match a simulation / dataset of interest.

### Imports

In [1]:
import xcdat as xc
from fx import get_decimal_year_time
import numpy as np
import glob
import xarray as xr
import os
from scipy.signal import savgol_filter
# suppress warnings (but allow errors)
import logging
logging.getLogger('xcdat').setLevel(logging.ERROR)

### Custom functions

In [2]:
def fit_monthly_fourth_order_polynomial(ds, lvar, monthly=True):
    """
    fit_monthly_fourth_order_polynomial(ds, lvar)

    Function fits a fourth order polynomial to the "raw" dataarray (at each 
    point) that includes both internal variability and the forced response 
    such that:

        y = a*t^4 + b*t^3 + c*t^2 + d*t + e

    where y is the "raw" time series (at each point), t is time (in units of
    decimal year), a-e are the coefficients of the fit. The resulting fit is
    an estimate of the forced climate response.

    Parameters:
    -----------
    ds (xr.Dataset)  : Dataset containing the raw (forced+internal) data
    lvar (str)       : variable id corresponding to the dataarrays to be scaled
    monthly (bool)   : Boolean to fit data monthly (True) or all together (False), 
                       default True

    Returns:
    --------
    xr.Dataset : Dataset containing an estimate of the forced response

    Notes:
    ------
    The fit is performe
    """
    # ensure bounds are present
    ds = ds.bounds.add_missing_bounds(['T'])
    # get departures
    ds = ds.temporal.departures(lvar, freq='month')
    # get decimal year for polynomial fit
    dtime = get_decimal_year_time(ds.time)
    # get dataarray shape
    shp = ds[lvar].shape
    # get monthly dataarray shape
    shp_annual = ds[lvar][::12].shape
    # reshape dataarray for fitting [time, space]
    Y = np.reshape(ds[lvar].values, (shp[0], -1))
    # create output dataset
    dsf = ds.copy()
    # loop over and fit each month separately
    if monthly:
        for mm in range(12):
            # get coeffficients
            fit = np.polyfit(dtime[mm::12], Y[mm::12, :], 4)
            # get time matrix to apply fit coefficients
            x = np.expand_dims(dtime[mm::12], 1)
            # get coefficients
            C = np.expand_dims(fit, 0)
            # multiply time by coefficients
            YF = C[:, 4, :] + x*C[:, 3, :] + x**2*C[:, 2, :] + x**3*C[:, 1, :] + x**4*C[:, 0, :]
            # reshape to correct spatial structure
            YF = np.reshape(YF, shp_annual)
            # add monthly fit to dataset
            dsf[lvar].values[mm::12] = YF
    else:
        # get coeffficients
        fit = np.polyfit(dtime, Y, 4)
        # get time matrix to apply fit coefficients
        x = np.expand_dims(dtime, 1)
        # get coefficients
        C = np.expand_dims(fit, 0)
        # multiply time by coefficients
        YF = C[:, 4, :] + x*C[:, 3, :] + x**2*C[:, 2, :] + x**3*C[:, 1, :] + x**4*C[:, 0, :]
        # reshape to correct spatial structure
        YF = np.reshape(YF, shp)
        # add monthly fit to dataset
        dsf[lvar].values = YF
    return dsf

### Parameters

In [3]:
# Parameters
forcesmip_root = '/glade/campaign/cgd/cas/asphilli/ForceSMIP/'
dpath_em = '/glade/work/pochedls/forcesmip/ensemble_mean/'
dpath_out = '/glade/work/pochedls/forcesmip/'
fmethod = 'fourthOrderPolynomialFit'
vmap = {'pr': ['Amon', 'pr'],
        'psl': ['Amon', 'psl'],
        'tas': ['Amon', 'tas'],
        'zmta': ['Amon', 'ta'],
        'monmaxpr': ['Aday', 'pr'],
        'monmaxtasmax': ['Aday', 'tasmax'],
        'monmintasmin': ['Aday', 'tasmin'],
        'siconc': ['OImon', 'siconc'],
        'tos': ['Omon', 'tos']}
models = ['CanESM5', 'CESM2', 'MIROC6', 'MIROC-ES2L', 'MPI-ESM1-2-LR']

### Estimate Forced Response in Training Data

In [4]:
# print progress
for vid in vmap.keys():
    # print progress
    print(vid)
    # get CMIP table
    cmipTable = vmap[vid][0]
    # get appropriate netcdf variable id
    lvar = vmap[vid][1]
    # loop over all training models
    for model in models:
        # print progress
        print('   ' + model)
        # specify data path
        dpath = forcesmip_root + '/Training/' + cmipTable + '/' + vid + '/' + model
        # get all files for model
        mfiles = glob.glob(dpath + '/*nc')
        # loop over all files / members
        for fn in mfiles:
            # get member
            if model == 'CESM2':
                member = '.'.join(fn.split('_')[-1].split('.')[0:2])
            else:
                member = fn.split('.')[0].split('_')[-1]
            # specify output path
            fnOut = dpath_out + '/training_predictions/' + vid + '_mon_' + model + '_' + fmethod + '_historical_ssp370_' + member + '.' + mfiles[0].split('.')[-1]
            fnOutMonthly = dpath_out + '/training_predictions/' + vid + '_mon_' + model + '_' + fmethod + 'Monthly_historical_ssp370_' + member + '.' + mfiles[0].split('.')[-1]
            # continue if already done
            if os.path.exists(fnOut):
                continue
            # open dataset
            ds = xc.open_dataset(fn)
            # do fit
            dsm = fit_monthly_fourth_order_polynomial(ds, lvar)
            dsa = fit_monthly_fourth_order_polynomial(ds, lvar, monthly=False)
            # save output
            dsm.to_netcdf(fnOutMonthly)
            dsa.to_netcdf(fnOut)
            # close file
            ds.close()

pr
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR
psl
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR
tas
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR
zmta
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR
monmaxpr
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR
monmaxtasmax
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR
monmintasmin
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR
siconc
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR
tos
   CanESM5
   CESM2
   MIROC6
   MIROC-ES2L
   MPI-ESM1-2-LR


### Estimate Forced Response in Evaluation Data

In [5]:
# first ensure output path exists
if not os.path.exists(dpath_out + '/evaluation_predictions/'):
    os.makedirs(dpath_out + '/evaluation_predictions/')

# loop over variables
for vid in vmap.keys():
    # print progress
    print(vid)
    # get CMIP table
    cmipTable = vmap[vid][0]
    # get appropriate netcdf variable id
    lvar = vmap[vid][1]
    # get evaluation files
    dpath = forcesmip_root + '/Evaluation-Tier1/' + cmipTable + '/' + vid
    mfiles = glob.glob(dpath + '/*nc')
    # loop over all evaluation models
    for fn in mfiles:
        # specify output path
        member = fn.split('/')[-1].split('_')[-1].split('.')[0]
        print('   ' + member)
        fnOut = dpath_out + '/evaluation_predictions/' + vid + '_' + member + '_tier1_' + fmethod + '_benchmark.nc'
        fnOutMonthly = dpath_out + '/evaluation_predictions/' + vid + '_' + member + '_tier1_' + fmethod + 'Monthly_benchmark.nc'
        # continue if already done
        if os.path.exists(fnOut):
            continue
        # open dataset
        ds = xc.open_dataset(fn)
        # do fit
        dsfm = fit_monthly_fourth_order_polynomial(ds, lvar)
        dsfa = fit_monthly_fourth_order_polynomial(ds, lvar, monthly=False)
        # save output
        dsfm.to_netcdf(fnOutMonthly)
        dsfa.to_netcdf(fnOut)
        # close file
        ds.close()

pr
   1E
   1B
   1H
   1J
   1I
   1D
   1F
   1G
   1A
   1C
psl
   1G
   1C
   1I
   1E
   1F
   1A
   1J
   1D
   1H
   1B
tas
   1F
   1I
   1E
   1G
   1D
   1B
   1A
   1C
   1H
   1J
zmta
   1B
   1I
   1D
   1H
   1C
   1J
   1F
   1G
   1E
   1A
monmaxpr
   1C
   1I
   1B
   1F
   1A
   1G
   1E
   1J
   1H
   1D
monmaxtasmax
   1H
   1J
   1F
   1E
   1B
   1D
   1C
   1I
   1A
   1G
monmintasmin
   1I
   1E
   1H
   1J
   1B
   1D
   1F
   1A
   1G
   1C
siconc
tos
   1C
   1I
   1E
   1J
   1D
   1G
   1F
   1H
   1A
   1B
