# MESMER-X example workflow

In [1]:
import pathlib

import xarray as xr

import mesmer
from mesmer.distrib import (
    ConditionalDistribution,
    Expression,
    ProbabilityIntegralTransform,
)

ModuleNotFoundError: No module named 'mesmer.weighted'

In [None]:
# --- run options -----------------------------------------------------------
# variable that is emulated; also used to build the input and output file names
target_name = "tasmax"
# future scenario that is loaded in addition to "historical"
scenario = "ssp585"
# True: run the second fitting round (with coefficient smoothing) further down
option_2ndfit = False
# True: write the calibrated parameters to netCDF in the last cell
save_files = False

In [None]:
# set some configuration parameters
THRESHOLD_LAND = 1 / 3  # handed to the ocean-fraction mask in `mask_and_stack`
esm = "IPSL-CM6A-LR"

# TODO: replace with filefinder later
# Root folder below which the calibrated parameters are written when `save_files`
# is True. It is deliberately a relative path (resolved against the current working
# directory) instead of an absolute, machine-specific one; adjust it if the notebook
# is not started from the repository root.
test_data_path = pathlib.Path("tests") / "test-data"
test_path = test_data_path / "output" / target_name / "one_scen_one_ens" / "test-params"

# location of the CMIP6 input files, as returned by mesmer's example-data helper
cmip6_data_path = mesmer.example_data.cmip6_ng_path()

In [None]:
# load predictor data
path_tas = cmip6_data_path / "tas" / "ann" / "g025"

fN_hist = path_tas / f"tas_ann_{esm}_historical_r1i1p1f1_g025.nc"
fN_ssp585 = path_tas / f"tas_ann_{esm}_{scenario}_r1i1p1f1_g025.nc"

# decode the time axis with cftime objects
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
# auxiliary variables that are dropped right after opening each file
unused_vars = ["height", "file_qf", "time_bnds"]

tas_hist = xr.open_dataset(fN_hist, decode_times=time_coder).drop_vars(unused_vars)
tas_ssp585 = xr.open_dataset(fN_ssp585, decode_times=time_coder).drop_vars(unused_vars)

# One node per scenario. The future node is keyed by `scenario` (not a literal
# "ssp585") so the label always matches the file that was actually opened.
tas = xr.DataTree.from_dict(
    {
        "historical": tas_hist,
        scenario: tas_ssp585,
    }
)

In [None]:
# make global mean
tas_glob_mean = mesmer.weighted.global_mean(tas)

# load target data
path_target = cmip6_data_path / target_name / "ann" / "g025"

fN_hist = path_target / f"{target_name}_ann_{esm}_historical_r1i1p1f1_g025.nc"
fN_ssp585 = path_target / f"{target_name}_ann_{esm}_{scenario}_r1i1p1f1_g025.nc"

time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
targ_hist = xr.open_dataset(fN_hist, decode_times=time_coder)
targ_ssp585 = xr.open_dataset(fN_ssp585, decode_times=time_coder)

# make sure times align: overwrite the target's time coordinate with the one of
# the predictor files
targ_hist["time"] = tas_hist["time"]
targ_ssp585["time"] = tas_ssp585["time"]

# One node per scenario. The future node is keyed by `scenario` (not a literal
# "ssp585") so the label always matches the file that was actually opened.
targ_data = xr.DataTree.from_dict(
    {
        "historical": targ_hist,
        scenario: targ_ssp585,
    }
)

In [None]:
def mask_and_stack(ds, threshold_land):
    """Mask the data and collapse its lat/lon grid into one ``gridpoint`` dimension.

    The steps are applied in this order:

    1. ``mesmer.mask.mask_ocean_fraction`` with ``threshold_land``
    2. ``mesmer.mask.mask_antarctica``
    3. ``mesmer.grid.stack_lat_lon`` with ``stack_dim="gridpoint"``

    Parameters
    ----------
    ds
        Data to process; handed unchanged to the first masking function.
    threshold_land
        Threshold forwarded to ``mesmer.mask.mask_ocean_fraction``.

    Returns
    -------
    The result of ``mesmer.grid.stack_lat_lon`` applied to the masked data.
    """
    land_only = mesmer.mask.mask_ocean_fraction(ds, threshold_land)
    without_antarctica = mesmer.mask.mask_antarctica(land_only)
    return mesmer.grid.stack_lat_lon(without_antarctica, stack_dim="gridpoint")


# Mask the target and stack its grid.
# NOTE(review): this rebinds `targ_data`, so the cell consumes the output of the
# loading cell -- presumably it must not be run twice without reloading; confirm.
targ_data = mask_and_stack(targ_data, threshold_land=THRESHOLD_LAND)

# the predictors are the global-mean temperatures (copied, so the original is kept)
pred_data = tas_glob_mean.copy()

# weights: same layout as the predictors, filled with ones, and with the variable
# `tas` renamed to `weights`
weights = mesmer.datatree.map_over_datasets(
    lambda ds: xr.ones_like(ds).rename({"tas": "weights"}), pred_data
)

In [None]:
# stacking: broadcast predictors, target and weights and pool them over the
# scenarios (member_dim=None: no ensemble-member dimension is passed)
pooled = mesmer.datatree.broadcast_and_pool_scen_ens(
    predictors=pred_data,
    target=targ_data,
    weights=weights,
    member_dim=None,
)
stacked_pred, stacked_targ, stacked_weights = pooled

In [None]:
# declaring analytical form of the conditional distribution:
# a normal distribution whose location is linear in `__tas__` (coefficients c1, c2)
# and whose scale is the single coefficient c3
expr_name = "expr1"
expr = "norm(loc=c1 + c2 * __tas__, scale=c3)"
expression_fit = Expression(
    expr,
    expr_name,
    boundaries_params={},
    boundaries_coeffs={},
)

In [None]:
# build the conditional distribution from the expression; this object is used below
# for the first guess, the fit(s) and the probability integral transform
distrib = ConditionalDistribution(expression=expression_fit)

In [None]:
# find first guess
# The target variable is selected through `target_name` (instead of a hard-coded
# `.tasmax` attribute) so that this cell follows the option set at the top.
coeffs_fg = distrib.find_first_guess(
    predictors=stacked_pred,
    target=stacked_targ[target_name],
    weights=stacked_weights.weights,
)
coeffs_fg

In [None]:
# training the conditional distribution
# first round
# The target variable is selected through `target_name` (instead of a hard-coded
# `.tasmax` attribute) so that this cell follows the option set at the top.
distrib.fit(
    predictors=stacked_pred,
    target=stacked_targ[target_name],
    weights=stacked_weights.weights,
    first_guess=coeffs_fg,
)
# the fitted coefficients are read back from the distribution object
transform_coeffs = distrib.coefficients

In [None]:
# second round if necessary: refit, starting from the first-round coefficients,
# with coefficient smoothing switched on
if option_2ndfit:
    distrib.fit(
        predictors=stacked_pred,
        # selected through `target_name` rather than a hard-coded `.tasmax`
        target=stacked_targ[target_name],
        first_guess=transform_coeffs,
        weights=stacked_weights.weights,
        sample_dim="sample",
        smooth_coeffs=True,
        r_gasparicohn=500,
    )
    # Read the result from the fitted object, the same way as after the first
    # round, instead of relying on the return value of `fit`.
    transform_coeffs = distrib.coefficients

transform_coeffs

In [None]:
# probability integral transform on non-stacked data for AR(1) process
# (the per-scenario tree `targ_data` is transformed, not the pooled samples)
target_expression = Expression("norm(loc=0, scale=1)", "normal_dist")
distrib_standard_normal = ConditionalDistribution(target_expression)

pit = ProbabilityIntegralTransform(
    distrib_orig=distrib,
    distrib_targ=distrib_standard_normal,
)
transf_target = pit.transform(
    data=targ_data,
    target_name=target_name,
    preds_orig=pred_data,
    preds_targ=None,
)

In [None]:
# training of auto-regression with spatially correlated innovations
# NOTE(review): `ens_dim="member"` is passed here although the pooling step above
# used `member_dim=None` -- confirm that this is intended.
local_ar_params = mesmer.stats.fit_auto_regression_scen_ens(
    transf_target,
    ens_dim="member",
    dim="time",
    lags=1,
)

# estimate covariance matrix
# candidate localisation radii: 1750 and 2000
LOCALISATION_RADII = range(1750, 2001, 250)

# prep distance matrix from the coordinates of the (stacked) historical target
hist_grid = targ_data["historical"]
geodist = mesmer.geospatial.geodist_exact(lon=hist_grid.lon, lat=hist_grid.lat)

# prep localizer: one Gaspari-Cohn correlation matrix per candidate radius
phi_gc_localizer = mesmer.stats.gaspari_cohn_correlation_matrices(
    geodist=geodist, localisation_radii=LOCALISATION_RADII
)

# NOTE(review): the covariance is estimated from the pooled, untransformed target
# (`stacked_targ`), whereas the AR parameters above are fitted on `transf_target`
# -- confirm that this is intended.
localized_ecov = mesmer.stats.find_localized_empirical_covariance(
    data=stacked_targ[target_name],
    weights=stacked_weights.weights,
    localizer=phi_gc_localizer,
    dim="sample",
    k_folds=30,
)

# store the covariance adjusted with the AR coefficients next to the original one
adjusted_covariance = mesmer.stats.adjust_covariance_ar1(
    localized_ecov.localized_covariance, local_ar_params.coeffs
)
localized_ecov["localized_covariance_adjusted"] = adjusted_covariance

In [None]:
# output file names: one file per parameter set, all sharing the same suffix
file_end = f"{target_name}_{expr_name}_{esm}_{scenario}"
distrib_file = test_path / "distrib" / f"params_transform_distrib_{file_end}.nc"
local_ar_file = test_path / "local_variability" / f"params_local_AR_{file_end}.nc"
localized_ecov_file = (
    test_path / "local_variability" / f"params_localized_ecov_{file_end}.nc"
)

if save_files:
    # save the parameters
    params_to_save = (
        (distrib_file, transform_coeffs),
        (local_ar_file, local_ar_params),
        (localized_ecov_file, localized_ecov),
    )
    for out_file, params in params_to_save:
        # create the output folder first so that writing does not fail when the
        # directory tree does not exist yet
        out_file.parent.mkdir(parents=True, exist_ok=True)
        params.to_netcdf(out_file)