In [6]:
import importlib

import xarray as xr

import mesmer


def _load_data(*filenames):
    # TODO: extract to a 'official' helper function

    # NOTE: open_mfdataset is considerably slower...
    # ds = xr.open_mfdataset(
    #     filenames,
    #     combine="by_coords",
    #     use_cftime=True,
    #     combine_attrs="override",
    #     data_vars="minimal",
    #     compat="override",
    #     coords="minimal",
    #     drop_variables=["height", "file_qf"],
    # ).load()

    load_opt = {"drop_variables": ["height", "file_qf"], "use_cftime": True}
    datasets = [xr.open_dataset(fN, **load_opt) for fN in filenames]

    ds = xr.combine_by_coords(
        datasets,
        combine_attrs="override",
        data_vars="minimal",
        compat="override",
        coords="minimal",
    )

    return ds

In [7]:
# define config values
THRESHOLD_LAND = 1 / 3

REFERENCE_PERIOD = slice("1850", "1900")

# LOCALISATION_RADII = list(range(1250, 6251, 250)) + list(range(6500, 8501, 500))
# restrict radii for faster tests
LOCALISATION_RADII = list(range(5750, 6251, 250)) + list(range(6500, 8001, 500))

esm = "IPSL-CM6A-LR"
scenario = "ssp585"

# define paths and load data
TEST_DATA_PATH = importlib.resources.files("mesmer").parent / "tests" / "test-data"
TEST_PATH = TEST_DATA_PATH / "output" / "tas" / "mon" / "test-params"
cmip6_data_path = TEST_DATA_PATH / "calibrate-coarse-grid" / "cmip6-ng"

# load annual data
path_tas_ann = cmip6_data_path / "tas" / "ann" / "g025"
fN_hist_ann = path_tas_ann / f"tas_ann_{esm}_historical_r1i1p1f1_g025.nc"
fN_proj_ann = path_tas_ann / f"tas_ann_{esm}_{scenario}_r1i1p1f1_g025.nc"
tas_y = _load_data(fN_hist_ann, fN_proj_ann)

# load monthly data
path_tas_mon = cmip6_data_path / "tas" / "mon" / "g025"
fN_hist_mon = path_tas_mon / f"tas_mon_{esm}_historical_r1i1p1f1_g025.nc"
fN_proj_mon = path_tas_mon / f"tas_mon_{esm}_{scenario}_r1i1p1f1_g025.nc"
tas_m = _load_data(fN_hist_mon, fN_proj_mon)

# data preprocessing
ref_y = tas_y.sel(time=REFERENCE_PERIOD).mean("time", keep_attrs=True)
ref_m = tas_m.sel(time=REFERENCE_PERIOD).mean("time", keep_attrs=True)

tas_y = tas_y - ref_y
tas_m = tas_m - ref_m


# create local gridded tas data
def mask_and_stack(ds, threshold_land):
    ds = mesmer.mask.mask_ocean_fraction(ds, threshold_land)
    ds = mesmer.mask.mask_antarctica(ds)
    ds = mesmer.grid.stack_lat_lon(ds)
    return ds


tas_stacked_y = mask_and_stack(tas_y, threshold_land=THRESHOLD_LAND)
tas_stacked_m = mask_and_stack(tas_m, threshold_land=THRESHOLD_LAND)

In [8]:
# fit harmonic model
harmonic_model_fit = mesmer.stats.fit_harmonic_model(
    tas_stacked_y.tas, tas_stacked_m.tas
)

In [11]:
# train power transformer
resids_after_hm = tas_stacked_m - harmonic_model_fit.predictions
pt_coefficients = mesmer.stats.fit_yeo_johnson_transform(
    resids_after_hm.tas, tas_stacked_y.tas
)

In [12]:
transformed_hm_resids = mesmer.stats.yeo_johnson_transform(
    resids_after_hm.tas, pt_coefficients.coeffs, tas_stacked_y.tas
)

In [13]:
# fit cyclo-stationary AR(1) process
AR1_fit = mesmer.stats.fit_auto_regression_monthly(
    transformed_hm_resids.transformed, time_dim="time"
)

# work out covariance matrix

In [14]:
geodist = mesmer.geospatial.geodist_exact(tas_stacked_y.lon, tas_stacked_y.lat)

phi_gc_localizer = mesmer.stats.gaspari_cohn_correlation_matrices(
    geodist, localisation_radii=LOCALISATION_RADII
)

weights = xr.ones_like(AR1_fit.residuals.isel(gridcell=0))
weights.name = "weights"

In [15]:
localized_ecov = mesmer.stats.find_localized_empirical_covariance_monthly(
    AR1_fit.residuals, weights, phi_gc_localizer, "time", 30
)

# we need to get the original time coordinate to be able to validate our results
m_time = tas_stacked_m.time.rename("monthly_time")

In [None]:
# save params


# testing

# load expected values
expected_hm_params = xr.open_dataset(
    TEST_PATH / "harmonic_model" / f"params_harmonic_model_tas_{esm}_{scenario}.nc",
    use_cftime=True,
)
expected_pt_params = xr.open_dataset(
    TEST_PATH
    / "power_transformer"
    / f"params_power_transformer_tas_{esm}_{scenario}.nc",
    use_cftime=True,
)
expected_AR1_params = xr.open_dataset(
    TEST_PATH / "local_variability" / f"params_AR1_tas_{esm}_{scenario}.nc",
    use_cftime=True,
)
expected_localized_ecov = xr.open_dataset(
    TEST_PATH / "local_variability" / f"params_localized_ecov_tas_{esm}_{scenario}.nc",
    use_cftime=True,
)
expected_m_time = xr.open_dataset(
    TEST_PATH / "time" / f"params_monthly_time_tas_{esm}_{scenario}.nc",
    use_cftime=True,
)

# the following parameters should be exactly the same
exact_exp_params = xr.merge(
    [
        expected_hm_params.selected_order,
        expected_localized_ecov.localization_radius,
        expected_m_time.monthly_time,
    ]
)
exact_cal_params = xr.merge(
    [
        harmonic_model_fit.selected_order,
        localized_ecov.localization_radius,
        m_time,
    ]
)

xr.testing.assert_equal(exact_exp_params, exact_cal_params)

# compare the rest
# using numpy because it outputs the differences and how many values are off
import numpy as np

# the tols are set to the best we can do
# NOTE: it is always rather few values that are off
np.testing.assert_allclose(
    expected_hm_params.coeffs,
    harmonic_model_fit.coeffs,
    atol=2e-5,
)
# NOTE: would have to be atol is 1e12 here - not doing that
np.testing.assert_allclose(expected_pt_params.coeffs, pt_coefficients.coeffs, atol=1e-4)
np.testing.assert_allclose(
    expected_AR1_params.slope, AR1_fit.slope, atol=1e-5, rtol=1e-4
)
np.testing.assert_allclose(
    expected_AR1_params.intercept,
    AR1_fit.intercept,
    atol=2e-5,
)
np.testing.assert_allclose(
    localized_ecov.localized_covariance,
    localized_ecov.localized_covariance,
)

In [32]:
np.testing.assert_allclose(
    expected_AR1_params.slope,
    AR1_fit.slope,
    atol=1e-5,
)

In [29]:
# NOTE: would have to be atol is 1e12 here - not doing that
np.testing.assert_allclose(expected_pt_params.coeffs, pt_coefficients.coeffs, atol=1e-4)

In [26]:
np.testing.assert_allclose(
    expected_AR1_params.intercept,
    AR1_fit.intercept,
    atol=2e-5,
    # rtol=1e-2 / 3,
)

In [28]:
np.testing.assert_allclose(
    localized_ecov.localized_covariance,
    localized_ecov.localized_covariance,
    # atol=1e-4,
    # rtol=1e-2,
)

In [33]:
np.testing.assert_allclose(
    expected_pt_params.coeffs,
    pt_coefficients.coeffs,  # atol=1e-4
)

AssertionError: 
Not equal to tolerance rtol=1e-07, atol=0

Mismatched elements: 29 / 2832 (1.02%)
Max absolute difference among violations: 7.11580971e-05
Max relative difference among violations: 0.00762713
 ACTUAL: array([[[ 1.339342e+00, -8.537281e-02],
        [ 1.575343e+00, -1.000000e-01],
        [ 6.099215e-01,  1.000000e-01],...
 DESIRED: array([[[ 1.339342e+00, -8.537281e-02],
        [ 1.575343e+00, -1.000000e-01],
        [ 6.099215e-01,  1.000000e-01],...