# Example execution of MESMER-M workflow
Training and emulation of monthly local temperature from yearly local temperature. We use an example data set on a coarse (20° x 20°) grid.

Import libraries and check MESMER version:

In [None]:
import xarray as xr

import mesmer

mesmer.__version__

## Calibrate emulator

### Configuration

In [None]:
# Candidate localisation radii handed to the Gaspari-Cohn localiser further down:
# 1250 to 6250 in steps of 250, followed by 6500 to 8500 in steps of 500
# (presumably in km - confirm against the mesmer documentation).
LOCALISATION_RADII = [*range(1250, 6251, 250), *range(6500, 8501, 500)]

# Threshold passed to `mesmer.mask.mask_ocean_fraction` when masking the ocean.
THRESHOLD_LAND = 1 / 3

# Time slice over which the reference mean for the anomalies is computed.
REF_PERIOD = slice("1850", "1900")

In [None]:
# define paths of the example data

model = "IPSL-CM6A-LR"
cmip6_data_path = mesmer.example_data.cmip6_ng_path()
path_tas = cmip6_data_path / "tas"

# annual-mean files: historical run and the ssp585 projection
path_tas_ann = path_tas / "ann" / "g025"
fN_hist_ann = path_tas_ann / f"tas_ann_{model}_historical_r1i1p1f1_g025.nc"
fN_proj_ann = path_tas_ann / f"tas_ann_{model}_ssp585_r1i1p1f1_g025.nc"

# monthly-mean files: same model, experiments and ensemble member
path_tas_mon = path_tas / "mon" / "g025"
fN_hist_mon = path_tas_mon / f"tas_mon_{model}_historical_r1i1p1f1_g025.nc"
fN_proj_mon = path_tas_mon / f"tas_mon_{model}_ssp585_r1i1p1f1_g025.nc"

### Load Data for training the emulator

In [None]:
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)


def load_tas(files):
    """Open several tas files as one dataset and load it into memory.

    The yearly and the monthly data are read with exactly the same options,
    so they share this helper instead of two copy-pasted calls.

    Parameters
    ----------
    files : list of path-like
        Files to combine (here: the historical run and the projection).

    Returns
    -------
    xr.Dataset
        The combined dataset, with times decoded via ``time_coder`` and the
        ``height`` and ``file_qf`` variables dropped.
    """
    return xr.open_mfdataset(
        files,
        combine="by_coords",
        decode_times=time_coder,
        combine_attrs="override",
        data_vars="minimal",
        compat="override",
        coords="minimal",
        drop_variables=["height", "file_qf"],
    ).load()


# yearly temperature
tas_y = load_tas([fN_hist_ann, fN_proj_ann])

# monthly temperature
tas_m = load_tas([fN_hist_mon, fN_proj_mon])

### Preprocessing

Calculate anomalies with respect to the reference period:

In [None]:
# time mean over the reference period, computed separately for the yearly
# and the monthly data
ref_y = tas_y.sel(time=REF_PERIOD).mean(dim="time", keep_attrs=True)
ref_m = tas_m.sel(time=REF_PERIOD).mean(dim="time", keep_attrs=True)

# replace the absolute temperatures by anomalies relative to that mean
tas_y = tas_y - ref_y
tas_m = tas_m - ref_m

We only use land grid points and exclude Antarctica. The 3D data with dimensions `('time', 'lat', 'lon')` is stacked to 2D data with dimensions `('time', 'gridcell')`:

In [None]:
def mask_and_stack(ds, threshold_land):
    """Mask ocean and Antarctica, then stack the lat/lon dimensions.

    Parameters
    ----------
    ds
        Gridded data to process.
    threshold_land
        Threshold forwarded to ``mesmer.mask.mask_ocean_fraction``.

    Returns
    -------
    The result of ``mesmer.grid.stack_lat_lon`` applied to the masked data.
    """
    land_only = mesmer.mask.mask_ocean_fraction(ds, threshold_land)
    without_antarctica = mesmer.mask.mask_antarctica(land_only)
    return mesmer.grid.stack_lat_lon(without_antarctica)

In [None]:
# apply the same masking and stacking to the yearly and the monthly anomalies
tas_stacked_y = mask_and_stack(tas_y, THRESHOLD_LAND)
tas_stacked_m = mask_and_stack(tas_m, THRESHOLD_LAND)

### Fit the harmonic model

Fit the seasonal cycle with a harmonic model that can vary with local annual mean temperature
(Fourier regression). This removes the annual mean and determines the optimal order and the
coefficients of the harmonic model.

In [None]:
# first argument: stacked yearly anomalies, second: stacked monthly anomalies
harmonic_model_fit = mesmer.stats.fit_harmonic_model(
    tas_stacked_y["tas"],
    tas_stacked_m["tas"],
)

### Train the power transformer

The residuals are not necessarily symmetric - make them more normal using a Yeo-Johnson
transformation. The parameter $\lambda$ is modelled with a logistic regression using
local annual mean temperature as covariate.

In [None]:
# estimate the coefficients of the Yeo-Johnson transformation from the
# yearly temperatures and the residuals of the harmonic model
pt_coefficients = mesmer.stats.fit_yeo_johnson_transform(
    tas_stacked_y["tas"],
    harmonic_model_fit.residuals,
)

# apply the fitted transformation to those same residuals
transformed_hm_resids = mesmer.stats.yeo_johnson_transform(
    tas_stacked_y["tas"],
    harmonic_model_fit.residuals,
    pt_coefficients,
)

### Fit cyclo-stationary AR(1) process

The monthly residuals are now assumed to follow a cyclo-stationary AR(1) process, where e.g. the July residuals depend on the ones from June and the ones of June on May's with distinct parameters.

In [None]:
# Fit the monthly AR(1) model to the Yeo-Johnson-transformed residuals of the
# harmonic model along the "time" dimension. `AR1_fit` is used again below:
# its `.residuals` feed the covariance estimation and the fit itself is passed
# to `draw_auto_regression_monthly` when generating emulations.
AR1_fit = mesmer.stats.fit_auto_regression_monthly(
    transformed_hm_resids.transformed, time_dim="time"
)

### Find localized empirical covariance

Finally, we determine the localized empirical spatial covariance for each month separately:

In [None]:
# distances between the stacked grid cells, from their lon / lat coordinates
geodist = mesmer.geospatial.geodist_exact(
    tas_stacked_y["lon"],
    tas_stacked_y["lat"],
)

# one Gaspari-Cohn correlation matrix per candidate localisation radius
phi_gc_localizer = mesmer.stats.gaspari_cohn_correlation_matrices(
    geodist,
    localisation_radii=LOCALISATION_RADII,
)

# uniform weights: an array of ones shaped like the residuals of a single grid cell
weights = xr.ones_like(AR1_fit.residuals.isel(gridcell=0)).rename("weights")

# positional arguments: data, weights, localizer, name of the sample dimension
# and 30 (presumably the number of cross-validation folds - confirm against
# the mesmer API documentation)
localized_ecov = mesmer.stats.find_localized_empirical_covariance_monthly(
    AR1_fit.residuals,
    weights,
    phi_gc_localizer,
    "time",
    30,
)

### Saving

### time coordinate
We need to keep the original time coordinate to be able to validate our results later on. If the final emulations do not need to be aligned with the original data, this step can be omitted; the time coordinate can then be generated later, for example with:


```python
monthly_time = xr.date_range("1850-01-01", "2100-12-31", freq="MS", calendar="gregorian")
monthly_time = xr.DataArray(monthly_time, dims="time", coords={"time": monthly_time})
```

In [None]:
# keep the monthly time coordinate of the training data; it is reused below
# when generating the emulations
m_time = tas_stacked_m["time"]

# TODO
# save the parameters to a file:
# - harmonic_model_fit
# - pt_coefficients
# - AR1_fit
# - localized_ecov
# - m_time

## Make emulations

To generate emulations, the workflow of the calibration is reversed, using the estimated parameters from above. Here, we use the same local annual mean temperatures to force the emulations, but temperatures from other models, scenarios, ensemble members or emulated annual local temperatures can be used as well.

In [None]:
# # Re-import necessary libraries
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import xarray as xr

### Configuration

In [None]:
# parameters of the emulation step

# number of realisations to draw (passed as `n_realisations` below)
NR_EMUS = 10

# passed as `buffer` to `draw_auto_regression_monthly` below
BUFFER = 20

# REF_PERIOD = slice("1850", "1900")

#### Random number seed

The `seed` determines the initial state of the random number generator. To avoid generating the same noise for different models and scenarios, a different seed is required for each individual pairing. For reproducibility, the seed needs to be the same for any subsequent draw of the same emulator. To avoid human-chosen standard seeds (e.g. `0`, `1234`), it is recommended to also generate the seeds randomly and save them for later, using

```python
import secrets
secrets.randbits(128)
```

In [None]:
# randomly generated once, then kept fixed so the draws are reproducible
SEED = 234_361_146_192_407_661_971_285_321_853_135_632_294

### Load data needed for emulations

In [None]:
# TODO
# load the parameters from a file
# in this example notebook we directly use the calibration from above

In [None]:
# TODO
# load yearly temperature
# in this example we are using the original yearly temperature for demonstration

### Preprocessing

In [None]:
# preprocess tas
# ref = tas_y.sel(time=REF_PERIOD).mean("time", keep_attrs=True)
# tas_y = tas_y - ref
# tas_stacked_y = mask_and_stack(tas_y, threshold_land=THRESHOLD_LAND)

In [None]:
# get the original grid for transforming back later:
# `ref_y` was computed before masking and stacking, so it still carries the
# full lat / lon grid; only its "lat" and "lon" entries are kept here and
# later handed to `mesmer.grid.unstack_lat_lon_and_align`
grid_orig = ref_y[["lat", "lon"]]

### Generate emulations

In [None]:
# deterministic part: monthly values predicted by the harmonic model from the
# yearly temperatures, on the monthly time axis
monthly_harmonic_emu = mesmer.stats.predict_harmonic_model(
    tas_stacked_y["tas"],
    harmonic_model_fit.coeffs,
    m_time,
)

# stochastic part: variability around 0 drawn from the AR(1) model, using the
# localized covariance estimated during calibration
local_variability_transformed = mesmer.stats.draw_auto_regression_monthly(
    AR1_fit,
    localized_ecov.localized_covariance,
    seed=SEED,
    buffer=BUFFER,
    n_realisations=NR_EMUS,
    time=m_time,
)

# undo the Yeo-Johnson power transformation on the drawn variability
local_variability_inverted = mesmer.stats.inverse_yeo_johnson_transform(
    tas_stacked_y["tas"],
    local_variability_transformed,
    pt_coefficients,
)

# final emulations: harmonic-model prediction plus back-transformed variability
emulations = monthly_harmonic_emu + local_variability_inverted.inverted

In [None]:
# unstack to original grid:
# `emulations` is on the stacked grid-cell dimension; `grid_orig` supplies the
# lat / lon coordinates of the original (unmasked) grid to align to
emulations_unstacked = mesmer.grid.unstack_lat_lon_and_align(emulations, grid_orig)

### Saving and/or Analysis

In [None]:
# TODO
# save