# EAT example #3: Parameters

This example uses ensemble-based biogeochemical data assimilation to estimate
a parameter (the maximum growth rate of diatoms) of the ERSEM biogeochemical
model at a site in the Western English Channel.

## Import and configure

In [None]:
# Imports from standard library (mpi4py is configured first so that MPI is not initialized automatically on import)
import mpi4py.rc
mpi4py.rc.initialize = False

import sys
import os
import warnings
import shutil
warnings.filterwarnings('ignore')

# Import of third party packages
import numpy as np
%matplotlib widget
from matplotlib import pyplot
import cmocean

# EAT itself
import eatpy

# Shared postprocessing scripts
sys.path.append("..")
import shared

# Reproducible random numbers: the seed is stored in/loaded from seed.dat
seed = shared.seed("seed.dat")
rng = np.random.default_rng(seed)

# Experiment configuration
# Ensemble size: 50 members normally, reduced to 2 for automated testing
# (detected through the GITHUB_ACTIONS environment variable)
N = 2 if "GITHUB_ACTIONS" in os.environ else 50
varname = "P1_Chl"  # model variable that is loaded and plotted below
parname = "instances/P1/parameters/sum"  # path of the perturbed parameter in fabm.yaml
cmap = cmocean.cm.algae  # colormap for the depth-time chlorophyll plots

## Load observations

In [None]:
# Satellite observations of surface chlorophyll; these are passed to the
# time series plots further down for comparison with model results
obs_file = "P1_Chl_cci_5d.dat"
obs = shared.read_0d_observations(obs_file)

# Reference simulation

Forecast-only, no data assimilation

In [None]:
# Set up the initial state, which is the result of a 7-year spin-up
shutil.copyfile("restart_01112014.nc", "restart.nc")

# Run the model
!eat-gotm

## Load and plot results

In [None]:
# Read the reference (free run) results for the selected variable
# from GOTM's NetCDF output
time, z, ref, long_name, units = shared.read_result("result.nc", varname)
axis_label = f"{long_name} ({units})"

# Two rows of plots; the narrow second column is reserved for colorbars
fig, axes = pyplot.subplots(
    nrows=2, ncols=2, figsize=(8, 6), sharex="col", width_ratios=[0.95, 0.05]
)
(ax1, cax1), (ax2, cax2) = axes

# Top panel: time series of surface chlorophyll (last index along depth),
# together with the observations
shared.plot_0d_timeseries(ax1, time, ref[:, -1], obs)
ax1.set_ylabel(axis_label)
ax1.set_title(f"surface {long_name}")

# The line plot has no colorbar, so hide the axes reserved for it
cax1.axis("off")

# Bottom panel: time-varying depth distribution of chlorophyll
pc, cb = shared.plot_1d_timeseries(ax2, time, z, ref, 20, cax=cax2, cmap=cmap)
cb.set_label(axis_label)
ax2.set_title(long_name)

fig.tight_layout()

# Data assimilation

## Create the ensemble

In [None]:
# Ensembles (N members each) of the GOTM and FABM YAML configurations
gotm = eatpy.models.gotm.YAMLEnsemble("gotm.yaml", N)
fabm = eatpy.models.gotm.YAMLEnsemble("fabm.yaml", N)

# Keep the parameter value as configured before perturbation; it serves as
# the "no DA" reference when the estimated parameter is plotted
ref_par_value = fabm[parname]

with gotm, fabm:
    # Point each member's GOTM configuration at its own FABM configuration
    gotm["fabm/yaml_file"] = fabm.file_paths

    # Scale the parameter of every member by a random factor drawn
    # uniformly between 0.7 and 1.3
    perturbation = rng.uniform(0.7, 1.3, N)
    fabm[parname] *= perturbation

## Run data assimilation experiment

The command in the cell below runs EAT locally. This includes the run
script and all N ensemble members, that is, N+1 processes in total.
If you have EAT installed on an HPC cluster, you can often speed
up the experiment by placing the command in a job submission script
(replacing `{N}` with your desired ensemble size) and submitting that
to your cluster's queuing system. In that case, there is no need to
execute the cell below.

In [None]:
# Set up the initial state, which is the result of a 7-year spin-up
# Note that this needs to be done again after the free run,
# as this run has saved its final state in restart.nc.
shutil.copyfile('restart_01112014.nc', 'restart.nc')

# Run the experiment
!mpiexec -n 1 python run.py : -n {N} eat-gotm --separate_gotm_yaml

## Load and plot results

### Chlorophyll

In [None]:
# Load the results of all N ensemble members
enstime, ensz, ens, long_name, units = shared.read_ensemble_result(
    "result.nc", varname, N
)

# Three stacked panels; the narrow second column is reserved for colorbars
fig, axes = pyplot.subplots(
    nrows=3,
    ncols=2,
    figsize=(8, 10),
    sharex="col",
    width_ratios=[0.95, 0.05],
    height_ratios=[0.4, 0.3, 0.3],
)
(ax1, cax1), (ax2, cax2), (ax3, cax3) = axes

# Panel 1: surface chlorophyll of the ensemble, with the free run
# (loaded earlier into `ref`) and the observations for comparison
free_run_surface = [("model, free run", ref[:, -1])]
shared.plot_0d_ensemble_timeseries(
    ax1, enstime, ens[:, :, -1], free_run_surface, obs, label="with DA"
)
ax1.set_ylabel(f"{long_name} ({units})")
ax1.set_title("surface diatom chlorophyll")
ax1.set_ylim(0.0, 4.0)
cax1.axis("off")  # the line plot has no colorbar

# The same contour levels and colorbar label are used for both depth-time
# panels, so that the free run and the DA run can be compared directly
chl_contours = np.linspace(0.0, 2.0, 11)
colorbar_label = "chlorophyll (mg m⁻³)"

# Panel 2: chlorophyll throughout the water column for the free run
pc, cb = shared.plot_1d_timeseries(
    ax2, time, z, ref, chl_contours, cmap=cmap, extend="max", cax=cax2
)
cb.set_label(colorbar_label)
ax2.set_title("simulated diatom chlorophyll (free run)")

# Panel 3: chlorophyll throughout the water column for the run with DA
_, cb = shared.plot_1d_ensemble_timeseries(
    ax3, enstime, ensz, ens, chl_contours, cmap=cmap, extend="max", cax=cax3
)
cb.set_label(colorbar_label)
ax3.set_title("simulated diatom chlorophyll (DA)")

fig.tight_layout()

# Save the figure to file
fig.savefig("chl_da.png", dpi=150)

### Time variation in diatom growth rate

In [None]:
# Load the ensemble of parameter values. The variable name in the results is
# the parameter path with slashes replaced by underscores.
par_varname = parname.replace("/", "_")
enstime, _, par_ens, long_name, units = shared.read_ensemble_result(
    "result.nc", par_varname, N
)

# Reference time series: the original [constant] parameter value,
# with the same shape and dtype as par_ens[0]
constant_par = np.full_like(par_ens[0], ref_par_value)

# Create figure and plot the parameter time series alongside the reference
fig, ax = pyplot.subplots(figsize=(8, 4))
reference_series = [("no DA", constant_par)]
shared.plot_0d_ensemble_timeseries(
    ax, enstime, par_ens, reference_series, filter_period=31
)
ax.set_ylabel("maximum specific productivity (d⁻¹)")
ax.set_title("diatom maximum specific productivity")
fig.tight_layout()

# Save the figure to file
fig.savefig("par_da.png", dpi=150)