In [1]:
import numpy as np
import plotly.graph_objects as go
import xarray as xr
from plotly.subplots import make_subplots
from seapopym.configuration.no_transport.parameter import ForcingParameters
from seapopym.configuration.parameters.parameter_forcing import ForcingUnit
from seapopym.standard.units import StandardUnitsLabels

from seapopym_optimization import wrapper

In [2]:
# Bounds of the analysis window, used for the label-based time slice below.
time_start = "1998-01-01"
time_end = "2022-01-01"

data = xr.open_dataset("../1_data_processing/1_1_Forcing/products/all_stations_cmems.zarr", engine="zarr")

# Tag the two forcing fields with their units and mark the time coordinate as axis "T".
data["T"].attrs.update(units=StandardUnitsLabels.temperature.units)
data["npp"].attrs.update(units="mg.day-1.m-2")
data.time.attrs.update(axis="T")

# Keep only the analysis window, then load the values into memory.
data = data.sel(time=slice(time_start, time_end)).load()

In [3]:
# Wrap each forcing field with the same resolution / timestep settings before
# handing both to the no-transport configuration.
temperature_forcing = ForcingUnit(forcing=data["T"], resolution=1 / 12, timestep=1)
primary_production_forcing = ForcingUnit(forcing=data["npp"], resolution=1 / 12, timestep=1)

forcing_parameters = ForcingParameters(
    temperature=temperature_forcing,
    primary_production=primary_production_forcing,
)

|	npp unit is milligram / day / meter ** 2, it will be converted to kilogram / day / meter ** 2.
[0m


In [None]:
# One functional group, named "D1N1".
# NOTE(review): the meaning and order of the seven numeric values is defined by
# FunctionalGroupGeneratorNoTransport — confirm against the wrapper before editing them.
functional_groups = wrapper.FunctionalGroupGeneratorNoTransport(
    [[1, 1, 0.1668, 10.38, -0.11, 150, 0.15]],
    ["D1N1"],
)

model_print = wrapper.model_generator_no_transport(
    forcing_parameters=forcing_parameters,
    fg_parameters=functional_groups,
)

# Run the model, then keep its exported biomass for the comparisons below.
model_print.run()
biomass_print = model_print.export_biomass()

---

# Error

Compute the error between the 1D and 3D models at each station and show how it varies over the year.


In [18]:
# Switches selecting which figures the station cells below produce.
COMPARISON_ALL = False  # full time-series comparison
COMPARISON_ANNUAL = True  # day-of-year climatology comparison
NORMALIZED = False  # normalized error vs. normalized current figure

# Backward-compatible alias: the station cells read the original (misspelled) name.
NOARMALIZED = NORMALIZED

In [19]:
# Tag the zooplankton variable with its units string.
data["zooc"].attrs.update(units="g.m-2")

In [20]:
# Convert the model biomass to g.m-2: attach pint units, convert, then strip them
# back to plain arrays.
biomass_quantified = biomass_print.pint.quantify()
biomass_print = biomass_quantified.pint.to("g.m-2").pint.dequantify()

In [21]:
def plot_comparison(obs, pred, current, time_axis_name, station_name):
    """Build a two-axis figure comparing model and reference zooplankton, with currents overlaid.

    Parameters
    ----------
    obs, pred
        Series exposing `.data` and indexable by `time_axis_name` (xarray DataArrays in
        this notebook). `pred` is drawn as the "SeapoPym" line; the band between the
        element-wise min and max of `obs` and `pred` is shaded.
    current
        Series exposing `.data` and indexable by `time_axis_name`; drawn on the secondary y-axis.
    time_axis_name
        Name of the coordinate used for the x-axis (also used as the x-axis title).
    station_name
        Station label inserted in the figure title.

    Returns
    -------
    The plotly figure (not shown; the caller decides when to display it).
    """
    obs_axis = obs[time_axis_name].data
    band_top = np.max([obs.data, pred.data], 0)
    band_bottom = np.min([obs.data, pred.data], 0)

    model_line = go.Scatter(
        name="SeapoPym",
        x=pred[time_axis_name].data,
        y=pred.data,
        mode="lines",
        line={"color": "rgb(31, 119, 180)"},
    )
    # Invisible line marking the top of the shaded band.
    upper_edge = go.Scatter(
        name="Upper Bound",
        x=obs_axis,
        y=band_top,
        mode="lines",
        marker={"color": "#444"},
        line={"width": 0},
        showlegend=False,
    )
    # Invisible line at the bottom of the band; "tonexty" fills up to the previous
    # trace, so this one must be added right after the upper edge.
    lower_edge = go.Scatter(
        name="Lower Bound",
        x=obs_axis,
        y=band_bottom,
        mode="lines",
        marker={"color": "#444"},
        line={"width": 0},
        fill="tonexty",
        fillcolor="rgba(68, 68, 68, 0.3)",
        showlegend=False,
    )
    current_line = go.Scatter(
        name="Currents",
        x=current[time_axis_name].data,
        y=current.data,
        mode="lines",
        line={"color": "rgb(255, 127, 14)", "width": 1},
    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(model_line, secondary_y=False)
    fig.add_trace(upper_edge, secondary_y=False)
    fig.add_trace(lower_edge)
    fig.add_trace(current_line, secondary_y=True)

    # Title, size and axis labels.
    fig.update_layout(
        title_text=f"Comparison of SeapoPym and LMTL zooplankton at {station_name}, in addition to currents",
        width=1400,
        height=600,
    )
    fig.update_xaxes(title_text=f"{time_axis_name}")
    fig.update_yaxes(title_text="<b>Zooplankton</b> carbon concentration (g/m2)", secondary_y=False)
    fig.update_yaxes(title_text="<b>Absolute currents</b> speed", secondary_y=True)

    return fig

In [22]:
def plot_diff(error, current_diff, station_name):
    """Plot a signed error series as shaded areas around zero, with a current series overlaid.

    Parameters
    ----------
    error
        Series exposing `.data` and a `.time` coordinate (an xarray DataArray in this
        notebook). Its positive and negative parts are shaded separately.
    current_diff
        Series exposing `.data` and a `.time` coordinate; drawn as an orange line on
        the same axes.
    station_name
        Station label inserted in the figure title.

    Returns
    -------
    The plotly figure (not shown; the caller decides when to display it).
    """
    time_values = error.time.data
    # Split the signed error into its non-negative and non-positive parts.
    positive_part = np.maximum(error.data, 0)
    negative_part = np.minimum(error.data, 0)

    fig = go.Figure()

    # Both areas are filled towards y=0. With "tonexty" the negative trace would fill
    # up to the positive trace and shade the positive area a second time.
    fig.add_trace(
        go.Scatter(
            name="Positive error",
            x=time_values,
            y=positive_part,
            mode="lines",
            fill="tozeroy",
            fillcolor="rgba(68, 68, 68, 0.3)",
            line={"width": 0},
            showlegend=True,
        )
    )
    fig.add_trace(
        go.Scatter(
            name="Negative error",
            x=time_values,
            y=negative_part,
            mode="lines",
            fill="tozeroy",
            fillcolor="rgba(68, 68, 68, 0.3)",
            # Zero-width line: only the filled area is visible.
            line={"width": 0},
            showlegend=True,
        )
    )
    fig.add_trace(
        go.Scatter(
            name="Current derivative",
            x=current_diff.time.data,
            y=current_diff.data,
            mode="lines",
            line={"color": "rgb(255, 127, 14)"},
        )
    )
    # Title, size and axis label.
    fig.update_layout(
        title_text=f"Comparison of error between SeapoPym and LMTL zooplankton at {station_name}, in addition to first order derivative currents",
        width=1400,
        height=600,
    )
    fig.update_xaxes(title_text="Time")

    return fig

## BATS


In [23]:
latitude = 31.6
longitude = -64.2
station_name = "BATS"

# Full-resolution series at the station. Every branch below derives its inputs from
# these, so no branch depends on variables left behind by another branch or cell.
obs_series = data["zooc"].sel(latitude=latitude, longitude=longitude)
pred_series = biomass_print.sel(latitude=latitude, longitude=longitude, functional_group=0)
current_u = data["U"].sel(latitude=latitude, longitude=longitude, depth=1)
current_v = data["V"].sel(latitude=latitude, longitude=longitude, depth=1)
# Current speed as the Euclidean norm of (U, V) — the same definition in every branch.
speed_series = np.sqrt(current_u**2 + current_v**2)

if COMPARISON_ANNUAL:
    # Day-of-year climatology: average over time within each day of the year.
    obs = obs_series.groupby("time.dayofyear").mean("time")
    pred = pred_series.groupby("time.dayofyear").mean("time")
    current = speed_series.groupby("time.dayofyear").mean("time")
    plot_comparison(obs, pred, current, time_axis_name="dayofyear", station_name=station_name).show()

if COMPARISON_ALL:
    plot_comparison(obs_series, pred_series, speed_series, time_axis_name="time", station_name=station_name).show()

if NOARMALIZED:
    # plot_diff reads the `time` coordinate, so this branch always uses the full series.
    error_series = obs_series - pred_series
    normalized_current = speed_series / speed_series.max()
    # Divide by the largest absolute error so the sign of the error is preserved.
    normalized_error = error_series / np.abs(error_series).max()
    plot_diff(current_diff=normalized_current, error=normalized_error, station_name=station_name).show()

## PAPA


In [24]:
latitude = 50
longitude = -150
station_name = "PAPA"

# Full-resolution series at the station. Every branch below derives its inputs from
# these, so no branch depends on variables left behind by another branch or cell.
obs_series = data["zooc"].sel(latitude=latitude, longitude=longitude)
pred_series = biomass_print.sel(latitude=latitude, longitude=longitude, functional_group=0)
current_u = data["U"].sel(latitude=latitude, longitude=longitude, depth=1)
current_v = data["V"].sel(latitude=latitude, longitude=longitude, depth=1)
# Current speed as the Euclidean norm of (U, V) — the same definition in every branch.
speed_series = np.sqrt(current_u**2 + current_v**2)

if COMPARISON_ANNUAL:
    # Day-of-year climatology: average over time within each day of the year.
    obs = obs_series.groupby("time.dayofyear").mean("time")
    pred = pred_series.groupby("time.dayofyear").mean("time")
    current = speed_series.groupby("time.dayofyear").mean("time")
    plot_comparison(obs, pred, current, time_axis_name="dayofyear", station_name=station_name).show()

if COMPARISON_ALL:
    plot_comparison(obs_series, pred_series, speed_series, time_axis_name="time", station_name=station_name).show()

if NOARMALIZED:
    # plot_diff reads the `time` coordinate, so this branch always uses the full series.
    error_series = obs_series - pred_series
    normalized_current = speed_series / speed_series.max()
    # Divide by the largest absolute error so the sign of the error is preserved.
    normalized_error = error_series / np.abs(error_series).max()
    plot_diff(current_diff=normalized_current, error=normalized_error, station_name=station_name).show()

## HOT


In [25]:
latitude = 22.75
longitude = -158
station_name = "HOT"

# Full-resolution series at the station. Every branch below derives its inputs from
# these, so no branch depends on variables left behind by another branch or cell.
obs_series = data["zooc"].sel(latitude=latitude, longitude=longitude)
pred_series = biomass_print.sel(latitude=latitude, longitude=longitude, functional_group=0)
current_u = data["U"].sel(latitude=latitude, longitude=longitude, depth=1)
current_v = data["V"].sel(latitude=latitude, longitude=longitude, depth=1)
# Current speed as the Euclidean norm of (U, V) — the same definition in every branch.
speed_series = np.sqrt(current_u**2 + current_v**2)

if COMPARISON_ANNUAL:
    # Day-of-year climatology: average over time within each day of the year.
    obs = obs_series.groupby("time.dayofyear").mean("time")
    pred = pred_series.groupby("time.dayofyear").mean("time")
    current = speed_series.groupby("time.dayofyear").mean("time")
    plot_comparison(obs, pred, current, time_axis_name="dayofyear", station_name=station_name).show()

if COMPARISON_ALL:
    plot_comparison(obs_series, pred_series, speed_series, time_axis_name="time", station_name=station_name).show()

if NOARMALIZED:
    # plot_diff reads the `time` coordinate, so this branch always uses the full series.
    error_series = obs_series - pred_series
    normalized_current = speed_series / speed_series.max()
    # Divide by the largest absolute error so the sign of the error is preserved.
    normalized_error = error_series / np.abs(error_series).max()
    plot_diff(current_diff=normalized_current, error=normalized_error, station_name=station_name).show()

---

# Correlation


In [26]:
# Model-minus-reference error for functional group 0.
# Note the sign: this is pred - obs, the opposite of the `obs - pred` used in the station cells.
model_biomass = biomass_print.sel(functional_group=0)
error = model_biomass - data["zooc"]

In [27]:
# Current magnitude at depth=1, with dimensions ordered (time, latitude, longitude).
u_at_depth = data["U"].sel(depth=1)
v_at_depth = data["V"].sel(depth=1)
current = np.sqrt(u_at_depth**2 + v_at_depth**2).transpose("time", "latitude", "longitude")

### BATS


In [28]:
# BATS station coordinates.
latitude, longitude = 31.6, -64.2

station_error = error.sel(latitude=latitude, longitude=longitude)
station_current = current.sel(latitude=latitude, longitude=longitude)

# np.corrcoef returns the 2x2 correlation matrix; [0, 1] is the coefficient
# between the error series and the current series.
coeff = np.corrcoef(station_error.data, station_current.data)[0, 1]
print(f"Correlation coefficient at BATS: {coeff}")

Correlation coefficient at BATS: 0.26275519538159225


### PAPA


In [29]:
# PAPA station coordinates.
latitude, longitude = 50, -150

station_error = error.sel(latitude=latitude, longitude=longitude)
station_current = current.sel(latitude=latitude, longitude=longitude)

# np.corrcoef returns the 2x2 correlation matrix; [0, 1] is the coefficient
# between the error series and the current series.
coeff = np.corrcoef(station_error.data, station_current.data)[0, 1]
print(f"Correlation coefficient at PAPA: {coeff}")

Correlation coefficient at PAPA: 0.24462529082773743


### HOT


In [30]:
# HOT station coordinates.
latitude, longitude = 22.75, -158

station_error = error.sel(latitude=latitude, longitude=longitude)
station_current = current.sel(latitude=latitude, longitude=longitude)

# np.corrcoef returns the 2x2 correlation matrix; [0, 1] is the coefficient
# between the error series and the current series.
coeff = np.corrcoef(station_error.data, station_current.data)[0, 1]
print(f"Correlation coefficient at HOT: {coeff}")

Correlation coefficient at HOT: 0.35961096600864956


---

# Export the relative error (as a ratio)


In [52]:
station_name = "BATS"
latitude, longitude = 31.6, -64.2

obs = data["zooc"].sel(latitude=latitude, longitude=longitude)
pred = biomass_print.sel(latitude=latitude, longitude=longitude, functional_group=0)

# Error relative to the model prediction: (obs - pred) / pred.
difference = obs - pred
ratio_of_error = difference / pred
ratio_of_error.attrs = {
    "units": "ratio",
    "long_name": "Ratio of error",
    "station_name": station_name,
}

# Drop the first value (no initial conditions), then write one file per station.
ratio_of_error = ratio_of_error[1:]
output_path = f"ratio_of_error_{station_name}.nc"
ratio_of_error.to_netcdf(output_path)

In [53]:
station_name = "PAPA"
latitude, longitude = 50, -150

obs = data["zooc"].sel(latitude=latitude, longitude=longitude)
pred = biomass_print.sel(latitude=latitude, longitude=longitude, functional_group=0)

# Error relative to the model prediction: (obs - pred) / pred.
difference = obs - pred
ratio_of_error = difference / pred
ratio_of_error.attrs = {
    "units": "ratio",
    "long_name": "Ratio of error",
    "station_name": station_name,
}

# Drop the first value (no initial conditions), then write one file per station.
ratio_of_error = ratio_of_error[1:]
output_path = f"ratio_of_error_{station_name}.nc"
ratio_of_error.to_netcdf(output_path)

In [54]:
station_name = "HOT"
latitude, longitude = 22.75, -158

obs = data["zooc"].sel(latitude=latitude, longitude=longitude)
pred = biomass_print.sel(latitude=latitude, longitude=longitude, functional_group=0)

# Error relative to the model prediction: (obs - pred) / pred.
difference = obs - pred
ratio_of_error = difference / pred
ratio_of_error.attrs = {
    "units": "ratio",
    "long_name": "Ratio of error",
    "station_name": station_name,
}

# Drop the first value (no initial conditions), then write one file per station.
ratio_of_error = ratio_of_error[1:]
output_path = f"ratio_of_error_{station_name}.nc"
ratio_of_error.to_netcdf(output_path)