In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pygam import LinearGAM, l, s
from statsmodels.tsa import seasonal

# Compatibility shim: re-create the `np.int` alias, which NumPy removed in 1.24.
# NOTE(review): presumably needed because pygam still references `np.int` — confirm and
# drop once the installed pygam version no longer needs it.
np.int = int
import scipy.sparse

# Compatibility shim: expose `.A` on CSR matrices as a dense-array property (`toarray()`).
# NOTE(review): presumably for code that still uses the `.A` shorthand, which newer SciPy
# releases no longer provide — confirm against the installed pygam/SciPy versions.
scipy.sparse.csr_matrix.A = property(lambda self: self.toarray())

In [None]:
# Build a noisy synthetic daily series: annual sine + piecewise-linear trend + Gaussian noise.
np.random.seed(42)  # fixed seed so the noise draw is reproducible
n_points = 4 * 365
time = pd.date_range(start="2020-01-01", periods=n_points, freq="D")

# Seasonal signal: sine of the calendar day-of-year with a 365-day period.
season_in = np.sin(time.dayofyear * (2 * np.pi) / 365)

# Trend: daily increments of +0.01 over the first half and -0.01 over the second half,
# accumulated and shifted by 10, so the trend rises and then falls back (piecewise linear).
half = int(n_points / 2)
daily_step = np.full(n_points, 1 / 100)
daily_step[n_points - half :] = -1 / 100
trend_in = daily_step.cumsum() + 10

noise = np.random.normal(loc=0, scale=1, size=n_points)
biomass = season_in + noise + trend_in

# One-column frame ("biomass") indexed by the timestamps.
data = pd.DataFrame({"time": time, "biomass": biomass})
data = data.set_index("time")

# Quick visual check of the synthetic series (line plot, pandas' default plot kind).
data.plot.line()

# Statsmodels decompositions: seasonal_decompose, STL, MSTL

---


In [None]:
# Additive decomposition of the synthetic series with a 365-sample period.
result_seasonal_decompose = seasonal.seasonal_decompose(
    data,
    model="additive",
    period=365,
)
# Bind the return value to `_` so it is not echoed as the cell's output.
_ = result_seasonal_decompose.plot(weights=True)

In [None]:
# STL decomposition of the synthetic biomass column with a 365-sample period.
stl_model = seasonal.STL(data["biomass"], period=365)
result_stl = stl_model.fit()
# Bind the return value to `_` so it is not echoed as the cell's output.
_ = result_stl.plot()

In [None]:
# Display the trend component of the STL fit held in `result_stl`.
result_stl.trend

In [None]:
# MSTL decomposition with a single seasonal period; int(365.25) truncates to 365 samples.
# A two-period variant, periods=(int(365.25 / 4), int(365.25)), is kept here for reference
# but is disabled.
annual_period = int(365.25)
result_mstl = seasonal.MSTL(data["biomass"], periods=(annual_period,)).fit()
# Bind the return value to `_` so it is not echoed as the cell's output.
_ = result_mstl.plot()

# Manual decomposition with a GAM

---


In [None]:
# Regressor table for the GAM: a 1-based day counter (trend input) and a sine/cosine pair
# of that counter with a 365.25-day period (seasonal inputs).
data_gam = (
    data.reset_index()
    .copy()
    .assign(
        time=lambda frame: pd.to_datetime(frame["time"]),
        day_since_start=lambda frame: np.arange(1, len(frame) + 1),
        sin_doy=lambda frame: np.sin(2 * np.pi * frame["day_since_start"] / 365.25),
        cos_doy=lambda frame: np.cos(2 * np.pi * frame["day_since_start"] / 365.25),
    )
)
data_gam

In [None]:
# GAM decomposition of the synthetic series: a spline smooth of the day counter models the
# trend, and linear terms on the sine/cosine regressors model the season.
# The design matrix and target do not depend on the spline count, so build them once.
variable = "biomass"
X = data_gam[["day_since_start", "sin_doy", "cos_doy"]].to_numpy()
y = data_gam[variable].to_numpy()

# NOTE(review): `result_stl` is whatever the kernel currently holds; on a top-to-bottom run
# it is the STL fit of the synthetic series.
for n_splines in [10, 20, 40, 80]:
    # The swept value is the number of spline basis functions of the trend smooth
    # (it is passed as `n_splines`), not a polynomial degree.
    gam = LinearGAM(s(0, n_splines=n_splines) + l(1) + l(2), fit_intercept=False).fit(X, y)
    trend = gam.partial_dependence(term=0, X=X)
    season = gam.partial_dependence(term=1, X=X) + gam.partial_dependence(term=2, X=X)
    residuals = y - trend - season

    fig, (ax1, ax2, ax3) = plt.subplots(nrows=3, figsize=(12, 6))
    fig.suptitle(f"n_splines = {n_splines}")

    ax1.plot(season, label="GAM")
    ax1.plot(season_in, label="Real")
    # This curve is the STL *seasonal* component; it was previously labelled "STL trend".
    ax1.plot(result_stl.seasonal.to_numpy(), label="STL seasonal", alpha=0.2)
    ax1.set_title("Seasonal component")
    ax1.legend()

    ax2.plot(trend, label="GAM")
    ax2.plot(trend_in, label="Real")
    ax2.plot(result_stl.trend.to_numpy(), label="STL trend")
    ax2.set_title("Trend component")
    ax2.legend()

    ax3.plot(residuals)
    ax3.set_title("GAM residuals")

    fig.tight_layout()
    plt.show()
    # Release the figure so repeated runs of the loop do not accumulate open figures.
    plt.close(fig)

# Real data

---


In [None]:
import xarray as xr

# NOTE(review): absolute, machine-specific path — this cell only runs where the store exists.
# Consider moving it to a configuration cell or a path relative to the project.
BATS_OBS_PATH = "/Users/adm-lehodey/Documents/Workspace/Projects/Seapopym-optimisation/notebooks/1_data_processing/1_1_Forcing/data/1_products/Bats_obs.zarr"

# Open the store in a context manager so it is closed once the values are in memory, and
# keep the xarray Dataset under its own name instead of binding `data` to two object types.
with xr.open_dataset(BATS_OBS_PATH, engine="zarr") as bats_obs:
    # Average the `day` variable over latitude, longitude and layer, leaving a time series.
    day_mean = bats_obs.day.mean(["latitude", "longitude", "layer"]).to_dataframe()

# Daily series: drop rows with missing values, average per calendar day, fill the remaining
# gaps by linear interpolation, and rename the `day` column to `biomass`.
# This rebinds `data`, replacing the synthetic series built earlier in the notebook.
data = (
    day_mean.reset_index()
    .dropna()
    .set_index("time")
    .resample("D")
    .mean()
    .interpolate("linear")
    .rename(columns={"day": "biomass"})
)
data.plot()

In [None]:
# STL decomposition of the biomass column; int(365.25) truncates to a 365-sample period.
# This rebinds `result_stl`, replacing any earlier fit stored under that name.
stl_model_obs = seasonal.STL(data["biomass"], period=int(365.25))
result_stl = stl_model_obs.fit()
# Bind the return value to `_` so it is not echoed as the cell's output.
_ = result_stl.plot()

In [None]:
# Echo the STL result object itself (its repr) as the cell output.
# NOTE(review): looks like a leftover inspection cell — confirm it is still needed.
result_stl


In [None]:
# Table of the STL components: one row per timestamp, one column per component.
stl_components = [result_stl.trend, result_stl.seasonal, result_stl.resid]
pd.DataFrame(stl_components).T

In [None]:
# Table of the MSTL components: trend and residual side by side, joined with the seasonal
# component on the "time" key.
# NOTE(review): in the visible notebook `result_mstl` is only fitted on the synthetic series,
# before `data` is replaced by the observations — confirm this is the intended fit here.
mstl_trend_resid = pd.DataFrame([result_mstl.trend, result_mstl.resid]).T
mstl_trend_resid.merge(result_mstl.seasonal, on="time")


In [None]:
# GAM decomposition of the series currently held in `data`, compared with the STL fit
# currently held in `result_stl`.

# Regressor table: a 1-based day counter (trend input) and a sine/cosine pair of that
# counter with a 365.25-day period (seasonal inputs).
data_gam = data.reset_index().copy()
data_gam["time"] = pd.to_datetime(data_gam["time"])
data_gam["day_since_start"] = np.arange(1, len(data_gam) + 1)
data_gam["sin_doy"] = np.sin(2 * np.pi * data_gam["day_since_start"] / 365.25)
data_gam["cos_doy"] = np.cos(2 * np.pi * data_gam["day_since_start"] / 365.25)

# The design matrix and target do not depend on the spline count, so build them once.
variable = "biomass"
X = data_gam[["day_since_start", "sin_doy", "cos_doy"]].to_numpy()
y = data_gam[variable].to_numpy()

for n_splines in [10, 20, 40, 80]:
    # The swept value is the number of spline basis functions of the trend smooth
    # (it is passed as `n_splines`), not a polynomial degree.
    gam = LinearGAM(s(0, n_splines=n_splines) + l(1) + l(2), fit_intercept=False).fit(X, y)
    trend = gam.partial_dependence(term=0, X=X)
    season = gam.partial_dependence(term=1, X=X) + gam.partial_dependence(term=2, X=X)

    fig, (ax1, ax2, ax3) = plt.subplots(nrows=3, figsize=(12, 6))
    fig.suptitle(f"n_splines = {n_splines}")

    ax1.plot(season, label="GAM")
    # This curve is the STL *seasonal* component; it was previously labelled "STL trend".
    ax1.plot(result_stl.seasonal.to_numpy(), label="STL seasonal")
    ax1.legend()
    ax1.set_title("Seasonal component")

    ax2.plot(trend, label="GAM")
    ax2.plot(result_stl.trend.to_numpy(), label="STL trend")
    ax2.legend()
    ax2.set_title("Trend component")

    ax3.plot(trend + season, label="GAM")
    ax3.plot(data["biomass"].to_numpy(), label="Real")
    # Sum of the STL trend and seasonal components; it was previously labelled "STL trend".
    ax3.plot((result_stl.trend + result_stl.seasonal).to_numpy(), label="STL trend + seasonal")
    ax3.legend()
    # Hard-coded zoom window: y from 0 to 3, x from sample 2000 to sample 8000.
    ax3.set_ylim(0, 3)
    ax3.set_xlim(2000, 8000)
    ax3.set_title("Reconstructed series")

    fig.tight_layout()
    plt.show()
    # Release the figure so repeated runs of the loop do not accumulate open figures.
    plt.close(fig)
