In [None]:
import numpy as np
import pandas as pd
from pygam import LinearGAM, l, s


In [None]:
def decompose_GAM(self, data, variable, n_splines=80):
    """
    Decompose a time series into trend and seasonality with a GAM model.

    The target variable is log10-transformed before fitting, so both returned
    components are expressed in log10 units of the original variable.

    Parameters
    ----------
        data (dataframe): must contain 'time' (datetime-like, accessed through
            the ``.dt`` accessor) and the target variable to decompose.
            The input frame is not modified; a copy is used internally.
        variable (str) : name of the variable in the model
        n_splines (int): number of splines of the long-term trend term
            (default 80). A higher value lets the trend follow more rapid
            changes (and more noise); a lower value gives a smoother trend.

    Returns
    -------
        (trend_df,season_df):DataFrame with 'time' and 'biomass' columns.
            The value column is named 'biomass' whatever ``variable`` is.
            Rows where 'time' or ``variable`` is missing are not part of
            the output.

    Raises
    ------
        ValueError: if no row has both 'time' and ``variable`` available.

    """
    data = data.copy()
    # Clamp to machine epsilon before the log so that zeros (and negative
    # values) do not produce -inf/NaN. Missing values stay NaN and are
    # filtered out just below.
    data[variable] = np.log10(np.maximum(data[variable], np.finfo(float).eps))

    # Only the columns the model actually uses decide whether a row is kept;
    # gaps in unrelated columns must not shorten the series.
    data = data.dropna(subset=["time", variable]).reset_index(drop=True)
    if data.empty:
        raise ValueError(
            f"No rows with both 'time' and '{variable}' available; cannot fit the GAM."
        )

    # Elapsed time in days since the first observation (seconds / 86400).
    data["time_float"] = (data["time"] - data["time"].min()).dt.total_seconds() / (3600 * 24)

    # Encode the calendar month on the unit circle so that December and
    # January are neighbours for the seasonal terms.
    data["month"] = data["time"].dt.month
    month_angle = 2 * np.pi * (data["month"] - 1) / 12
    data["month_sin"] = np.sin(month_angle)
    data["month_cos"] = np.cos(month_angle)

    X = data[["time_float", "month_sin", "month_cos"]].values
    y = data[variable].values

    # Term 0: spline over elapsed time (long-term trend).
    # Terms 1 and 2: linear effects of the month sine/cosine (seasonality).
    gam = LinearGAM(s(0, n_splines=n_splines) + l(1) + l(2), fit_intercept=False).fit(X, y)

    trend = gam.partial_dependence(term=0, X=X)
    season = gam.partial_dependence(term=1, X=X) + gam.partial_dependence(term=2, X=X)

    trend_df = pd.DataFrame({"time": data["time"].values, "biomass": trend})
    season_df = pd.DataFrame({"time": data["time"].values, "biomass": season})

    return trend_df, season_df
