# Trend Estimation and Analysis

In [None]:
import os

import pandas as pd
import numpy as np

import statsmodels.api as sm
from prophet import Prophet

import plotly.graph_objects as go

import pymannkendall as mk

In [None]:
# Relative locations of the Tarragona data folder and of its "clean" subfolder.
data_folder = os.path.join(os.pardir, "data", "tarragona")
clean_data_folder = os.path.join(data_folder, "clean")

In [None]:
# Load the Xerta workbook from the clean-data folder; the first spreadsheet
# column becomes the DataFrame index.
xerta_df = pd.read_excel(
    os.path.join(clean_data_folder, "xerta.xlsx"),
    index_col=0,
)

In [None]:
# Summary table with one row per column of xerta_df, holding the Mann-Kendall
# test outcome and the linear-regression slope together with its p-value.
statistic_names = ["MK p-value", "MK result", "Slope", "Slope p-value"]

statistics_df = pd.DataFrame(columns=statistic_names, index=xerta_df.columns)

In [None]:
# For every column of xerta_df: fit Prophet to obtain trend and seasonal
# components, run a Mann-Kendall test on the series with the yearly component
# removed, fit a straight line to the observations, store the statistics in
# statistics_df and plot the series, the Prophet trend and the fitted line.
for column in xerta_df.columns:
    # Work on a NaN-free copy of this column only.
    df = xerta_df[[column]].dropna()

    date_range = df.index.min(), df.index.max()

    # Trim the start of the series so that it begins in the same calendar
    # month in which it ends.
    start_index = df[df.index.month == date_range[1].month].index[0]
    df = df.loc[start_index:]

    # ===== Prophet =====

    # Prophet expects the timestamps in a "ds" column and the values in "y".
    df.index.name = "ds"
    df = df.reset_index()
    df = df.rename(columns={column: "y"})

    prophet_model = Prophet()
    prophet_model.fit(df)

    # periods=0: evaluate the fitted components on the historical dates only.
    future = prophet_model.make_future_dataframe(periods=0)
    forecast = prophet_model.predict(future)

    # Attach the observed values to the fitted components, matched on date.
    forecasting_final = pd.merge(forecast, df, how="inner", on="ds")

    # ===== Linear regression =====

    # Ordinary least squares of the observed values on the sample position
    # (0, 1, 2, ...), so the slope is expressed per sample step.
    X = sm.add_constant(np.arange(df.shape[0]))
    y = df["y"].copy()

    model = sm.OLS(y, X)
    results = model.fit()

    # Fitted regression line, indexed by date for plotting.
    line = pd.Series(results.predict(X), index=df["ds"])

    slope = results.params.iloc[1]
    p_value = results.pvalues.iloc[1]

    # ===== Mann-Kendall test =====

    # Test the series with the yearly seasonal component removed. Both columns
    # are taken from the merged frame so that every observation is paired with
    # the seasonal value of the same date, independent of the row order or
    # index of `df`.
    # NOTE(review): the "yearly" column is only available when Prophet fitted a
    # yearly seasonality for this series -- confirm every series is long enough.
    deseasonalised = forecasting_final["y"] - forecasting_final["yearly"]
    mk_result = mk.original_test(deseasonalised)

    print(f"{column} - Mann-Kendall Test")
    print(f"Monotonic Trend: {mk_result.trend}")
    print(f"p-value: {mk_result.p:.4f}")
    print()
    print(f"{column} - Slope: {slope}")
    print(f"{column} - P-value: {p_value}")

    # Store the results for this column in the summary table.
    statistics_df.loc[column, "MK p-value"] = mk_result.p
    statistics_df.loc[column, "MK result"] = mk_result.trend
    statistics_df.loc[column, "Slope"] = slope
    statistics_df.loc[column, "Slope p-value"] = p_value

    # ===== Plot =====

    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=df["ds"],
            y=df["y"],
            mode="lines",
            name="Original",
        )
    )

    fig.add_trace(
        go.Scatter(
            x=forecasting_final["ds"],
            y=forecasting_final["trend"],
            mode="lines",
            name="Trend",
        )
    )

    fig.add_trace(
        go.Scatter(
            x=line.index,
            y=line,
            mode="lines",
            name="Linear Regression",
            line=dict(dash="dash", color="black"),
        ),
    )

    start_date = df["ds"].min()
    end_date = df["ds"].max()

    fig.update_layout(
        xaxis_title="Date",
        yaxis_title=column,
        font=dict(
            size=18,
        ),
        title=f"{column} - {start_date.strftime('%Y-%m-%d')} - {end_date.strftime('%Y-%m-%d')} - Slope: {slope:.4f}",
    )

    fig.show()

In [None]:
# Display the summary table filled in by the per-column analysis loop.
statistics_df