In [1]:
from sklearn.metrics import root_mean_squared_error, mean_absolute_error
from prophet import Prophet
from b_feature_engineering import df_prelag, df_clolag
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [None]:
# This function evaluate linear model for the initial train size and iterating forward creating models for each day
# Prediction is done for each model using model created in each iteration

def prophet_model(df, train_size=365, target='closing_price', index='date'):
    preds = []
    actuals = []
    dates = []

    # Ensure proper format
    df = df[[index, target]].copy()
    df[index] = pd.to_datetime(df[index])
    df = df.rename(columns={index: 'ds', target: 'y'})
    df = df['ds'].reset_index(drop=True)

    for i in range(train_size, len(df)):
        train_df = df.iloc[:i]
        test_df = df.iloc[i:i+1]

        model = Prophet(daily_seasonality=True)
        model.fit(train_df)

        # Forecast only 1 day ahead
        future = model.make_future_dataframe(periods=1, freq='D')
        forecast = model.predict(future)

        # Match forecast date with test date
        forecast_day = forecast[forecast['ds'] == test_df['ds'].values[0]]
        if forecast_day.empty:
            continue  # Skip if prediction date not found (can happen if Prophet skips weekends)

        y_pred = forecast_day['yhat'].values[0]
        y_true = test_df['y'].values[0]
        pred_date = test_df['ds'].values[0]

        preds.append(y_pred)
        actuals.append(y_true)
        dates.append(pred_date)

    df_results = pd.DataFrame({
        "date": dates,
        "actual": actuals,
        "predicted": preds
    })

    rmse = np.sqrt(root_mean_squared_error(actuals, preds))
    print(f"Walk-forward RMSE: {rmse:.4f}")

    return df_results



In [3]:
df_eval1 = df_clolag.copy()
df_eval = prophet_model(df = df_eval1, target = 'closing_price')
rmse = root_mean_squared_error(df_eval.actual, df_eval.predicted)
mae = mean_absolute_error(df_eval.actual, df_eval.predicted)

ValueError: Dataframe must have columns "ds" and "y" with the dates and values respectively.

In [7]:
print(f"Walk-forward Root_Mean_Squared_Error (RMSE): {rmse:.2f}")
print(f"Walk-forward Mean Absolute Error (MAE): {mae:.2f}")

Walk-forward Root_Mean_Squared_Error (RMSE): 5.52
Walk-forward Mean Absolute Error (MAE): 4.54
