In [21]:
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, mean_squared_error
from prophet import Prophet
from b_feature_engineering import df_prelag, df_clolag
import pandas as pd
import numpy as np

In [22]:
# This function evaluate linear model for the initial train size and iterating forward creating models for each day
# Prediction is done for each model using model created in each iteration

def prophet_model(df, train_size=365, target='', index='date'):
    preds = []
    actuals = []
    dates = []

    # Ensure proper format
    df = df[[index, target]].copy()
    df[index] = pd.to_datetime(df[index])
    df = df.rename(columns={index: 'ds', target: 'y'})
    

    for i in range(train_size, len(df)):
        train_df = df.iloc[:i]
        test_df = df.iloc[i:i+1]

        model = Prophet(daily_seasonality=True)
        model.fit(train_df)

        # Forecast only 1 day ahead
        future = model.make_future_dataframe(periods=1, freq='D')
        forecast = model.predict(future)

        # Match forecast date with test date
        forecast_day = forecast[forecast['ds'] == test_df['ds'].values[0]]
        if forecast_day.empty:
            continue  # Skip if prediction date not found (can happen if Prophet skips weekends)

        y_pred = forecast_day['yhat'].values[0]
        y_true = test_df['y'].values[0]
        pred_date = test_df['ds'].values[0]

        preds.append(y_pred)
        actuals.append(y_true)
        dates.append(pred_date)

    df_results = pd.DataFrame({
        "date": dates,
        "actual": actuals,
        "predicted": preds
    })

    return df_results



In [17]:
df_eval1 = df_clolag.copy()
df_eval1.head(1)

Unnamed: 0,date,closing_price,clolag_1,clolag_2,clolag_3,clolag_4,clolag_5
0,2023-01-30,109.76,109.85,108.83,108.05,108.61,108.66


In [20]:

df_eval = prophet_model(df = df_eval1, target = 'closing_price')
rmse = root_mean_squared_error(df_eval.actual, df_eval.predicted)
mae = mean_absolute_error(df_eval.actual, df_eval.predicted)

18:31:28 - cmdstanpy - INFO - Chain [1] start processing
18:31:28 - cmdstanpy - INFO - Chain [1] done processing
18:31:28 - cmdstanpy - INFO - Chain [1] start processing
18:31:28 - cmdstanpy - INFO - Chain [1] done processing
18:31:28 - cmdstanpy - INFO - Chain [1] start processing
18:31:28 - cmdstanpy - INFO - Chain [1] done processing
18:31:28 - cmdstanpy - INFO - Chain [1] start processing
18:31:28 - cmdstanpy - INFO - Chain [1] done processing
18:31:28 - cmdstanpy - INFO - Chain [1] start processing
18:31:28 - cmdstanpy - INFO - Chain [1] done processing
18:31:29 - cmdstanpy - INFO - Chain [1] start processing
18:31:29 - cmdstanpy - INFO - Chain [1] done processing
18:31:29 - cmdstanpy - INFO - Chain [1] start processing
18:31:29 - cmdstanpy - INFO - Chain [1] done processing
18:31:29 - cmdstanpy - INFO - Chain [1] start processing
18:31:29 - cmdstanpy - INFO - Chain [1] done processing
18:31:29 - cmdstanpy - INFO - Chain [1] start processing
18:31:29 - cmdstanpy - INFO - Chain [1]

In [None]:
print(f"Walk-forward Root_Mean_Squared_Error (RMSE): {rmse:.2f}")
print(f"Walk-forward Mean Absolute Error (MAE): {mae:.2f}")b

Walk-forward Root_Mean_Squared_Error (RMSE): 5.52
Walk-forward Mean Absolute Error (MAE): 4.54


When compared with the all three base line models and other machine learning models prophet got the worst accuracy. 