# In [None]:
# --- forecast_template.ipynb ---

# 📦 Import forecast engine
from forecast_engine.ols_engine import fit_linear_regression, predict_linear_regression, evaluate_model, add_fitted_and_residuals
from forecast_engine.bayesian_engine import fit_bayesian_regression, simulate_bayesian_forecasts, summarize_bayesian_distribution
from forecast_engine.bootstrap_engine import simulate_bootstrap_forecasts, check_residual_stationarity, summarize_bootstrap_distribution
from forecast_engine.plotting import plot_true_vs_predicted, plot_actual_vs_fitted_vs_forecast, plot_all_forecasts, plot_input_variables
from forecast_engine.utils import get_evaluation_metrics, summarize_forecast_table_with_colors

import pandas as pd
import numpy as np

# --- Step 1: Load and prepare your data ---

# Example data load
# df = pd.read_csv('grocery_eda_dataset.csv')

# Example manual dummy data.
# The history deliberately stops at 2024: Step 5 supplies 2025-2029 as forecast
# inputs, and historical rows for those same years would produce duplicate
# index labels after the concat in Step 6, breaking the later
# `.loc[future_years, ...]` assignments (5 values written onto 10 rows).
years = pd.date_range('2000', '2024', freq='YS')
n_obs = len(years)  # derive the sample size from the index instead of hard-coding it
rng = np.random.default_rng(42)  # fixed seed so the template output is reproducible

data = {
    'year': years,
    'dep_var': rng.normal(100, 10, n_obs),
    'X1': rng.normal(50, 5, n_obs),
    'X2': rng.normal(30, 3, n_obs),
}

# Build DataFrame
# Every later step reads `df`; it must exist before Step 2 touches df['year'].
df = pd.DataFrame(data)

# --- Step 2: Split into Train, Test, and Forecast sets ---

# Put the dates on the index so each window below is a plain label slice.
df['year'] = pd.to_datetime(df['year'])
df = df.set_index('year')

# Control Definitions
dep = 'dep_var'
ind = ['X1', 'X2']
start_training_date, end_training_date = '2000-01-01', '2019-12-31'
start_test_date, end_test_date = '2020-01-01', '2024-12-31'

# Label-based windows (both endpoints inclusive with .loc).
train_window = slice(start_training_date, end_training_date)
test_window = slice(start_test_date, end_test_date)
full_window = slice(start_training_date, end_test_date)  # train + test, used for the final refit

X_train, y_train = df.loc[train_window, ind], df.loc[train_window, dep]

X_test, y_test = df.loc[test_window, ind], df.loc[test_window, dep]

X_train_fcst, y_train_fcst = df.loc[full_window, ind], df.loc[full_window, dep]

# --- Step 3: Train OLS Model ---
# Fit on the training window only (X_train / y_train from Step 2), then look at
# the fit on that same window.

# `beta` is whatever fit_linear_regression returns for (X_train, y_train);
# presumably the fitted coefficients — confirm in forecast_engine.ols_engine.
beta = fit_linear_regression(X_train, y_train)
# Predictions for the training rows using the parameters fitted above.
y_fitted_train = predict_linear_regression(X_train, beta)

# Return values are discarded; both calls are made for their output only.
# NOTE(review): X_test / y_test are not used anywhere in this section, so only
# the in-sample fit is evaluated here.
evaluate_model(y_train, y_fitted_train)
plot_true_vs_predicted(y_train, y_fitted_train)

# --- Step 4: Retrain on Full 2000-2024 for Forecast ---
# X_train_fcst / y_train_fcst span start_training_date..end_test_date (Step 2),
# i.e. the training and test windows together.

beta_fcst = fit_linear_regression(X_train_fcst, y_train_fcst)
y_fitted_fcst = predict_linear_regression(X_train_fcst, beta_fcst)

# Rebinds df to the helper's return value.
# NOTE(review): later steps read df['residuals'] and df_combined['y_fitted'], so
# this presumably adds those two columns — confirm in forecast_engine.ols_engine.
df = add_fitted_and_residuals(df, y_train_fcst, y_fitted_fcst)

# --- Step 5: Prepare Forecast Inputs ---

# Assumed regressor values for each forecast date, keyed by date string.
X_future_dict = {
    '2025-01-01': {'X1': 52, 'X2': 32},
    '2026-01-01': {'X1': 53, 'X2': 33},
    '2027-01-01': {'X1': 54, 'X2': 34},
    '2028-01-01': {'X1': 55, 'X2': 35},
    '2029-01-01': {'X1': 56, 'X2': 36},
}

# One row per forecast date (dict insertion order), columns ordered as in `ind`.
future_rows = [
    [inputs[var] for var in ind]
    for inputs in X_future_dict.values()
]
X_future_array = np.array(future_rows)

# Datetime labels for the forecast rows, in the same order as the array rows.
future_years = pd.to_datetime(list(X_future_dict))

# --- Step 6: Predict OLS Forward ---

y_future_fcst = predict_linear_regression(X_future_array, beta_fcst)

# Merge Forecasts
# Store the OLS forecast under 'y_fcst_ols': that is the column read below to
# build y_comb, and nothing else in this script creates it. (Writing it to
# 'y_comb' directly, as before, left 'y_fcst_ols' missing and the forecast
# values were overwritten when y_comb was rebuilt.)
df_forecast = pd.DataFrame(index=future_years)
df_forecast['y_fcst_ols'] = y_future_fcst

df_combined = pd.concat([df, df_forecast])

# Create y_comb as the unified column for actuals + forecast: the OLS forecast
# where one exists, otherwise the observed dependent variable.
# The result is assigned back rather than calling fillna(inplace=True) on the
# column selection; that form is chained assignment and does not update the
# frame under pandas Copy-on-Write.
df_combined['y_comb'] = df_combined['y_fcst_ols'].fillna(df_combined[dep])

# Flag forecast rows (no fitted values, but a y_comb exists)
# NOTE(review): relies on Step 4's add_fitted_and_residuals having added a
# 'y_fitted' column to df — confirm against forecast_engine.
df_combined['is_forecast'] = df_combined['y_fitted'].isna() & df_combined['y_comb'].notna()

# --- Step 7: Fit Bayesian Model ---

# Fit on the full train+test window, then simulate at the Step 5 future inputs.
trace = fit_bayesian_regression(X_train_fcst, y_train_fcst)
simulated_forecasts_bayes = simulate_bayesian_forecasts(X_future_array, trace)
summary_bayes = summarize_bayesian_distribution(simulated_forecasts_bayes)

# Copy each summary column onto the forecast rows of df_combined.
# Mapping is target column in df_combined -> source column in summary_bayes.
bayes_columns = {
    'y_fcst_bayes_mean': 'mean',
    'y_fcst_bayes_p5': 'p5',
    'y_fcst_bayes_p95': 'p95',
}
for target_col, stat in bayes_columns.items():
    # .values drops the summary's own index so rows are matched by position.
    df_combined.loc[future_years, target_col] = summary_bayes[stat].values

# --- Step 8: Residual Bootstrap ---

# Non-missing residuals from the full-window refit (Step 4).
observed_residuals = df['residuals'].dropna()
check_residual_stationarity(observed_residuals)
residuals_train = observed_residuals.values

simulated_forecasts_bootstrap = simulate_bootstrap_forecasts(X_future_array, beta_fcst, residuals_train)
summary_bootstrap = summarize_bootstrap_distribution(simulated_forecasts_bootstrap)

# Copy each summary column onto the forecast rows of df_combined.
# Mapping is target column in df_combined -> source column in summary_bootstrap.
bootstrap_columns = {
    'y_fcst_bootstrap': 'mean',
    'y_fcst_bootstrap_p5': 'p5',
    'y_fcst_bootstrap_p95': 'p95',
}
for target_col, stat in bootstrap_columns.items():
    # .values drops the summary's own index so rows are matched by position.
    df_combined.loc[future_years, target_col] = summary_bootstrap[stat].values

# --- Step 9: Final Visuals ---
# Both plotting helpers receive the combined history + forecast frame and the
# name of the dependent-variable column; their return values are not used.

plot_actual_vs_fitted_vs_forecast(df_combined, dep)
plot_all_forecasts(df_combined, dep)

# --- Step 10: Summary Tables ---
# Build the summary for the forecast dates only (future_years from Step 5).

styled_table = summarize_forecast_table_with_colors(df_combined, future_years)
# Bare expression: when this is the last line of a notebook cell the object is
# rendered as the cell output; run as a plain script it has no effect.
styled_table
