In [1]:
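# Core libraries. NOTE: the pymc3/numpy workaround this comment used to describe (an `np.bool` shim) is not applied in this cell, and the import below is `pymc`, not pymc3.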
import numpy as np
import pandas as pd
import pymc as pm

In [2]:
# Smoke-test that the Bayesian engine (and its backend dependencies) can be imported.
# The model fit itself is NOT run here: X_train_fcst / y_train_fcst are only defined
# in Step 2, so calling fit_bayesian_regression in this cell fails on a fresh kernel
# (Restart & Run All). The actual fit happens in Step 7.
from forecast_engine.bayesian_engine import fit_bayesian_regression

ModuleNotFoundError: No module named 'aesara'

In [None]:
# 📦 Import forecast engine
from forecast_engine.ols_engine import fit_linear_regression, predict_linear_regression, evaluate_model, add_fitted_and_residuals
from forecast_engine.bayesian_engine import fit_bayesian_regression, simulate_bayesian_forecasts, summarize_bayesian_distribution
from forecast_engine.bootstrap_engine import simulate_bootstrap_forecasts, check_residual_stationarity, summarize_bootstrap_distribution
from forecast_engine.plotting import plot_true_vs_predicted, plot_actual_vs_fitted_vs_forecast, plot_all_forecasts, plot_input_variables
from forecast_engine.utils import get_evaluation_metrics, summarize_forecast_table_with_colors

In [None]:
# --- Step 1: Load and prepare your data ---

# Build the foundational dataset in one chain: read the CSV, parse the
# 'date' column (YYYY-MM-DD strings) into datetimes, then use it as the index.
df = (
    pd.read_csv('grocery_eda_dataset.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
    .set_index('date')
)

# Dependent variable (target) and independent variables (regressors)
dep, ind = 'grocery_sales', ['cpi_fah', 'rdi_adj', 'home_price', 'covid1', 'covid2']

In [None]:
# --- Step 2: Define train and test periods ---

# Boundaries of the training and testing periods
start_training_date, end_training_date = '2004-01-01', '2022-12-31'   # dataset start / last training month
start_test_date, end_test_date = '2023-01-01', '2025-03-31'           # first / last testing month

# Label-based slices on the date index, named once and reused for X and y
train_window = slice(start_training_date, end_training_date)
test_window = slice(start_test_date, end_test_date)
full_window = slice(start_training_date, end_test_date)   # training + test, used for the forecast refit

X_train, y_train = df.loc[train_window, ind], df.loc[train_window, dep]

X_test, y_test = df.loc[test_window, ind], df.loc[test_window, dep]

X_train_fcst, y_train_fcst = df.loc[full_window, ind], df.loc[full_window, dep]

In [None]:
# --- Step 3: Train OLS Model ---
# Fit on the training window only (X_train / y_train from Step 2); the result is
# reused in the next cell to score the held-out test window.

# `beta` is whatever fit_linear_regression returns — presumably the fitted
# coefficients; confirm in forecast_engine.ols_engine.
beta = fit_linear_regression(X_train, y_train)
y_fitted_train = predict_linear_regression(X_train, beta)

# In-sample diagnostics: compare fitted values against the training targets.
evaluate_model(y_train, y_fitted_train)
plot_true_vs_predicted(y_train, y_fitted_train)

In [None]:
# Out-of-sample check: apply the coefficients fitted on the training window
# (`beta` from Step 3) to the test-window regressors.

# Generate predictions on test set
y_pred_test = predict_linear_regression(X_test, beta)

# Evaluate model performance on test set
evaluate_model(y_test, y_pred_test)

# Plot actual vs predicted values for test set
plot_true_vs_predicted(y_test, y_pred_test)


In [None]:
# --- Step 4: Retrain on the full sample (start_training_date through end_test_date) for Forecast ---
# X_train_fcst / y_train_fcst span the training AND test windows defined in Step 2,
# so the forecasting coefficients use every available observation.

beta_fcst = fit_linear_regression(X_train_fcst, y_train_fcst)
y_fitted_fcst = predict_linear_regression(X_train_fcst, beta_fcst)

# NOTE: this rebinds `df`. Step 8 reads a 'residuals' column from the result, so
# add_fitted_and_residuals presumably appends fitted/residual columns — confirm in ols_engine.
df = add_fitted_and_residuals(df, y_train_fcst, y_fitted_fcst)

In [None]:
# --- Step 5: Prepare Forecast Inputs ---

# 📥 Import future assumptions
from grocery_sales_input_202504 import forward_inputs

# forward_inputs is accessed as forward_inputs[variable][month]. The month keys of
# the first variable define the forecast horizon for every variable.
future_months = list(forward_inputs[list(forward_inputs.keys())[0]].keys())

# Make the "all variables share the same months" assumption explicit, so a gap in
# the assumptions file is reported by variable name instead of a bare KeyError.
for var, values_by_month in forward_inputs.items():
    missing_months = [month for month in future_months if month not in values_by_month]
    if missing_months:
        raise KeyError(f"forward_inputs['{var}'] is missing months: {missing_months}")

# Re-key the assumptions as X_future_dict[month][variable]
X_future_dict = {
    month: {var: forward_inputs[var][month] for var in forward_inputs}
    for month in future_months
}

# --- ✨ Skip 'grocery_sales_lag1' when building initial X_future_array
# Column order of the future array has to line up with the regressors used to fit
# beta_fcst (X_train_fcst is selected with `ind`), so derive the list from `ind`
# instead of keeping a second hard-coded copy that can drift out of sync.
forecast_vars = [var for var in ind if var != 'grocery_sales_lag1']

# One row per future month, one column per forecast variable
X_future_array = np.array([
    [X_future_dict[month][var] for var in forecast_vars] for month in future_months
])

# Save the future period labels as datetimes (name kept for downstream cells;
# the keys are the month labels from the assumptions file)
future_years = pd.to_datetime(future_months)


In [None]:
# --- Step 6: Predict OLS Forward ---

# Point forecast for the future periods using the full-sample coefficients
y_future_fcst = predict_linear_regression(X_future_array, beta_fcst)

# Merge Forecasts: forecast rows are indexed by the future dates and carry the
# forecast in 'y_comb'
df_forecast = pd.DataFrame(index=future_years)
df_forecast['y_comb'] = y_future_fcst

# Stack history and forecast, then backfill 'y_comb' with the actual values of the
# dependent variable wherever it is missing, giving one continuous actual+forecast series.
# Assign the result back explicitly: `df[col].fillna(..., inplace=True)` operates on a
# temporary Series and does not update the DataFrame under pandas Copy-on-Write.
df_combined = pd.concat([df, df_forecast])
df_combined['y_comb'] = df_combined['y_comb'].fillna(df_combined[dep])

In [None]:
# --- Step 7: Fit Bayesian Model ---

# Fit on the full sample, simulate forecasts for the future inputs, then summarize
trace = fit_bayesian_regression(X_train_fcst, y_train_fcst)
simulated_forecasts_bayes = simulate_bayesian_forecasts(X_future_array, trace)
summary_bayes = summarize_bayesian_distribution(simulated_forecasts_bayes)

# Copy each summary statistic into its column on the future rows of df_combined
bayes_columns = (
    ('mean', 'y_fcst_bayes_mean'),
    ('p5', 'y_fcst_bayes_p5'),
    ('p95', 'y_fcst_bayes_p95'),
)
for stat_name, target_column in bayes_columns:
    df_combined.loc[future_years, target_column] = summary_bayes[stat_name].values

In [None]:
# --- Step 8: Residual Bootstrap ---

# Check the residuals from the full-sample fit for stationarity, then resample them
check_residual_stationarity(df['residuals'].dropna())
residuals_train = df['residuals'].dropna().values

simulated_forecasts_bootstrap = simulate_bootstrap_forecasts(X_future_array, beta_fcst, residuals_train)
summary_bootstrap = summarize_bootstrap_distribution(simulated_forecasts_bootstrap)

# Copy each summary statistic into its column on the future rows of df_combined
bootstrap_columns = (
    ('mean', 'y_fcst_bootstrap'),
    ('p5', 'y_fcst_bootstrap_p5'),
    ('p95', 'y_fcst_bootstrap_p95'),
)
for stat_name, target_column in bootstrap_columns:
    df_combined.loc[future_years, target_column] = summary_bootstrap[stat_name].values

In [None]:
# --- Step 9: Final Visuals ---
# Both plots read from df_combined, which by this point holds the history plus the
# OLS ('y_comb'), Bayesian and bootstrap forecast columns written in Steps 6-8.

plot_actual_vs_fitted_vs_forecast(df_combined, dep)
plot_all_forecasts(df_combined, dep)

In [None]:
# --- Step 10: Summary Tables ---
# Build the summary for the future periods only (rows selected by future_years).

styled_table = summarize_forecast_table_with_colors(df_combined, future_years)
# Bare expression on the last line so the notebook renders the table as the cell output
styled_table

### FULL CODE ###

In [None]:
# # --- forecast_template.ipynb ---

# # helps fix a bug between pymc3 and numpy
# import numpy as np
# if not hasattr(np, 'bool'):
#     np.bool = bool

# # 📦 Import forecast engine
# from forecast_engine.ols_engine import fit_linear_regression, predict_linear_regression, evaluate_model, add_fitted_and_residuals
# from forecast_engine.bayesian_engine import fit_bayesian_regression, simulate_bayesian_forecasts, summarize_bayesian_distribution
# from forecast_engine.bootstrap_engine import simulate_bootstrap_forecasts, check_residual_stationarity, summarize_bootstrap_distribution
# from forecast_engine.plotting import plot_true_vs_predicted, plot_actual_vs_fitted_vs_forecast, plot_all_forecasts, plot_input_variables
# from forecast_engine.utils import get_evaluation_metrics, summarize_forecast_table_with_colors

# import pandas as pd
# import numpy as np

# # --- Step 1: Load and prepare your data ---

# # Example data load
# # Create the foundational dataset
# df = pd.read_csv('grocery_eda_dataset.csv')
# df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
# df = df.set_index('date')

# # define the variables
# dep = 'grocery_sales'
# ind = ['grocery_sales_lag1', 'cpi_fah', 'rdi_adj', 'home_price', 'covid1', 'covid2']

# # --- Step 2: Define train and test periods ---

# # Define Training and Testing periods
# start_training_date = '2004-01-01'    # Start of your dataset
# end_training_date = '2022-12-31'      # Last month of training data

# start_test_date = '2023-01-01'         # First month of testing data
# end_test_date = '2025-03-31'           # Last month of testing data

# X_train = df.loc[start_training_date:end_training_date, ind]
# y_train = df.loc[start_training_date:end_training_date, dep]

# X_test = df.loc[start_test_date:end_test_date, ind]
# y_test = df.loc[start_test_date:end_test_date, dep]

# X_train_fcst = df.loc[start_training_date:end_test_date, ind]
# y_train_fcst = df.loc[start_training_date:end_test_date, dep]

# # --- Step 3: Train OLS Model ---

# beta = fit_linear_regression(X_train, y_train)
# y_fitted_train = predict_linear_regression(X_train, beta)

# evaluate_model(y_train, y_fitted_train)
# plot_true_vs_predicted(y_train, y_fitted_train)

# # --- Step 4: Retrain on the full sample (start_training_date through end_test_date) for Forecast ---

# beta_fcst = fit_linear_regression(X_train_fcst, y_train_fcst)
# y_fitted_fcst = predict_linear_regression(X_train_fcst, beta_fcst)

# df = add_fitted_and_residuals(df, y_train_fcst, y_fitted_fcst)

# # --- Step 5: Prepare Forecast Inputs ---

# # 📥 Import future assumptions
# from grocery_sales_input_202504 import forward_inputs

# # Build X_future_dict dynamically from imported forward_inputs
# X_future_dict = {}

# # Assume all variables have the same months
# future_months = list(forward_inputs[list(forward_inputs.keys())[0]].keys())

# for month in future_months:
#     X_future_dict[month] = {}
#     for var in forward_inputs.keys():
#         X_future_dict[month][var] = forward_inputs[var][month]

# # Turn into a numpy array
# X_future_array = np.array([
#     [X_future_dict[year][var] for var in ind] for year in X_future_dict
# ])

# # Save the list of future years
# future_years = pd.to_datetime(list(X_future_dict.keys()))

# # --- Step 6: Predict OLS Forward ---

# y_future_fcst = predict_linear_regression(X_future_array, beta_fcst)

# # Merge Forecasts
# df_forecast = pd.DataFrame(index=future_years)
# df_forecast['y_comb'] = y_future_fcst

# df_combined = pd.concat([df, df_forecast])
# df_combined['y_comb'].fillna(df_combined[dep], inplace=True)

# # --- Step 7: Fit Bayesian Model ---

# trace = fit_bayesian_regression(X_train_fcst, y_train_fcst)
# simulated_forecasts_bayes = simulate_bayesian_forecasts(X_future_array, trace)
# summary_bayes = summarize_bayesian_distribution(simulated_forecasts_bayes)

# df_combined.loc[future_years, 'y_fcst_bayes_mean'] = summary_bayes['mean'].values
# df_combined.loc[future_years, 'y_fcst_bayes_p5'] = summary_bayes['p5'].values
# df_combined.loc[future_years, 'y_fcst_bayes_p95'] = summary_bayes['p95'].values

# # --- Step 8: Residual Bootstrap ---

# check_residual_stationarity(df['residuals'].dropna())
# residuals_train = df['residuals'].dropna().values

# simulated_forecasts_bootstrap = simulate_bootstrap_forecasts(X_future_array, beta_fcst, residuals_train)
# summary_bootstrap = summarize_bootstrap_distribution(simulated_forecasts_bootstrap)

# df_combined.loc[future_years, 'y_fcst_bootstrap'] = summary_bootstrap['mean'].values
# df_combined.loc[future_years, 'y_fcst_bootstrap_p5'] = summary_bootstrap['p5'].values
# df_combined.loc[future_years, 'y_fcst_bootstrap_p95'] = summary_bootstrap['p95'].values

# # --- Step 9: Final Visuals ---

# plot_actual_vs_fitted_vs_forecast(df_combined, dep)
# plot_all_forecasts(df_combined, dep)

# # --- Step 10: Summary Tables ---

# styled_table = summarize_forecast_table_with_colors(df_combined, future_years)
# styled_table
