In [1]:
import os
import pandas as pd
import numpy as np
import joblib
from datetime import timedelta

In [2]:
# Read the feature-engineered dataset, parse the Date column into datetimes
# and order the rows chronologically.
df = (
    pd.read_csv("../data/processed/feature_data.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .sort_values("Date")
)


In [3]:
# Restore the two persisted models (rf_model.pkl and xgb_model.pkl) from disk.
# NOTE(review): joblib.load unpickles the files, so only load artifacts that
# come from a trusted source.
rf, xg = (
    joblib.load(model_path)
    for model_path in ("../models/rf_model.pkl", "../models/xgb_model.pkl")
)


In [4]:
# How many daily steps to forecast ahead: 90 days, i.e. roughly three months.
forecast_horizon: int = 90


In [5]:
# Build the forecast calendar: `forecast_horizon` consecutive days, beginning
# on the day right after the most recent date present in the data.
last_date = df['Date'].max()
first_forecast_day = last_date + timedelta(days=1)
future_dates = pd.date_range(first_forecast_day, periods=forecast_horizon, freq="D")



In [8]:
# Recursive multi-step forecast.
#
# For every future date a single feature row is assembled, both models are
# asked for a prediction, and the average of the two (the "hybrid" forecast)
# is appended to the consumption history so that later days can derive their
# lag / rolling features from it.
#
# Inputs taken from earlier cells: df, rf, xg, future_dates.
# Outputs: feature_cols, last_data, current_data, future_preds, forecast_df.

# Feature columns used during training (everything except the date and target)
feature_cols = [c for c in df.columns if c not in ["Date", "Quantity_Consumed"]]

# Look-back configuration of the engineered features.
LAGS = (7, 14, 30)
ROLLING_WINDOWS = (7, 30)
HISTORY_LEN = max(LAGS + ROLLING_WINDOWS)   # 30 rows of history are enough

last_data = df.iloc[-HISTORY_LEN:].copy()
if len(last_data) < max(LAGS):
    # Without this guard the first `.iloc[-lag]` fails with an opaque
    # "single positional indexer is out-of-bounds" IndexError.
    raise ValueError(
        f"Need at least {max(LAGS)} historical rows to build lag features, "
        f"got {len(last_data)}."
    )

# Names of the features that are recomputed (or explicitly carried forward)
# for every forecast day, in contrast to the ones copied from the last row.
# NOTE(review): this assumes the training columns are spelled exactly like
# these keys; a differently named calendar column (e.g. "Month") would fall
# into `static_features` below and stay frozen at its last known value.
dynamic_cols = {
    "month", "dayofweek", "Year", "Lead_Time", "Sales_Volume",
    *(f"lag_{lag}" for lag in LAGS),
    *(f"rolling_{window}" for window in ROLLING_WINDOWS),
}

# Loop-invariant values, read once instead of on every iteration.
# Values are pulled column by column (not via `last_data.iloc[-1]`) so each
# one keeps its own column dtype instead of being upcast to a common one.
last_lead_time = last_data["Lead_Time"].iloc[-1]
last_sales_volume = last_data["Sales_Volume"].iloc[-1]
# Remaining training features (dummy columns for Materials, Vendors,
# Locations, ...) are carried forward unchanged from the last known row.
# `feature_cols` comes from df.columns, so every column is guaranteed to exist.
static_features = {
    col: last_data[col].iloc[-1]
    for col in feature_cols
    if col not in dynamic_cols
}

# Consumption history as a plain list: appending is O(1), whereas growing a
# DataFrame with pd.concat on every step copies the whole frame each time.
history = last_data["Quantity_Consumed"].tolist()

future_preds = []      # one dict per forecast day (features + "Forecast")
simulated_rows = []    # same rows with the forecast stored as Quantity_Consumed

for d in future_dates:
    # Most recent observations / forecasts; pandas is used for the statistics
    # so that NaNs are skipped by `.mean()`.
    window = pd.Series(history[-HISTORY_LEN:])

    row = {"Date": d}

    # calendar features
    row["month"] = d.month
    row["dayofweek"] = d.dayofweek
    row["Year"] = d.year   # <- include because it was in training set

    # lag features: value observed `lag` steps before the day being forecast
    for lag in LAGS:
        row[f"lag_{lag}"] = window.iloc[-lag]

    # rolling averages over the most recent `size` values
    for size in ROLLING_WINDOWS:
        row[f"rolling_{size}"] = window.iloc[-size:].mean()

    # carry forward last known Lead_Time & Sales_Volume
    row["Lead_Time"] = last_lead_time
    row["Sales_Volume"] = last_sales_volume

    # copy the remaining (static) training features
    row.update(static_features)

    # single-row frame with the columns in the same order as during training
    X_row = pd.DataFrame([row])[feature_cols]

    # predictions: unweighted average of the two models
    rf_pred = rf.predict(X_row)[0]
    xg_pred = xg.predict(X_row)[0]
    hyb_pred = (rf_pred + xg_pred) / 2

    # update history for next iteration
    history.append(hyb_pred)
    simulated_rows.append({**row, "Quantity_Consumed": hyb_pred})

    row["Forecast"] = hyb_pred
    future_preds.append(row)

forecast_df = pd.DataFrame(future_preds)

# Known history followed by the simulated future, built with a single concat
# so the `current_data` name stays available to later cells.
if simulated_rows:
    current_data = pd.concat(
        [last_data, pd.DataFrame(simulated_rows)], ignore_index=True
    )
else:
    current_data = last_data.copy()
