In [None]:
pip install prophet



In [None]:
pip install mlflow

Collecting mlflow
  Downloading mlflow-2.20.3-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.20.3 (from mlflow)
  Downloading mlflow_skinny-2.20.3-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.20.3->mlflow)
  Downloading databricks_sdk-0.45.0-py3-none-any.whl.metadata (38 kB)
Collecting Mako (from alembic!=1.10.0,<2->mlflow)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)
Colle

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from prophet import Prophet
import warnings
import pickle
import os
import mlflow
from mlflow.models.signature import infer_signature

# Suppress FutureWarning and UserWarning messages for the rest of the session
# so that cell output is not flooded by library warnings.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Set up MLflow tracking
# NOTE(review): the tracking URI below is a placeholder path; it has to be edited
# to point at a real mlruns directory before this notebook is run.
mlflow.set_tracking_uri("file:///path/to/your/mlruns")  # Update this path to your mlruns folder
# Every run started after this call is recorded under this experiment name.
mlflow.set_experiment("Prophet_Demand_Forecasting")  # Set your experiment name

def calculate_metrics(actual, predicted):
    """Return (MAE, MSE, RMSE, R², MAPE %) for predictions against actual values.

    Both inputs are converted to NumPy arrays first. MAPE is reported as NaN
    whenever any actual value equals zero, because the percentage error is
    undefined in that case.
    """
    y_true = np.array(actual)
    y_hat = np.array(predicted)

    abs_error = mean_absolute_error(y_true, y_hat)
    sq_error = mean_squared_error(y_true, y_hat)
    root_sq_error = np.sqrt(sq_error)
    determination = r2_score(y_true, y_hat)

    if np.all(y_true != 0):
        pct_error = np.mean(np.abs((y_true - y_hat) / y_true)) * 100
    else:
        pct_error = np.nan

    return abs_error, sq_error, root_sq_error, determination, pct_error

def save_model_pickle(model, file_path):
    """Pickle `model` into `file_path` (overwriting it) and return that path."""
    with open(file_path, 'wb') as handle:
        handle.write(pickle.dumps(model))
    return file_path

def load_model_pickle(file_path):
    """Read `file_path` and return the object that was pickled into it.

    Unpickling can execute arbitrary code, so only pass files you trust.
    """
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

def train_prophet_model(csv_path, target_column="Total Quantity", epochs=5, save_dir="models",
                        rmse_threshold=20):
    """
    Train a Prophet time series model with MLflow tracking.

    The last `epochs` rows (after sorting by date and dropping rows with
    missing values) are held out as the test set. If the hold-out RMSE exceeds
    `rmse_threshold`, a small grid search over Prophet prior scales is run and,
    if still above the threshold, a gradient-boosting correction model is
    tried on top of the Prophet forecast.

    Args:
        csv_path: Path to CSV file containing time series data. Must contain a
            'Date' column and `target_column`.
        target_column: Column name for the target variable
        epochs: Number of days to predict and evaluate (size of the hold-out set)
        save_dir: Directory to save model files
        rmse_threshold: Hold-out RMSE above which hyperparameter tuning and
            post-processing are attempted (default 20)

    Returns:
        model: Trained Prophet model
        rmse: Root Mean Squared Error of the model
        `(None, None)` is returned when there are too few rows to train on.

    Raises:
        ValueError: If `epochs` is not positive or required columns are missing.
    """
    if epochs <= 0:
        raise ValueError("epochs must be a positive number of hold-out rows.")

    # Create save directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)

    # Start MLflow run
    with mlflow.start_run(nested=True) as run:
        run_id = run.info.run_id
        print(f"MLflow Run ID: {run_id}")

        # Log parameters
        mlflow.log_param("target_column", target_column)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("csv_file", os.path.basename(csv_path))

        # Load data
        df = pd.read_csv(csv_path)

        # Validate required columns
        if 'Date' not in df.columns or target_column not in df.columns:
            raise ValueError(f"The CSV must contain 'Date' and '{target_column}' columns.")

        # Parse dates first so that min/max below compare real dates,
        # not the raw strings (which would sort lexicographically).
        df['Date'] = pd.to_datetime(df['Date'])

        # Log data info
        mlflow.log_param("data_rows", len(df))
        mlflow.log_param("date_range", f"{df['Date'].min()} to {df['Date'].max()}")

        # Optional regressor columns; only those present in the CSV are used.
        additional_features = ['is_weekend', 'day_of_month', 'day_of_year', 'month', 'week_of_year', 'lag_1', 'lag_7', 'rolling_mean_7']
        used_features = [feature for feature in additional_features if feature in df.columns]

        # Handle duplicate dates. The regressor columns are aggregated together
        # with the target; aggregating the target alone would silently drop them.
        if df['Date'].duplicated().any():
            print("Warning: Duplicate dates found. Aggregating by mean.")
            mlflow.log_param("duplicate_dates_found", True)
            aggregations = {target_column: 'mean'}
            aggregations.update({feature: 'mean' for feature in used_features})
            df = df.groupby('Date', as_index=False).agg(aggregations)

        # Sort by date
        df = df.sort_values('Date')

        # Prepare data for Prophet
        prophet_df = df[['Date', target_column]].rename(columns={'Date': 'ds', target_column: 'y'})
        for feature in used_features:
            prophet_df[feature] = df[feature]

        mlflow.log_param("additional_features", used_features)

        # Drop rows with missing values
        original_rows = len(prophet_df)
        prophet_df = prophet_df.dropna()
        dropped_rows = original_rows - len(prophet_df)
        mlflow.log_param("dropped_rows", dropped_rows)

        # Split into train/test sets. Prophet cannot be fitted on fewer than
        # two rows, so bail out early in that case.
        train_size = len(prophet_df) - epochs
        if train_size < 2:
            mlflow.log_param("training_failed", "insufficient_data")
            # Keep the documented 2-tuple shape so callers can always unpack.
            return None, None

        train = prophet_df.iloc[:train_size]
        test = prophet_df.iloc[train_size:]

        mlflow.log_param("train_size", len(train))
        mlflow.log_param("test_size", len(test))

        # Set initial hyperparameters
        prophet_params = {
            'changepoint_prior_scale': 0.05,
            'seasonality_prior_scale': 10,
            'holidays_prior_scale': 10,
            'seasonality_mode': 'multiplicative',
            'interval_width': 0.95,
            'daily_seasonality': False,
            'weekly_seasonality': True,
            'yearly_seasonality': True,
        }

        # Log initial hyperparameters
        for param, value in prophet_params.items():
            mlflow.log_param(f"initial_{param}", value)

        # Train Prophet model
        model = Prophet(**prophet_params)

        # Add additional regressors
        for feature in used_features:
            model.add_regressor(feature)

        # Fit model
        model.fit(train)

        # Predict on the actual train + test dates with their own regressor
        # values. Generating dates with make_future_dataframe would assume a
        # gap-free daily series and misalign regressors if rows were dropped.
        future = prophet_df.drop(columns=['y']).reset_index(drop=True)

        # Predict
        forecast = model.predict(future)

        # Extract predictions for test period
        prophet_predictions = forecast.iloc[-epochs:]['yhat'].values

        # Calculate metrics
        mae, mse, rmse, r2, mape = calculate_metrics(test['y'].values, prophet_predictions)

        # Log initial metrics
        mlflow.log_metric("initial_mae", mae)
        mlflow.log_metric("initial_mse", mse)
        mlflow.log_metric("initial_rmse", rmse)
        mlflow.log_metric("initial_r2", r2)
        if not np.isnan(mape):
            mlflow.log_metric("initial_mape", mape)

        # Hyperparameter tuning if RMSE is high
        # NOTE(review): trials are compared on the same hold-out set that is
        # reported as the final score, so the reported RMSE is optimistic.
        best_params = prophet_params.copy()
        if rmse > rmse_threshold:
            mlflow.log_param("hyperparameter_tuning", True)
            param_grid = {
                'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
                'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
            }

            mlflow.log_param("tuning_grid", param_grid)

            best_rmse = rmse
            best_model = model
            best_forecast = forecast

            for cp_scale in param_grid['changepoint_prior_scale']:
                for s_scale in param_grid['seasonality_prior_scale']:
                    try:
                        with mlflow.start_run(nested=True) as child_run:
                            trial_params = {
                                'changepoint_prior_scale': cp_scale,
                                'seasonality_prior_scale': s_scale,
                                'seasonality_mode': 'multiplicative',
                                'interval_width': 0.95
                            }

                            # Log trial parameters
                            for param, value in trial_params.items():
                                mlflow.log_param(param, value)

                            m = Prophet(**trial_params)

                            # Add additional regressors
                            for feature in used_features:
                                m.add_regressor(feature)

                            if len(train) >= 30:
                                m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
                                mlflow.log_param("added_monthly_seasonality", True)

                            m.fit(train)
                            f = m.predict(future)

                            preds = f.iloc[-epochs:]['yhat'].values
                            trial_mae, trial_mse, trial_rmse, trial_r2, trial_mape = calculate_metrics(test['y'].values, preds)

                            # Log trial metrics
                            mlflow.log_metric("mae", trial_mae)
                            mlflow.log_metric("mse", trial_mse)
                            mlflow.log_metric("rmse", trial_rmse)
                            mlflow.log_metric("r2", trial_r2)
                            if not np.isnan(trial_mape):
                                mlflow.log_metric("mape", trial_mape)

                            if trial_rmse < best_rmse:
                                best_rmse = trial_rmse
                                best_model = m
                                best_forecast = f
                                prophet_predictions = preds
                                best_params = trial_params.copy()

                                # Save best checkpoint inside loop
                                checkpoint_path = os.path.join(save_dir, f"prophet_checkpoint_{child_run.info.run_id}.pkl")
                                save_model_pickle(m, checkpoint_path)
                                mlflow.log_artifact(checkpoint_path)
                    except Exception as e:
                        print(f"Warning: Trial with cp_scale={cp_scale}, s_scale={s_scale} failed. Error: {e}")
                        # MLflow params are immutable, so each failed trial needs
                        # its own key; reusing one key would raise on the second failure.
                        mlflow.log_param(f"failed_trial_cp{cp_scale}_s{s_scale}", str(e)[:250])

            if best_model and best_rmse < rmse:
                model = best_model
                # Keep the forecast in sync with the selected model so that
                # post-processing below works on the right predictions.
                forecast = best_forecast
                mae, mse, rmse, r2, mape = calculate_metrics(test['y'].values, prophet_predictions)

                # Log best parameters
                for param, value in best_params.items():
                    mlflow.log_param(f"best_{param}", value)

                mlflow.log_metric("best_mae", mae)
                mlflow.log_metric("best_mse", mse)
                mlflow.log_metric("best_rmse", rmse)
                mlflow.log_metric("best_r2", r2)
                if not np.isnan(mape):
                    mlflow.log_metric("best_mape", mape)

        # Post-processing with Gradient Boosting
        final_predictions = prophet_predictions
        used_postprocessing = False

        if rmse > rmse_threshold:
            try:
                from sklearn.ensemble import GradientBoostingRegressor

                mlflow.log_param("attempted_postprocessing", True)

                required_columns = ['yhat', 'trend', 'yhat_lower', 'yhat_upper']
                if all(col in forecast.columns for col in required_columns):
                    # Fit the correction on the last (up to) 30 training rows.
                    # Capping the window avoids negative slice starts on short series.
                    window = min(30, train_size)
                    X_train = forecast.iloc[train_size - window:train_size][required_columns]
                    y_train = prophet_df.iloc[train_size - window:train_size]['y'].values

                    correction_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)
                    correction_model.fit(X_train, y_train)

                    test_features = forecast.iloc[-epochs:][required_columns]
                    corrected_predictions = correction_model.predict(test_features)

                    corrected_mae, corrected_mse, corrected_rmse, corrected_r2, corrected_mape = calculate_metrics(
                        test['y'].values, corrected_predictions)

                    # Log post-processing metrics
                    mlflow.log_metric("pp_mae", corrected_mae)
                    mlflow.log_metric("pp_mse", corrected_mse)
                    mlflow.log_metric("pp_rmse", corrected_rmse)
                    mlflow.log_metric("pp_r2", corrected_r2)
                    if not np.isnan(corrected_mape):
                        mlflow.log_metric("pp_mape", corrected_mape)

                    if corrected_rmse < rmse:
                        mae, mse, rmse, r2, mape = corrected_mae, corrected_mse, corrected_rmse, corrected_r2, corrected_mape
                        final_predictions = corrected_predictions
                        used_postprocessing = True

                        # Save correction model
                        correction_model_path = os.path.join(save_dir, "correction_model.pkl")
                        save_model_pickle(correction_model, correction_model_path)
                        mlflow.log_artifact(correction_model_path)
                        mlflow.log_param("used_postprocessing", True)
            except Exception as e:
                # Post-processing is best-effort: report the failure and carry on
                # with the uncorrected Prophet predictions.
                print(f"Warning: Post-processing failed. Error: {e}")
                mlflow.log_param("postprocessing_error", str(e)[:250])

        # Create performance metrics table
        metrics = {
            'Metric': ['MAE', 'MSE', 'RMSE', 'R²', 'MAPE (%)'],
            'Value': [mae, mse, rmse, r2, mape]
        }
        metrics_df = pd.DataFrame(metrics)

        # Create comparison table
        comparison = pd.DataFrame({
            'Date': test['ds'].values,
            'Actual': test['y'].values,
            'Predicted': final_predictions,
            'Error': np.abs(test['y'].values - final_predictions),
            'Percentage Error': np.abs((test['y'].values - final_predictions) / test['y'].values) * 100
                                if np.all(test['y'].values != 0) else np.nan
        })

        # Save metrics and comparison tables as CSV
        metrics_path = os.path.join(save_dir, "metrics.csv")
        comparison_path = os.path.join(save_dir, "predictions.csv")
        metrics_df.to_csv(metrics_path, index=False)
        comparison.to_csv(comparison_path, index=False)

        # Log as MLflow artifacts
        mlflow.log_artifact(metrics_path)
        mlflow.log_artifact(comparison_path)

        # Print results
        print("\n===== PERFORMANCE METRICS =====")
        print(metrics_df.to_string(index=False))

        print("\n===== PREDICTION RESULTS =====")
        print(comparison.to_string(index=False))

        # Save final model
        model_path = os.path.join(save_dir, f"prophet_model_{run_id}.pkl")
        save_model_pickle(model, model_path)

        # Log final metrics
        mlflow.log_metric("final_mae", mae)
        mlflow.log_metric("final_mse", mse)
        mlflow.log_metric("final_rmse", rmse)
        mlflow.log_metric("final_r2", r2)
        if not np.isnan(mape):
            mlflow.log_metric("final_mape", mape)

        # Create a sample input for model signature
        sample_input = pd.DataFrame({'ds': [pd.Timestamp.now()]})
        for feature in used_features:
            sample_input[feature] = [0.0]  # Sample value

        # Create a sample output for model signature
        sample_output = pd.DataFrame({'yhat': [0.0], 'trend': [0.0], 'yhat_lower': [0.0], 'yhat_upper': [0.0]})

        # Infer model signature
        signature = infer_signature(sample_input, sample_output)

        # Log the model as an MLflow artifact
        mlflow.pyfunc.log_model(
            artifact_path="prophet_model",
            python_model=ProphetWrapper(model, used_features),
            artifacts={"prophet_model": model_path},
            signature=signature
        )

        # Log the pickle file separately
        mlflow.log_artifact(model_path)

        print(f"Model saved to {model_path}")
        print(f"MLflow run ID: {run_id}")

        return model, rmse

class ProphetWrapper(mlflow.pyfunc.PythonModel):
    """Wrapper class for Prophet model to use with MLflow.

    Attributes:
        model: The fitted Prophet model used for prediction.
        features: Names of the extra regressor columns the model was given.
    """

    def __init__(self, model, features):
        self.model = model
        # Copy so later changes to the caller's list do not affect the wrapper.
        self.features = list(features)

    def load_context(self, context):
        """Load model from the artifacts."""
        import pickle

        # The pickle is the artifact logged alongside this wrapper.
        with open(context.artifacts["prophet_model"], "rb") as f:
            self.model = pickle.load(f)

    def predict(self, context, model_input):
        """Make predictions with the model for the dates in `model_input`.

        Args:
            context: MLflow context (unused here).
            model_input: DataFrame with a 'ds' column of dates plus one column
                per regressor listed in `self.features`.

        Returns:
            DataFrame with columns 'ds', 'yhat', 'yhat_lower', 'yhat_upper',
            one row per input row.

        Raises:
            ValueError: If 'ds' or any required regressor column is missing.
        """
        if 'ds' not in model_input.columns:
            raise ValueError("model_input must contain a 'ds' column with the dates to predict.")

        missing = [feature for feature in self.features if feature not in model_input.columns]
        if missing:
            raise ValueError(f"model_input is missing regressor columns: {missing}")

        # Predict for exactly the requested dates. make_future_dataframe would
        # return history plus N generated days, which neither matches the
        # requested dates nor has the same length as the supplied regressors.
        future = pd.DataFrame({'ds': pd.to_datetime(model_input['ds']).values})

        # Add regressor values
        for feature in self.features:
            future[feature] = model_input[feature].values

        # Make predictions
        forecast = self.model.predict(future)
        return forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

def load_and_predict(run_id, prediction_dates, csv_path=None, features_data=None):
    """
    Load a saved Prophet model from MLflow and make predictions.

    Args:
        run_id: MLflow run ID
        prediction_dates: List or array of dates to predict for
        csv_path: Optional path to CSV file with feature data. It must have a
            'Date' column; rows are matched to `prediction_dates` by date.
        features_data: Optional DataFrame with feature data. Its rows are taken
            positionally, so they must be in the same order (and number) as
            `prediction_dates`. Takes precedence over `csv_path`.

    Returns:
        DataFrame with predictions

    Raises:
        ValueError: If the CSV lacks a requested date or has duplicate rows for one.
    """
    # Load the model from MLflow
    model_uri = f"runs:/{run_id}/prophet_model"
    loaded_model = mlflow.pyfunc.load_model(model_uri)

    # Prepare input dataframe
    input_df = pd.DataFrame({'ds': pd.to_datetime(prediction_dates)})

    # Add features if provided
    if features_data is not None:
        for col in features_data.columns:
            if col not in ('ds', 'Date'):
                input_df[col] = features_data[col].values
    elif csv_path is not None:
        df = pd.read_csv(csv_path)
        df['Date'] = pd.to_datetime(df['Date'])
        # Compare against the parsed dates so both sides are datetimes.
        df = df[df['Date'].isin(input_df['ds'])]

        if df['Date'].duplicated().any():
            raise ValueError("The CSV contains more than one row for a requested date.")

        missing = input_df.loc[~input_df['ds'].isin(df['Date']), 'ds']
        if not missing.empty:
            raise ValueError(f"The CSV has no feature rows for: {list(missing.astype(str))}")

        # Join on the date instead of copying values positionally, so the
        # features line up with prediction_dates whatever the CSV row order.
        feature_cols = [col for col in df.columns if col not in ('Date', 'ds')]
        input_df = input_df.merge(
            df[['Date'] + feature_cols], how='left', left_on='ds', right_on='Date'
        ).drop(columns='Date')

    # Make predictions
    predictions = loaded_model.predict(input_df)

    return predictions

if __name__ == "__main__":
    # Example usage
    csv_path = "ML.csv"  # Replace with your CSV file path
    save_dir = "prophet_models"

    # Training can report "insufficient data" instead of a fitted model, so do
    # not unpack the result blindly.
    result = train_prophet_model(csv_path, target_column="Total Quantity", epochs=5, save_dir=save_dir)
    model, rmse = result if result is not None else (None, None)
    if model is None:
        print("Training was skipped: not enough data to train and evaluate a model.")




MLflow Run ID: 828f10ca4f8f4e00bbf5dccc433ef4e5


DEBUG:cmdstanpy:input tempfile: /tmp/tmpk3w0xnsv/jh32_27d.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpk3w0xnsv/3jedyr2u.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=87762', 'data', 'file=/tmp/tmpk3w0xnsv/jh32_27d.json', 'init=/tmp/tmpk3w0xnsv/3jedyr2u.json', 'output', 'file=/tmp/tmpk3w0xnsv/prophet_modelq_imqrl0/prophet_model-20250307190543.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
19:05:43 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
19:05:43 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing



===== PERFORMANCE METRICS =====
  Metric    Value
     MAE 1.610065
     MSE 2.985238
    RMSE 1.727784
      R² 0.838832
MAPE (%) 1.120920

===== PREDICTION RESULTS =====
      Date  Actual  Predicted    Error  Percentage Error
2024-12-27 148.500 150.364067 1.864067          1.255264
2024-12-28 138.250 139.566849 1.316849          0.952513
2024-12-29 142.750 143.424374 0.674374          0.472416
2024-12-30 138.125 139.741157 1.616157          1.170068
2024-12-31 147.000 144.421123 2.578877          1.754338


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Model saved to prophet_models/prophet_model_828f10ca4f8f4e00bbf5dccc433ef4e5.pkl
MLflow run ID: 828f10ca4f8f4e00bbf5dccc433ef4e5


In [None]:
import pandas as pd
import numpy as np
from prophet import Prophet
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')

# Build a synthetic demonstration dataset.
# To run on real data instead, replace this whole block with your own loading
# code (for example `df = pd.read_csv('ML.csv')`); loading a file here and then
# overwriting `df` below would only add a needless failure point.
date_range = pd.date_range(start='2023-01-01', end='2024-02-29', freq='D')
np.random.seed(42)  # fixed seed so the generated data is reproducible

# Use the specific product names you provided
products = ['beef', 'corn', 'milk', 'sugar', 'wheat', 'chocolate', 'coffee', 'soybeans']
data = []

for product in products:
    for date in date_range:
        # Base quantity with some seasonality
        base_qty = 100 + 20 * np.sin(date.dayofyear * 2 * np.pi / 365)

        # Product-specific patterns (simplified)
        if product == 'milk':
            product_effect = 15 if date.dayofweek < 5 else -10
        elif product == 'chocolate':
            # Winter bonus plus a Valentine's Day spike. Both are kept in the
            # product-specific term so the generic monthly pattern below
            # cannot overwrite them.
            product_effect = 30 if date.month in [1, 2, 12] else 0
            if date.month == 2 and date.day == 14:  # Valentine's Day
                product_effect += 50
        elif product == 'coffee':
            product_effect = 5 if date.month in [11, 12, 1, 2] else 0
        elif product in ['corn', 'wheat', 'soybeans']:
            product_effect = -20 if date.month in [9, 10] else 10
        elif product == 'sugar':
            product_effect = 25 if date.month in [11, 12] else 0
        else:  # beef
            product_effect = 20 if date.dayofweek >= 5 else 0

        # Monthly pattern (simplified), shared by all products
        month_effect = date.month * 3

        # Random noise
        noise = np.random.normal(0, 10)

        # Quantities are whole units and never drop below 1
        quantity = max(1, int(base_qty + product_effect + month_effect + noise))

        data.append({
            'Date': date,
            'Product_Name': product,
            'Total_Quantity': quantity,
            'day_of_week': date.dayofweek,
            'month': date.month,
        })

df = pd.DataFrame(data)

# Calculate lag features more efficiently using pandas
def add_lag_features(df, product_name, lag_days=(1, 7)):
    """Return one product's rows, sorted by date, with lag and rolling features.

    Args:
        df: DataFrame with 'Date', 'Product_Name' and 'Total_Quantity' columns.
        product_name: Value of 'Product_Name' to select.
        lag_days: Iterable of lags (in rows); each adds a `lag_<n>` column.

    Returns:
        A new DataFrame with the added `lag_<n>` and `rolling_mean_7` columns.
        Rows containing any missing value are dropped, which removes the
        leading rows that have no full lag history.
    """
    product_df = df[df['Product_Name'] == product_name].copy()
    product_df = product_df.sort_values('Date')

    # Add lag features
    for lag in lag_days:
        product_df[f'lag_{lag}'] = product_df['Total_Quantity'].shift(lag)

    # Rolling mean of the 7 PREVIOUS rows. The shift keeps the current row's
    # own target value out of a feature that is later used to predict it.
    product_df['rolling_mean_7'] = product_df['Total_Quantity'].shift(1).rolling(window=7).mean()

    return product_df.dropna()

# Function to train Prophet model (simplified)
# NOTE(review): an earlier cell defines a different function with this same
# name; running this cell replaces that definition in the kernel.
def train_prophet_model(product_data, product_name, use_hyperparams=False,
                        test_days=30, forecast_days=30):
    """Fit Prophet on one product's history, score it, and forecast ahead.

    Args:
        product_data: DataFrame with 'Date' and 'Total_Quantity' columns and,
            optionally, 'lag_1', 'lag_7' and 'rolling_mean_7' regressors.
        product_name: Product label (currently not used by the function).
        use_hyperparams: Currently not used; preset parameters are always applied.
        test_days: Number of trailing rows held out for evaluation.
        forecast_days: Number of days to forecast past the last date.

    Returns:
        (model, future_forecast, mae, rmse, r2). All five values are None when
        there are too few rows to hold out `test_days` and still fit a model.

    Raises:
        ValueError: If `test_days` or `forecast_days` is smaller than 1.
    """
    if test_days < 1 or forecast_days < 1:
        raise ValueError("test_days and forecast_days must be at least 1.")

    # Prepare data for Prophet
    prophet_data = product_data[['Date', 'Total_Quantity']].rename(
        columns={'Date': 'ds', 'Total_Quantity': 'y'}
    )

    # Add regressors if they exist
    regressors = []
    for col in ['lag_1', 'lag_7', 'rolling_mean_7']:
        if col in product_data.columns:
            prophet_data[col] = product_data[col]
            regressors.append(col)

    # Prophet needs at least two training rows. Signal "no model" with Nones,
    # which is what the calling loop checks for.
    if len(prophet_data) < test_days + 2:
        return None, None, None, None, None

    # Split data into train and test (last `test_days` rows)
    train_data = prophet_data.iloc[:-test_days]
    test_data = prophet_data.iloc[-test_days:]

    # Best parameters (preset to avoid tuning)
    best_params = {
        'yearly_seasonality': True,
        'weekly_seasonality': True,
        'daily_seasonality': False,
        'seasonality_mode': 'multiplicative',
        'changepoint_prior_scale': 0.1,
        'seasonality_prior_scale': 1.0
    }

    # Train with best parameters
    model = Prophet(**best_params)

    # Add regressors
    for regressor in regressors:
        model.add_regressor(regressor)

    # Fit model
    model.fit(train_data)

    # Make predictions on test data
    forecast = model.predict(test_data)

    # Calculate metrics
    mae = mean_absolute_error(test_data['y'], forecast['yhat'])
    rmse = np.sqrt(mean_squared_error(test_data['y'], forecast['yhat']))
    r2 = r2_score(test_data['y'], forecast['yhat'])

    # Generate future forecast (next `forecast_days` days)
    last_date = prophet_data['ds'].max()
    future_dates = pd.date_range(
        start=last_date + pd.Timedelta(days=1),
        periods=forecast_days
    )
    future = pd.DataFrame({'ds': future_dates})

    # Future regressor values are unknown, so each one is approximated by a
    # constant: the mean of the last N observed target values.
    tail_windows = {'lag_1': 7, 'lag_7': 14, 'rolling_mean_7': 14}
    for regressor in regressors:
        future[regressor] = prophet_data['y'].tail(tail_windows[regressor]).mean()

    # Make prediction for the future
    future_forecast = model.predict(future)

    return model, future_forecast, mae, rmse, r2

# Containers for results, keyed by product name
models = {}
forecasts = {}
metrics = {}

# Train one model per product
for product in tqdm(products, desc="Processing products"):
    # Add lag features for this product only
    product_data = add_lag_features(df, product)

    # Skip products without enough history to hold out a test window
    if len(product_data) < 30:
        continue

    # Train model without expensive hyperparameter tuning
    model, forecast, mae, rmse, r2 = train_prophet_model(
        product_data,
        product,
        use_hyperparams=False  # Skip hyperparameter tuning
    )

    if model is not None:
        models[product] = model
        forecasts[product] = forecast
        metrics[product] = {'MAE': mae, 'RMSE': rmse, 'R2': r2}

if metrics:
    metrics_df = pd.DataFrame.from_dict(metrics, orient='index')
    print("\nModel Performance Metrics:")
    print(metrics_df)
    print(f"Average MAE: {metrics_df['MAE'].mean():.2f}")
    print(f"Average RMSE: {metrics_df['RMSE'].mean():.2f}")
    print(f"Average R²: {metrics_df['R2'].mean():.4f}")

# Create a consolidated forecast dataframe.
# Frames are collected in a list and concatenated once; growing a DataFrame
# with pd.concat inside the loop re-copies all earlier rows on every pass.
forecast_frames = []

for product in products:
    if product in forecasts:
        product_forecast = forecasts[product]

        # Create a dataframe with forecast info
        forecast_frames.append(pd.DataFrame({
            'Date': product_forecast['ds'],
            'Product_Name': product,
            'Forecasted_Quantity': product_forecast['yhat'],
            'Lower_Bound': product_forecast['yhat_lower'],
            'Upper_Bound': product_forecast['yhat_upper']
        }))

consolidated_forecast = pd.concat(forecast_frames) if forecast_frames else pd.DataFrame()

if not consolidated_forecast.empty:
    # Sort by date and product
    consolidated_forecast = consolidated_forecast.sort_values(['Date', 'Product_Name'])

    # Save consolidated forecast
    consolidated_forecast.to_csv('all_products_forecast.csv', index=False)


Processing products:   0%|          | 0/8 [00:00<?, ?it/s]

DEBUG:cmdstanpy:input tempfile: /tmp/tmpv63jhtme/1qlccqbx.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpv63jhtme/wkr70jyd.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=85664', 'data', 'file=/tmp/tmpv63jhtme/1qlccqbx.json', 'init=/tmp/tmpv63jhtme/wkr70jyd.json', 'output', 'file=/tmp/tmpv63jhtme/prophet_model2gpy5cfm/prophet_model-20250308224100.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
22:41:00 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
22:41:00 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpv63jhtme/jehjfkf4.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpv63jhtme/j73a7e1q.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/


Model Performance Metrics:
                MAE       RMSE        R2
beef       6.662814   7.877375  0.715608
corn       8.076735   9.560454  0.119259
milk       8.964739  11.150455  0.368100
sugar      6.995440   8.716770  0.023492
wheat      7.112712   9.024526  0.229449
chocolate  9.639820  13.258660  0.019285
coffee     9.856897  12.169043  0.025923
soybeans   8.951347  11.174621  0.110062
Average MAE: 8.28
Average RMSE: 10.37
Average R²: 0.2014
