# In [None]:
import io
import logging
import os
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.colab import files
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Set cmdstanpy logger to WARNING so its INFO and DEBUG messages are suppressed
logging.getLogger('cmdstanpy').setLevel(logging.WARNING)

# Upload CSV. Colab returns a dict keyed by file name whose values are the
# uploaded bytes.
uploaded = files.upload()

# Load data from the bytes that were just uploaded rather than from a fixed
# path on disk: the on-disk name is not guaranteed to match (Colab may save a
# re-uploaded file under a different name), which would silently load stale
# data. Fall back to the on-disk file only when nothing was uploaded.
csv_name = "Walmart_Sales.csv"
if uploaded:
    if csv_name not in uploaded:
        # Expected name not present: use the first uploaded file instead.
        csv_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[csv_name]))
else:
    df = pd.read_csv(csv_name)

# Parse dates; dayfirst=True treats ambiguous dates as day-month-year
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

# Rename columns to Prophet's expected format ('ds' = timestamp, 'y' = target)
df = df.rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})

# Sort chronologically; the per-store slices taken later inherit this order
df = df.sort_values('ds')

# Add a string-typed 'store' column used for grouping and file names
df['store'] = df['Store'].astype(str)

# Train/test boundary: the first two years after the earliest observation are
# used for training, everything from the boundary onwards for testing.
start_date = df['ds'].min()
train_end_date = start_date + pd.DateOffset(years=2)

# US holiday calendar for Prophet, covering the first data year through one
# year past the last data year (range() excludes its upper bound).
from prophet.make_holidays import make_holidays_df
holiday_years = range(start_date.year, df['ds'].max().year + 2)
holidays = make_holidays_df(year_list=holiday_years, country='US')

# Accumulators filled once per store by the training loop
store_metrics, store_forecasts = [], []

# Output folder for plots and CSVs (reused if it already exists)
output_dir = "prophet_store_forecasts"
os.makedirs(output_dir, exist_ok=True)

# Forecast horizon, in weeks, beyond the last observed date
future_periods = 12

# Extra regressors fed to every store model (identical for all stores, so the
# list is defined once, outside the loop).
regressors = ['Holiday_Flag', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment']


def safe_mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Rows whose actual value is 0 are excluded to avoid division by zero.
    The result is NaN when no non-zero actuals remain.

    Args:
        y_true: pandas Series of actual values.
        y_pred: pandas Series of predictions, indexed like ``y_true``.
    """
    mask = y_true != 0
    return ((y_true[mask] - y_pred[mask]) / y_true[mask]).abs().mean() * 100


# Loop through each store: fit on the training window, backtest on the test
# window, forecast `future_periods` weeks ahead, then save a plot and a CSV.
stores = df['store'].unique()
for store_id in stores:
    print(f"Training model for Store {store_id}")
    store_data = df[df['store'] == store_id].copy()

    train_df = store_data[store_data['ds'] < train_end_date].copy()
    test_df = store_data[store_data['ds'] >= train_end_date].copy()

    # Skip stores that cannot be fitted or evaluated instead of aborting the
    # whole run: fitting needs at least two rows and the metrics need a
    # non-empty test window.
    if len(train_df) < 2 or test_df.empty:
        print(f"Skipping Store {store_id}: not enough data "
              f"(train rows: {len(train_df)}, test rows: {len(test_df)})")
        continue

    # Initialize model
    # NOTE(review): weekly_seasonality=True cannot be estimated from data that
    # has a single observation per week; it is kept as-is here -- confirm
    # whether it should be disabled.
    model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        holidays=holidays
    )
    for reg in regressors:
        model.add_regressor(reg)

    # Fit
    model.fit(train_df[['ds', 'y'] + regressors])

    # Predict on test set
    future_test = test_df[['ds'] + regressors].copy()
    forecast_test = model.predict(future_test)

    # Evaluate. Predictions are aligned to test_df by position; this relies on
    # test_df being in ascending 'ds' order (df is sorted by 'ds' upstream).
    results = test_df[['ds', 'y']].copy()
    results['yhat'] = forecast_test['yhat'].values

    mae = mean_absolute_error(results['y'], results['yhat'])
    rmse = np.sqrt(mean_squared_error(results['y'], results['yhat']))
    mape = safe_mape(results['y'], results['yhat'])

    print(f"Store {store_id} - MAE: {mae:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.2f}%")

    store_metrics.append({
        'store': store_id,
        'mae': mae,
        'rmse': rmse,
        'mape': mape
    })

    # Save backtest forecasts
    store_forecasts.append(results.assign(store=store_id))

    # === Future Prediction ===
    # Step in exact 7-day increments from the last observed date so future
    # points keep the same weekday as the data. freq='W' would snap every date
    # to Sunday, shifting the forecast off the observed weekly cadence.
    last_date = store_data['ds'].max()
    future_dates = pd.date_range(
        start=last_date + pd.Timedelta(weeks=1),
        periods=future_periods,
        freq='7D',
    )

    # Use most recent known values for regressors (held constant over the
    # horizon).
    # NOTE(review): this also carries the last Holiday_Flag forward for every
    # future week -- confirm that is intended.
    latest_vals = store_data.iloc[-1][regressors]
    future_df = pd.DataFrame({'ds': future_dates})
    for reg in regressors:
        future_df[reg] = latest_vals[reg]

    future_forecast = model.predict(future_df)

    # === Plotting ===
    fig, ax = plt.subplots(figsize=(12, 6))
    # Actual
    ax.plot(store_data['ds'], store_data['y'], label='Actual')
    # Backtest
    ax.plot(results['ds'], results['yhat'], label='Backtest Forecast')
    # Future forecast
    ax.plot(future_forecast['ds'], future_forecast['yhat'],
            label='Future Forecast', linestyle='dashed')

    ax.set_title(f'Walmart Store {store_id} Sales Forecast (+{future_periods} Weeks)')
    ax.set_xlabel('Date')
    ax.set_ylabel('Weekly Sales')
    ax.legend()

    # Add metrics text box in the upper-left corner, slightly transparent
    metrics_text = (f"MAE: {mae:.2f}\n"
                    f"RMSE: {rmse:.2f}\n"
                    f"MAPE: {mape:.2f}%")
    ax.text(0.02, 0.95, metrics_text, transform=ax.transAxes,
            fontsize=10, verticalalignment='top',
            bbox=dict(boxstyle="round,pad=0.5", facecolor="white", alpha=0.7))

    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, f'walmart_store_{store_id}_forecast.png'))
    # Close the figure so figures do not accumulate across stores
    plt.close(fig)

    # Prepare backtest results with confidence intervals from forecast_test
    backtest_export = results.assign(
        yhat_lower=forecast_test['yhat_lower'].values,
        yhat_upper=forecast_test['yhat_upper'].values,
        store=store_id,
        type='backtest',
    )

    # Prepare future forecast export
    future_export = future_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    future_export['y'] = np.nan  # no actuals for future
    future_export['store'] = store_id
    future_export['type'] = 'future'

    # Combine and save CSV for this store
    export_df = pd.concat([backtest_export, future_export], ignore_index=True)
    export_df = export_df.rename(columns={
        'ds': 'date',
        'y': 'actual_revenue',
        'yhat': 'forecast_revenue',
        'yhat_lower': 'lower_ci',
        'yhat_upper': 'upper_ci'
    })

    csv_filename = os.path.join(output_dir, f'walmart_store_{store_id}_forecast.csv')
    export_df.to_csv(csv_filename, index=False)

# Stack the per-store backtest frames into a single table
all_forecasts = pd.concat(store_forecasts)

# Overall metrics: unweighted mean of each per-store metric
metric_means = {
    key: np.mean([row[key] for row in store_metrics])
    for key in ('mae', 'rmse', 'mape')
}
overall_mae = metric_means['mae']
overall_rmse = metric_means['rmse']
overall_mape = metric_means['mape']

print("\nOverall Metrics Across Stores:")
print(f"MAE: {overall_mae:.2f}")
print(f"RMSE: {overall_rmse:.2f}")
print(f"MAPE: {overall_mape:.2f}%")

# Zip the folder for download.
# One pass over output_dir adds every file (plots and per-store CSVs) exactly
# once. A second append pass for the CSVs would store each CSV twice, because
# the first pass already picks them up.
zip_filename = "prophet_store_forecasts.zip"
with zipfile.ZipFile(zip_filename, 'w') as zipf:
    # The loop variables deliberately avoid the name `files`: that name is the
    # google.colab module imported at the top of the file, and rebinding it to
    # a list would break any later `files.download(...)` call.
    for dir_path, _subdirs, file_names in os.walk(output_dir):
        for file_name in sorted(file_names):
            # arcname drops the directory part so the archive is flat
            zipf.write(os.path.join(dir_path, file_name), arcname=file_name)

print(f"Forecast plots saved in '{output_dir}' and zipped as '{zip_filename}'.")
print(f"Forecast CSV files saved alongside plots and zipped as '{zip_filename}'.")