In [23]:
# Section 1: Data Loading and Preprocessing

import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
import plotly.graph_objs as go
from plotly.offline import plot
import plotly.tools as tls
import os
import pickle
import numpy as np

def load_and_preprocess_data(file_path):
    """Load and preprocess the bread sales data."""
    raw_df = pd.read_csv(file_path)
    df = raw_df[["date", "filled_sold_bread", "day", 
                 "temperature_2m_mean", "precipitation_sum_mm"]].copy()
    
    # Rename columns to match Prophet's requirements
    df.columns = ["ds", "y", "day", "temperature_2m_mean", "precipitation_sum_mm"]
    
    # Convert date and sales
    df.loc[:, "ds"] = pd.to_datetime(df["ds"], format="%d/%m/%Y")
    df.loc[:, "y"] = pd.to_numeric(df["y"])
    
    # Create day dummies
    day_dummies = pd.get_dummies(df["day"], prefix="day")
    df = pd.concat([df, day_dummies], axis=1)
    
    return df, day_dummies

In [24]:
# Section 2: Model Configuration and Training

def configure_prophet_model():
    """Configure Prophet model with custom parameters and seasonality."""
    model = Prophet(
        changepoint_prior_scale=0.05,  # Controls flexibility of trend changes
        changepoint_range=0.9,         # Consider trend changes up to 90% of timeline
        seasonality_prior_scale=10.0   # Controls flexibility of seasonality
    )
    
    # Add custom seasonality patterns
    model.add_seasonality(name='weekly', period=7, fourier_order=3)
    model.add_seasonality(name='yearly', period=365.25, fourier_order=10)
    
    return model

def add_regressors(model, day_dummies):
    """Add additional features (regressors) to the model."""
    # Add day-of-week indicators
    for col in day_dummies.columns:
        model.add_regressor(col)
    
    # Add weather-related features
    model.add_regressor('temperature_2m_mean')
    model.add_regressor('precipitation_sum_mm')

def train_model(df, day_dummies):
    """Configure and train the Prophet model."""
    # Configure base model
    model = configure_prophet_model()
    
    # Add additional features
    add_regressors(model, day_dummies)
    
    # Fit the model
    print("Training model...")
    model.fit(df)
    print("Model training completed")
    
    # Save the trained model
    with open('prophet_model.pkl', 'wb') as f:
        pickle.dump(model, f)
        print("Model saved to prophet_model.pkl")
    
    return model

In [25]:
# Section 3: Model Evaluation

def evaluate_model(model):
    """Perform cross-validation and display performance metrics."""
    # Perform cross-validation
    df_cv = cross_validation(
        model, 
        initial='365 days',
        period='30 days',
        horizon='30 days'
    )
    
    # Calculate performance metrics
    df_p = performance_metrics(df_cv)
    
    # Display metrics
    print("Model Performance Metrics:")
    print(df_p)
    
    # Plot cross-validation results
    from prophet.plot import plot_cross_validation_metric
    fig = plot_cross_validation_metric(df_cv, metric='rmse')
    
    return df_cv, df_p, fig

In [26]:
# Section 4: Future Predictions

def prepare_future_dataframe(model, df, day_dummies, prediction_days=365):
    """Prepare future dataframe for predictions."""
    # Get last date with actual data
    last_actual_date = df[df['y'].notna()]['ds'].max()
    print(f"Last date with actual sales data: {last_actual_date}")
    
    # Create future dataframe
    future = model.make_future_dataframe(periods=prediction_days)
    prediction_end_date = last_actual_date + pd.DateOffset(days=prediction_days)
    future = future[future['ds'] <= prediction_end_date]
    
    # Add features
    # Day dummies
    future = pd.concat([future, day_dummies.reindex(future.index, fill_value=0)], axis=1)
    
    # Weather features (using historical means)
    for feature in ['temperature_2m_mean', 'precipitation_sum_mm']:
        if feature in df.columns:
            future[feature] = df[feature].reindex(
                future.index, 
                fill_value=df[feature].mean()
            )
        else:
            raise ValueError(f"{feature} data missing for predictions")
    
    return future, last_actual_date

In [27]:
# Section 5: Visualization and Results Export
def create_visualization(model, forecast):
    """Create basic forecast visualization"""
    # Create the matplotlib figure
    fig = model.plot(forecast)
    
    # Convert to plotly figure
    plotly_fig = tls.mpl_to_plotly(fig)
    
    # Display the plotly figure
    plot(plotly_fig)
    
def export_results(forecast, output_path):
    """Save forecast results to CSV"""
    forecast.to_csv(output_path, index=False)
    print(f"Forecast saved to: {output_path}")

In [28]:
# Main execution
if __name__ == "__main__":
    # 1. Load and preprocess data
    file_path = r"c:\Users\avrahamma\Documents\School\AI_for_social_good\data\paris_bread_sales.csv"
    df, day_dummies = load_and_preprocess_data(file_path)
    
    # 2. Configure and train model
    model = configure_prophet_model()
    add_regressors(model, day_dummies)
    model.fit(df)
    
    # 3. Save trained model
    with open('prophet_model.pkl', 'wb') as f:
        pickle.dump(model, f)
    
    # 4. Evaluate model
    df_cv = cross_validation(model, initial='365 days', period='30 days', horizon='30 days')
    df_p = performance_metrics(df_cv)
    print("Model Performance Metrics:")
    print(df_p)
    
    # 5. Generate predictions
    future, last_actual_date = prepare_future_dataframe(model, df, day_dummies)
    forecast = model.predict(future)
    
    # 6. Visualize and export results
    create_visualization(model, forecast)
    
    output_path = r"c:\Users\avrahamma\Documents\School\AI_for_social_good\visualizations\prophet_forecast.csv"
    export_results(forecast, output_path)

21:40:20 - cmdstanpy - INFO - Chain [1] start processing
21:40:20 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.
  0%|          | 0/9 [00:00<?, ?it/s]21:40:20 - cmdstanpy - INFO - Chain [1] start processing
21:40:20 - cmdstanpy - INFO - Chain [1] done processing
 11%|█         | 1/9 [00:00<00:02,  4.00it/s]21:40:21 - cmdstanpy - INFO - Chain [1] start processing
21:40:21 - cmdstanpy - INFO - Chain [1] done processing
 22%|██▏       | 2/9 [00:00<00:01,  3.90it/s]21:40:21 - cmdstanpy - INFO - Chain [1] start processing
21:40:21 - cmdstanpy - INFO - Chain [1] done processing
 33%|███▎      | 3/9 [00:00<00:01,  3.88it/s]21:40:21 - cmdstanpy - INFO - Chain [1] start processing
21:40:21 - cmdstanpy - INFO - Chain [1] done processing
 44%|████▍     | 4/9 [00:01<00:01,  3.87it/s]21:40:21 - cmdstanpy - INFO - Chain [1] start processing
21:40:22 - cmdstanpy - INFO - Chain [1] done processing
 

Model Performance Metrics:
   horizon           mse        rmse         mae      mape     mdape  \
0   3 days  22341.265092  149.469947  103.617550  0.465394  0.140110   
1   4 days  32203.524779  179.453406  124.939527  0.260767  0.182608   
2   5 days  40503.114272  201.253855  140.722707  0.325267  0.201003   
3   6 days  47366.582324  217.638651  152.156138  0.361969  0.194274   
4   7 days  41431.201668  203.546559  146.304242  0.381649  0.187233   
5   8 days  39156.459039  197.879911  148.271469  0.375219  0.187233   
6   9 days  33054.257861  181.808300  139.863373  0.356618  0.144086   
7  10 days  27215.445014  164.971043  125.864195  0.337248  0.134562   
8  11 days  27499.384817  165.829385  130.866945  0.319484  0.144086   
9  12 days  42548.876913  206.273791  151.604061  0.349118  0.148700   
10 13 days  71140.063508  266.720947  205.168777  0.373942  0.234680   
11 14 days  87800.939072  296.312232  230.234386  0.406754  0.275441   
12 15 days  95206.057087  308.554788 


Dang! That path collection is out of this world. I totally don't know what to do with it yet! Plotly can only import path collections linked to 'data' coordinates



Forecast saved to: c:\Users\avrahamma\Documents\School\AI_for_social_good\visualizations\prophet_forecast.csv
