In [3]:
import pandas as pd
from prophet import Prophet
import plotly.graph_objs as go
from plotly.offline import plot
import plotly.tools as tls
import os
import pickle
import numpy as np
from scipy.stats import ttest_ind
import streamlit as st

# Load the data
raw_df = pd.read_csv(r"c:\Users\avrahamma\Documents\School\AI_for_social_good\data\paris_bread_sales.csv")

# Prepare the data for Prophet
df = raw_df[["date", "filled_sold_bread", "day", "temperature_2m_mean", "precipitation_sum_mm"]].copy()  # Use .copy() to avoid SettingWithCopyWarning
df.columns = ["ds", "y", "day", "temperature_2m_mean", "precipitation_sum_mm"]  # Rename the columns to match Prophet's requirements

# Convert the date to a datetime object
df.loc[:, "ds"] = pd.to_datetime(df["ds"], format="%d/%m/%Y")

# Convert the sales to a numeric object
df.loc[:, "y"] = pd.to_numeric(df["y"])

# One-hot encode the "day" column
day_dummies = pd.get_dummies(df["day"], prefix="day")
df = pd.concat([df, day_dummies], axis=1)

In [5]:
# First, identify the last date with actual sales data
last_actual_date = df[df['y'].notna()]['ds'].max()
print(f"Last date with actual sales data: {last_actual_date}")

# Split the data for training - use only data up to the last actual date
train_df = df[df['ds'] <= last_actual_date].copy()

# Initialize and train the model on training data only
model = Prophet()
# Add the regressors
for col in day_dummies.columns:
    model.add_regressor(col)
model.add_regressor('temperature_2m_mean')
model.add_regressor('precipitation_sum_mm')

# Add custom seasonality
model.add_seasonality(name='weekly', period=7, fourier_order=3)
model.add_seasonality(name='yearly', period=365.25, fourier_order=10)

# Train the model
model.fit(df)

# Save the model
with open('prophet_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Create future dataframe that goes exactly one year ahead from the last actual date
future = model.make_future_dataframe(periods=365)
prediction_end_date = last_actual_date + pd.DateOffset(days=365)
future = future[future['ds'] <= prediction_end_date]

# Add the regressors to the future DataFrame
# Add day dummies to the future dataframe
future = pd.concat([future, day_dummies.reindex(future.index, fill_value=0)], axis=1)

# Add temperature data to the future dataframe
if 'temperature_2m_mean' in df.columns:
    future['temperature_2m_mean'] = df['temperature_2m_mean'].reindex(future.index, fill_value=df['temperature_2m_mean'].mean())
else:
    raise ValueError("Temperature data is missing for future predictions. Please provide the temperature data.")

# Add precipitation data to the future dataframe
if 'precipitation_sum_mm' in df.columns:
    future['precipitation_sum_mm'] = df['precipitation_sum_mm'].reindex(future.index, fill_value=df['precipitation_sum_mm'].mean())
else:
    raise ValueError("Precipitation data is missing for future predictions. Please provide the precipitation data.")

# Make predictions
forecast = model.predict(future)

Last date with actual sales data: 2022-09-30 00:00:00


14:34:02 - cmdstanpy - INFO - Chain [1] start processing
14:34:03 - cmdstanpy - INFO - Chain [1] done processing


In [7]:
# Plot the forecast
fig = model.plot(forecast)

# Convert the Matplotlib figure to a Plotly figure
plotly_fig = tls.mpl_to_plotly(fig)

# Display the Plotly figure
plot(plotly_fig)

print(forecast.head())
print(forecast.tail())

# Save the forecast to a CSV file
forecast_path = r"c:\Users\avrahamma\Documents\School\AI_for_social_good\visualizations\prophet_forecast.csv"
forecast.to_csv(forecast_path, index=False)
fig = model.plot(forecast)

# Convert the Matplotlib figure to a Plotly figure
plotly_fig = tls.mpl_to_plotly(fig)

# Display the Plotly figure
plot(plotly_fig)

print(forecast.head())
print(forecast.tail())

# Save the forecast to a CSV file
# forecast_path = r"c:\Users\avrahamma\Documents\School\AI_for_social_good\data\prophet_forecast.csv"
# forecast.to_csv(forecast_path, index=False)


Dang! That path collection is out of this world. I totally don't know what to do with it yet! Plotly can only import path collections linked to 'data' coordinates



          ds       trend  yhat_lower  yhat_upper  trend_lower  trend_upper  \
0 2021-01-01  557.058870  235.043697  579.915654   557.058870   557.058870   
1 2021-01-02  557.196089  387.328871  718.930139   557.196089   557.196089   
2 2021-01-03  557.333309  603.050674  947.639979   557.333309   557.333309   
3 2021-01-04  557.470528  230.086191  549.522928   557.470528   557.470528   
4 2021-01-05  557.607747  158.148276  489.779064   557.607747   557.607747   

   additive_terms  additive_terms_lower  additive_terms_upper  day_friday  \
0     -155.313577           -155.313577           -155.313577   20.986016   
1       -3.236207             -3.236207             -3.236207    0.000000   
2      221.299412            221.299412            221.299412    0.000000   
3     -164.640155           -164.640155           -164.640155    0.000000   
4     -237.577698           -237.577698           -237.577698    0.000000   

   ...      weekly  weekly_lower  weekly_upper      yearly  yearly_l


Dang! That path collection is out of this world. I totally don't know what to do with it yet! Plotly can only import path collections linked to 'data' coordinates



          ds       trend  yhat_lower  yhat_upper  trend_lower  trend_upper  \
0 2021-01-01  557.058870  235.043697  579.915654   557.058870   557.058870   
1 2021-01-02  557.196089  387.328871  718.930139   557.196089   557.196089   
2 2021-01-03  557.333309  603.050674  947.639979   557.333309   557.333309   
3 2021-01-04  557.470528  230.086191  549.522928   557.470528   557.470528   
4 2021-01-05  557.607747  158.148276  489.779064   557.607747   557.607747   

   additive_terms  additive_terms_lower  additive_terms_upper  day_friday  \
0     -155.313577           -155.313577           -155.313577   20.986016   
1       -3.236207             -3.236207             -3.236207    0.000000   
2      221.299412            221.299412            221.299412    0.000000   
3     -164.640155           -164.640155           -164.640155    0.000000   
4     -237.577698           -237.577698           -237.577698    0.000000   

   ...      weekly  weekly_lower  weekly_upper      yearly  yearly_l