In [29]:
# Install dependencies
!pip install prophet scikit-learn --quiet

import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [30]:
# Load the raw sales CSV.
# - encoding="latin1": decodes the file as Latin-1.
#   NOTE(review): confirm the file really is Latin-1 and not UTF-8; Latin-1
#   never raises on decode, so a wrong guess would go unnoticed.
# - on_bad_lines="warn": malformed rows are still skipped (so loading does not
#   fail), but each one is reported instead of being dropped silently — rows
#   lost here would otherwise quietly lower the aggregated sales totals.
df = pd.read_csv(
    "mock_kaggle.csv",
    encoding="latin1",
    sep=",",
    quotechar='"',
    on_bad_lines="warn"
)

In [31]:
# Daily sales: convert the `data` column to datetimes (in place on `df`),
# then add up `venda` for each distinct value of `data`.
df["data"] = df["data"].pipe(pd.to_datetime)
daily = (
    df.groupby("data", as_index=False)["venda"]
    .sum()
)

In [32]:
# Prophet expects the timestamp column to be called `ds` and the target `y`,
# so rename the aggregated columns accordingly (returns a new frame).
prophet_df = daily.rename(columns=dict(data="ds", venda="y"))

In [33]:
# FINAL MODEL – train on all available data, then forecast 90 days ahead.
# Seasonal components: Prophet's built-in yearly and weekly terms plus a custom
# "monthly" term (30.5-day period, 5 Fourier terms); the daily term is disabled.
final_model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
final_model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
final_model.fit(prophet_df)

# Extend the training dates by 90 daily steps
future = final_model.make_future_dataframe(periods=90, freq='D')

# Prophet estimates yhat_lower / yhat_upper by simulation, drawing from NumPy's
# global random generator. Seed it right before predicting so the exported
# interval columns are identical from one run of the notebook to the next
# (the point forecast `yhat` does not depend on these draws).
np.random.seed(42)
forecast = final_model.predict(future)

# Save for Power BI
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].to_csv("prophet_forecast.csv", index=False)
print("✅ Forecast saved as prophet_forecast.csv")


DEBUG:cmdstanpy:input tempfile: /tmp/tmpbsoyfcas/xbk8epw4.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbsoyfcas/okcwd4fq.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=17603', 'data', 'file=/tmp/tmpbsoyfcas/xbk8epw4.json', 'init=/tmp/tmpbsoyfcas/okcwd4fq.json', 'output', 'file=/tmp/tmpbsoyfcas/prophet_modeluw746njo/prophet_model-20250812071354.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
07:13:54 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
07:13:54 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


✅ Forecast saved as prophet_forecast.csv


In [35]:
# EVALUATION – hold out the last 90 rows of the series as a test set
train = prophet_df.head(-90)
test = prophet_df.tail(90)

# Same configuration as the final model, but fitted on the training rows only
eval_model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
)
eval_model.add_seasonality(name="monthly", period=30.5, fourier_order=5)
eval_model.fit(train)

# Ask for predictions on exactly the held-out dates
future_test = test[["ds"]].copy()
fc_test = eval_model.predict(future_test)

# Line up actuals with predictions by date; rows with any missing value are dropped
eval_df = test.merge(fc_test[["ds", "yhat"]], on="ds", how="left").dropna()

# Error metrics on the hold-out set
mae = mean_absolute_error(eval_df["y"], eval_df["yhat"])
rmse = np.sqrt(mean_squared_error(eval_df["y"], eval_df["yhat"]))

print(f"📊 MAE: {mae:.2f}")
print(f"📊 RMSE: {rmse:.2f}")

# Long-format export for Power BI: actuals and forecasts stacked in one table,
# distinguished by the `type` column.
# NOTE(review): `forecast` comes from the final-model cell; if it also contains
# the historical dates, those dates appear twice here (once per type) — confirm
# that is what the report expects.
actual_df = prophet_df.assign(type="actual")

# Forecast values share the `y` column name so both frames stack cleanly
forecast_df = (
    forecast[["ds", "yhat"]]
    .rename(columns={"yhat": "y"})
    .assign(type="forecast")
)

# Stack actual rows first, forecast rows after, with a fresh 0..n-1 index
combined_df = pd.concat((actual_df, forecast_df), ignore_index=True)

# Write the combined table
combined_df.to_csv("sales_actual_forecast.csv", index=False)
print("✅ Combined actual & forecast saved as sales_actual_forecast.csv")


DEBUG:cmdstanpy:input tempfile: /tmp/tmpbsoyfcas/0cgqzbib.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbsoyfcas/1kxluwnp.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34286', 'data', 'file=/tmp/tmpbsoyfcas/0cgqzbib.json', 'init=/tmp/tmpbsoyfcas/1kxluwnp.json', 'output', 'file=/tmp/tmpbsoyfcas/prophet_modelv01mon0q/prophet_model-20250812075926.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
07:59:26 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
07:59:26 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


📊 MAE: 90.70
📊 RMSE: 113.40
✅ Combined actual & forecast saved as sales_actual_forecast.csv
