In [1]:
# === IMPORTS ===
import pandas as pd
import numpy as np
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
import matplotlib.pyplot as plt

# === 1. Load and Aggregate Data ===
def load_and_resample(filepath):
    df = pd.read_csv(filepath)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df.drop(columns=['_id'], inplace=True, errors='ignore')
    df = df.set_index('Timestamp').resample('D').sum()
    df = df.loc["2022-01-01":"2025-06-30"]
    df.reset_index(inplace=True)
    df.rename(columns={"Timestamp": "ds", "Redemption Count": "y"}, inplace=True)
    return df

# === 2. Add Regressors (optional but powerful in Prophet) ===
def add_regressors(df):
    df['dow'] = df['ds'].dt.dayofweek
    df['is_weekend'] = df['dow'].isin([5, 6]).astype(int)
    df['is_summer'] = df['ds'].dt.month.isin([6, 7, 8]).astype(int)
    df['is_summer_weekend'] = df['is_summer'] * df['is_weekend']
    return df

# === 3. Define Holidays ===
def make_holidays():
    toronto_holidays = pd.DataFrame({
        'holiday': 'canada_holiday',
        'ds': pd.to_datetime([
            '2022-07-01', '2023-07-01', '2024-07-01', '2025-07-01',  # Canada Day
            '2022-12-25', '2023-12-25', '2024-12-25', '2025-12-25',  # Christmas
        ]),
        'lower_window': 0,
        'upper_window': 1,
    })
    return toronto_holidays

# === 4. Fit Prophet Model ===
def fit_prophet_model(df, holidays):
    m = Prophet(holidays=holidays, yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
    
    # Add custom regressors
    m.add_regressor('is_weekend')
    m.add_regressor('is_summer')
    m.add_regressor('is_summer_weekend')

    m.fit(df)
    return m

# === 5. Forecast ===
def make_forecast(model, df, periods=30):
    future = model.make_future_dataframe(periods=periods)
    
    # Reconstruct regressors in future
    future['dow'] = future['ds'].dt.dayofweek
    future['is_weekend'] = future['dow'].isin([5, 6]).astype(int)
    future['is_summer'] = future['ds'].dt.month.isin([6, 7, 8]).astype(int)
    future['is_summer_weekend'] = future['is_summer'] * future['is_weekend']
    
    forecast = model.predict(future)
    return forecast

# === 6. Plot and Evaluate ===
def plot_forecast(model, forecast, df):
    fig1 = plot_plotly(model, forecast)
    fig1.show()
    fig2 = plot_components_plotly(model, forecast)
    fig2.show()

    forecast_df = forecast[['ds', 'yhat']].set_index('ds')
    actual_df = df.set_index('ds')[['y']]
    joined = actual_df.join(forecast_df, how='inner')
    # mae = mean_absolute_error(joined['y'], joined['yhat'])
    # rmse = root_mean_squared_error(joined['y'], joined['yhat'], squared=False)
    
    train_cutoff = df['ds'].max() - pd.Timedelta(days=30)
    test_df = joined[joined.index > train_cutoff]

    mae = mean_absolute_error(test_df['y'], test_df['yhat'])
    rmse = root_mean_squared_error(test_df['y'], test_df['yhat'])
    print(f"Evaluation MAE: {mae:.2f}, RMSE: {rmse:.2f}")

# === 7. Run Pipeline ===
def run_prophet_pipeline(filepath):
    df = load_and_resample(filepath)
    df = add_regressors(df)
    holidays = make_holidays()
    model = fit_prophet_model(df, holidays)
    forecast = make_forecast(model, df, periods=30)
    plot_forecast(model, forecast, df)

# Example:
# run_prophet_pipeline("your_data.csv")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
filepath_data =  "../data/Toronto Island Ferry Ticket Counts.csv"
run_prophet_pipeline(filepath_data)

16:16:46 - cmdstanpy - INFO - Chain [1] start processing
16:16:46 - cmdstanpy - INFO - Chain [1] done processing


Evaluation MAE: 2016.89, RMSE: 2452.98
