In [1]:
import mlflow
import mlflow.prophet
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.plot import add_changepoints_to_plot
import optuna
from optuna.visualization import plot_intermediate_values

from mlflow_prophet_lab.config import AppConfig, load_config
from mlflow_prophet_lab.data_loader import load_data, RateRecord
from mlflow_prophet_lab.holidays import get_holidays
from mlflow_prophet_lab.weekends_remover import remove_weekends
from mlflow_prophet_lab.cross_validation import run_cross_validation

from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

In [2]:
# Load the application configuration and point MLflow at the tracking
# store given by its `db_uri`, so the runs started later are recorded there.
config: AppConfig = load_config()
mlflow.set_tracking_uri(config.db_uri)

loading config...
config loaded


In [3]:
# Load the rate records via `load_data`, driven by the configuration.
# Each record's `date` and `rate` attributes are read when the modelling
# frame is built from this list.
records: list[RateRecord] = load_data(config)

In [4]:
# One row per rate record, using the column names Prophet expects:
# "ds" for the timestamp and "y" for the observed value.
df = pd.DataFrame(
    [
        {"ds": record.date, "y": record.rate}
        for record in records
    ]
)

In [5]:
def objective(trial, df):
    """Optuna objective for Prophet hyperparameter tuning.

    Samples a set of Prophet hyperparameters from ``trial``, fits a model on
    ``df``, scores it with ``run_cross_validation`` + ``performance_metrics``
    and records the sampled hyperparameters and the resulting MAPE in a
    nested MLflow run.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        df: Training frame passed to ``Prophet.fit``. Its ``ds`` column must
            hold date-like values exposing ``.year``; the first and last
            years select which holidays are generated.

    Returns:
        The mean of the ``mape`` column of the cross-validation metrics
        (the value the study optimises).
    """
    with mlflow.start_run(nested=True):
        # Only values sampled from the trial live here, so this dict is
        # exactly what gets logged as the run's parameters.
        tuned_params = {
            "changepoint_prior_scale": trial.suggest_float(
                "changepoint_prior_scale", 0.001, 0.5
            ),
            "changepoint_range": trial.suggest_float(
                "changepoint_range", 0.8, 0.95
            ),
            "seasonality_prior_scale": trial.suggest_float(
                "seasonality_prior_scale", 0.01, 10
            ),
            "holidays_prior_scale": trial.suggest_float(
                "holidays_prior_scale", 0.01, 10
            ),
            # A numeric yearly_seasonality is the Fourier order, which Prophet
            # uses as an integer. Sampling a float made distinct suggestions
            # collapse to the same model and logged a value that was not the
            # order actually fitted, so sample integers directly.
            "yearly_seasonality": trial.suggest_int(
                "yearly_seasonality", 10, 20
            ),
            "seasonality_mode": trial.suggest_categorical(
                "seasonality_mode", ["additive", "multiplicative"]
            ),
        }

        # Holidays cover every calendar year spanned by the data (inclusive).
        first_year = df["ds"].min().year
        last_year = df["ds"].max().year
        holidays = get_holidays(list(range(first_year, last_year + 1)))

        model = Prophet(holidays=holidays, **tuned_params)
        model.fit(df)

        cv_results = run_cross_validation(model)
        metrics = performance_metrics(cv_results)
        mape = metrics["mape"].mean()

        # The holidays object is a fixed input rather than a hyperparameter,
        # and MLflow params are short strings, so it is deliberately not
        # logged alongside the tuned values.
        mlflow.log_params(tuned_params)
        mlflow.log_metric("mape", mape)

        return mape

In [6]:
# Search budget and sampler seed; the seed makes the sequence of suggested
# hyperparameters repeatable across kernel restarts.
N_TRIALS = 50
SAMPLER_SEED = 42

with mlflow.start_run(run_name="Prophet HPO"):
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=SAMPLER_SEED),
    )
    study.optimize(lambda trial: objective(trial, df), n_trials=N_TRIALS)

    # Record the winning configuration on the parent run.
    mlflow.log_params({f"best_{k}": v for k, v in study.best_params.items()})
    mlflow.log_metric("best_mape", study.best_value)

# A figure created inside the `with` body is not the cell's last expression,
# so it has to be shown explicitly or it is silently discarded.
# NOTE(review): plot_intermediate_values only draws values reported through
# trial.report(); `objective` does not report any, so this figure is expected
# to be empty — consider an optimisation-history plot instead.
intermediate_values_fig = plot_intermediate_values(study)
intermediate_values_fig.show()

2025/12/30 17:51:31 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/30 17:51:31 INFO mlflow.store.db.utils: Updating database tables
2025/12/30 17:51:31 INFO alembic.runtime.migration: Context impl PostgresqlImpl.
2025/12/30 17:51:31 INFO alembic.runtime.migration: Will assume transactional DDL.
2025/12/30 17:51:31 INFO alembic.runtime.migration: Context impl PostgresqlImpl.
2025/12/30 17:51:31 INFO alembic.runtime.migration: Will assume transactional DDL.
[I 2025-12-30 17:51:31,919] A new study created in memory with name: no-name-76aef52f-4c32-4290-9000-2a147cdf0050
17:51:32 - cmdstanpy - INFO - Chain [1] start processing
17:51:35 - cmdstanpy - INFO - Chain [1] done processing
17:51:36 - cmdstanpy - INFO - Chain [1] start processing
17:51:36 - cmdstanpy - INFO - Chain [1] start processing
17:51:36 - cmdstanpy - INFO - Chain [1] start processing
17:51:36 - cmdstanpy - INFO - Chain [1] start processing
17:51:36 - cmdstanpy - INFO - Chain [1] start process