In [None]:
import numpy as np
from sktime.performance_metrics.forecasting.probabilistic import PinballLoss
from sktime.forecasting.statsforecast import StatsForecastAutoCES
from sktime.benchmarking.forecasting import ForecastingBenchmark
from sktime.split import ExpandingSlidingWindowSplitter

from src.data.data_loader import BrisT1DDataLoader
from src.tuning.benchmark import impute_missing_values

# Load the training split through the project's BrisT1D data loader.
data_loader = BrisT1DDataLoader()
df = data_loader.train_data

# Target column(s) to forecast and exogenous column(s) passed alongside them.
y_cols = ["bg-0:00"]
x_cols = ["cob"]

# Impute in sequence, feeding the result of the first call into the second.
# Passing the raw `df` to both calls would rebind `p_df` to a frame in which
# only `y_cols` were imputed, silently dropping the `x_cols` imputation
# whenever the helper returns a new frame instead of mutating in place.
p_df = impute_missing_values(df, columns=x_cols)
p_df = impute_missing_values(p_df, columns=y_cols)


def load_diabetes():
    """Return the target and exogenous frames for participant ``"p01"``.

    Reads the module-level ``p_df`` frame and the ``y_cols`` / ``x_cols``
    column lists; takes no arguments so it can be handed to the benchmark as
    a zero-argument dataset loader.

    Returns
    -------
    tuple
        ``(y_df, x_df)``: the rows of ``p_df`` whose ``"p_num"`` equals
        ``"p01"``, restricted to ``y_cols`` and ``x_cols`` respectively.
    """
    # Build the row mask once and reuse it for both column selections.
    is_p01 = p_df["p_num"] == "p01"
    return (p_df.loc[is_p01, y_cols], p_df.loc[is_p01, x_cols])


# IMPORTANT: PinballLoss is used for probabilistic forecasting, so every
# benchmarked estimator has to be able to produce probabilistic predictions.
scorers = [PinballLoss()]

# The same window length is used for the initial training window and for the
# step between folds.
# NOTE(review): 12 * 24 * 3 looks like three days of 5-minute samples and
# 12 * 6 like a six-hour horizon — confirm against the data's sampling rate.
window_length = 12 * 24 * 3
forecast_horizon = np.arange(1, 12 * 6 + 1)
cv_splitter = ExpandingSlidingWindowSplitter(
    initial_window=window_length,
    step_length=window_length,
    fh=forecast_horizon,
)

# Use parallel processing ("loky" backend) on all available CPU cores.
benchmark = ForecastingBenchmark(backend="loky", backend_params={"n_jobs": -1})

# Register the single task: one dataset, one CV scheme, one list of scorers.
benchmark.add_task(
    dataset_loader=load_diabetes,
    cv_splitter=cv_splitter,
    scorers=scorers,
    # error_score="raise",
)

In [None]:
from src.tuning.benchmark import generate_estimators_from_param_grid
import datetime
import pandas as pd
import os

## Change your yaml path here
yaml_path = "../../src/tuning/configs/1_exponential_smooth_15min.yaml"

# Register every estimator from the parameter grid that can be scored with a
# probabilistic metric.
estimators = generate_estimators_from_param_grid(yaml_path)
n_added = 0
for estimator, estimator_id in estimators:
    # The "capability:pred_int" tag is what declares probabilistic-forecast
    # support. A hasattr check on `_predict_interval` does not discriminate,
    # because the forecaster base class already defines that method.
    supports_pred_int = estimator.get_tag(
        "capability:pred_int", tag_value_default=False, raise_error=False
    )
    if not supports_pred_int:
        print(
            "Skipping estimator",
            estimator_id,
            "because it doesn't support probabilistic predictions",
        )
        continue
    benchmark.add_estimator(estimator=estimator, estimator_id=estimator_id)
    n_added += 1

# Fail early with a clear message instead of running an empty benchmark.
if n_added == 0:
    raise ValueError(
        f"No estimator from {yaml_path} supports probabilistic predictions"
    )

# Needs to add the datetime to make the file name unique
# so when you run benchmark.run, it doesn't see the file as cached result and not rerun
current_time = pd.Timestamp.now().strftime("%Y-%m-%d_%H-%M-%S")
yaml_name = os.path.splitext(os.path.basename(yaml_path))[0]

# Write into the directory that is created here, under a name derived from the
# config file and the timestamp rather than from whichever estimator_id the
# loop happened to end on.
results_dir = "./results/param_tests"
os.makedirs(results_dir, exist_ok=True)
results_file = os.path.join(results_dir, f"{yaml_name}_{current_time}.csv")
benchmark.run(results_file)

# If there is a file created, then it is probably good enough
if not os.path.exists(results_file):
    raise FileNotFoundError(f"Results file {results_file} was not created")
print(f"Passed: Results file {results_file} was created")

In [None]:
# Example forecaster: swap in the estimator you want to benchmark here.
estimator = StatsForecastAutoCES()

# The class name is used both as the benchmark identifier and as the prefix
# of the results file.
estimator_id = type(estimator).__name__

benchmark.add_estimator(estimator=estimator, estimator_id=estimator_id)

results_path = "./" + estimator_id + "_results.csv"
benchmark.run(results_path)