In [6]:
import numpy as np
from src.data.data_loader import BrisT1DDataLoader
from src.tuning.benchmark import impute_missing_values

# Instantiate the project data loader; later cells read `train_data` from it.
print("Loading data...")
data_loader = BrisT1DDataLoader()

Loading data...


In [None]:
# Working frame: the training set exposed by the loader.
df = data_loader.train_data

In [17]:
# Benchmark inputs: one patient, one y column, one X column.
rows_to_load = 1000
y_cols = ["bg-0:00"]
x_cols = ["cob"]

# Keep only patient p01.
df = df[df["p_num"] == "p01"]

# Impute the X column(s) first, then the y column(s), on the full patient
# frame; truncation to `rows_to_load` rows happens only afterwards.
p_df = impute_missing_values(
    impute_missing_values(df, columns=x_cols),
    columns=y_cols,
).iloc[:rows_to_load]


def load_diabetes():
    """Return the benchmark dataset as a ``(y, X)`` pair for patient ``p01``.

    Reads the module-level ``p_df``, keeps the rows whose ``p_num`` equals
    ``"p01"``, and projects them onto ``y_cols`` and ``x_cols``.

    Returns
    -------
    tuple
        ``(y_df, x_df)``: the ``y_cols`` columns and the ``x_cols`` columns
        of the selected rows, in that order.
    """
    is_p01 = p_df["p_num"] == "p01"
    patient_rows = p_df[is_p01]
    return patient_rows[y_cols], patient_rows[x_cols]

In [18]:
from sktime.benchmarking.forecasting import ForecastingBenchmark
from sktime.performance_metrics.forecasting.probabilistic import PinballLoss
from sktime.split import ExpandingSlidingWindowSplitter

# Parallel benchmark runner: loky backend, n_jobs=-1 (all available CPU cores).
benchmark = ForecastingBenchmark(backend="loky", backend_params={"n_jobs": -1})

# The initial window and the step share one length.
# Presumably 12 samples/hour * 24 h * 3 days of 5-minute data -- TODO confirm.
window_rows = 12 * 24 * 3
# Forecasting horizon: steps 1 through 12 * 6 (inclusive).
horizon_steps = np.arange(1, 12 * 6 + 1)

cv_splitter = ExpandingSlidingWindowSplitter(
    initial_window=window_rows,
    step_length=window_rows,
    fh=horizon_steps,
)

# IMPORTANT: PinballLoss is a probabilistic-forecasting metric.
scorers = [PinballLoss()]

In [19]:
# Register the single benchmark task. The original author left an
# `error_score="raise"` argument commented out here as a debugging option.
benchmark.add_task(dataset_loader=load_diabetes, cv_splitter=cv_splitter, scorers=scorers)

In [34]:
from src.tuning.benchmark import generate_estimators_from_param_grid
import pandas as pd
import os

## Change your yaml path here
yaml_path = "../../src/tuning/configs/0_arma_05min.yaml"

# Build (estimator, estimator_id) pairs from the YAML parameter grid.
estimators = generate_estimators_from_param_grid(yaml_path)
print(estimators)
for estimator, estimator_id in estimators:
    # NOTE(review): sktime's base forecaster appears to define
    # `_predict_interval` itself, so this check may never skip anything; the
    # `capability:pred_int` tag may be the intended test -- confirm.
    if not hasattr(estimator, "_predict_interval"):
        print(
            "Skipping estimator",
            estimator_id,
            "because it doesn't have _predict_interval attribute",
        )
        continue
    benchmark.add_estimator(estimator=estimator, estimator_id=estimator_id)

# benchmark.run treats an existing results file as a cached result and does
# not rerun, so the file name must be unique per run. The timestamp was
# previously computed but never included in the name.
current_time = pd.Timestamp.now().strftime("%Y-%m-%d_%H-%M-%S")
# File stem of the YAML config; os.path copes with any extension or separator.
yaml_name = os.path.splitext(os.path.basename(yaml_path))[0]

# Write into the results directory that is created here. Previously the
# directory was created but the CSV was written to the working directory.
results_dir = "./results/param_tests"
os.makedirs(results_dir, exist_ok=True)

results_file = os.path.join(results_dir, f"{yaml_name}_{current_time}_results.csv")
print("Starting Benchmark Run")
benchmark.run(results_file)

# If there is a file created, then it is probably good enough
if not os.path.exists(results_file):
    raise FileNotFoundError(f"Results file {results_file} was not created")
print(f"Passed: Results file {results_file} was created")

Training 1 AutoARIMA models with different parameters
[(FallbackForecaster(forecasters=[AutoARIMA(max_p=216, max_q=216, n_jobs=-1,
                                          seasonal=False),
                                NaiveForecaster()],
                   verbose=True), 'AutoARIMA-start_p_2-max_p_216-start_q_2-max_q_216-seasonal_False-n_jobs_-1')]
Starting Benchmark Run


  self.estimators.register(id=estimator_id, entry_point=estimator.clone)


TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType'

### Models not working
I am not sure whether these failures are caused specifically by the pinball loss. However, if these models worked previously, the only difference in this notebook is that it scores with `PinballLoss`, so that is the most likely cause.
1. ARDL (Structural yamls). The error thrown: "TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType'"
2. AutoREG (ARMA yamls). The error thrown: "TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType'"
3. AutoARIMA (ARMA yamls). The error thrown: "TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType'"

## Save Data
Save the loaded training data to CSV, since it took about 30 minutes to load.

In [29]:
# Persist the loaded training data so it does not have to be loaded again.
cleaned_csv_path = "./train_data_cleaned.csv"
data_loader.train_data.to_csv(cleaned_csv_path, index=False)