In [1]:
import warnings

warnings.filterwarnings("ignore")

In [2]:
from src.data.data_loader import load_data
from src.data.data_cleaner import clean_data
from src.data.data_transforms import create_time_diff_cols

df = clean_data(load_data())
df = create_time_diff_cols(df)

In [3]:
# We'll just tune the hyperparams on the first 100 timepoints for patient 1 since we're just testing
y = df.iloc[:100][
    ["bg-0:00", "insulin-0:00"]
]  # Only want to test the endogenous data right now
y

Unnamed: 0,bg-0:00,insulin-0:00
0,15.1,0.0417
1,14.4,0.0417
2,13.9,0.0417
3,13.8,0.0417
4,13.4,0.0417
...,...,...
95,9.9,0.0417
96,9.7,0.0417
97,9.3,0.0417
98,8.4,0.0417


In [4]:
from sktime.transformations.series.impute import Imputer

# Impute missing values
transformer = Imputer(method="nearest")
transformer.fit(y)
y = transformer.transform(y)

y

Unnamed: 0,bg-0:00,insulin-0:00
0,15.1,0.0417
1,14.4,0.0417
2,13.9,0.0417
3,13.8,0.0417
4,13.4,0.0417
...,...,...
95,9.9,0.0417
96,9.7,0.0417
97,9.3,0.0417
98,8.4,0.0417


In [5]:
# Should return none
y[y.isna()].sum()

bg-0:00         0.0
insulin-0:00    0.0
dtype: float64

In [6]:
# Load all sktime forecasters to be accessible by class name
from sktime.registry import all_estimators

# Get all forecasting models
forecasters = {name: est for name, est in all_estimators(estimator_types="forecaster")}


def load_model(model_name):
    """Loads the sktime model class given the model name"""
    if model_name not in forecasters:
        raise ValueError(f"Model {model_name} not found in sktime")

    ForecasterClass = forecasters[model_name]  # Get the class
    return ForecasterClass()  # Instantiate the model

In [7]:
from sktime.forecasting.model_selection import ForecastingGridSearchCV
from sktime.split import ExpandingSlidingWindowSplitter
from sktime.performance_metrics.forecasting import MeanSquaredError

fh = [1, 2, 3, 4, 5, 6]
cv = ExpandingSlidingWindowSplitter(
    fh=fh, initial_window=12, step_length=12, max_expanding_window_length=24 * 12
)

In [8]:
from src.utils.config_loader import load_yaml_config

# Change below to the path of your config file
config = load_yaml_config("../../src/tuning/configs/modset1.yaml")

# Specify the model names you want to test params for as labelled in the config
# The names should match their sktime class names
models = ["ARIMA"]

In [9]:
from datetime import datetime
import pandas as pd

from src.tuning.param_grid import generate_param_grid

# Keep track of all models tested and log any errors
tested_models = []

# Loop through all listed models and tune params for the first day of data
for model in models:
    forecaster = load_model(model)
    params = generate_param_grid(model, config)

    param_grid = params
    gscv = ForecastingGridSearchCV(
        forecaster=forecaster,
        # Simplify the dictionary so only one set of values are tested
        param_grid=param_grid,
        cv=cv,
        scoring=MeanSquaredError(square_root=True),
        # Raise errors so we can see what params are causing errors
        error_score="raise",
    )

    try:
        gscv.fit(y[["bg-0:00"]], X=y[["insulin-0:00"]])
        tested_models.append(
            {
                "model_name": model,
                "parameters": str(gscv.best_params_),
                "status": "Pass",
                "datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "error": "",
            }
        )
    except Exception as e:
        tested_models.append(
            {
                "model_name": model,
                "parameters": str(params),
                "status": "Fail",
                "datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "error": str(e),
            }
        )
        continue

pd.DataFrame(tested_models).to_csv("tuning_results.csv", index=False)

ValueError: Model 'ARIMA' not found in YAML config.