In [None]:
from src.data.data_loader import load_data
from src.data.data_transforms import create_time_diff_cols
from sktime.transformations.series.impute import Imputer

df = load_data(use_cached=True)
df = create_time_diff_cols(df)

### Use patient 1

In [85]:
patient_1 = df[df["p_num"] == "p01"]
print(len(patient_1))
full_len = len(patient_1)
patient_1

8459


Unnamed: 0,id,p_num,time,bg-0:00,insulin-0:00,carbs-0:00,hr-0:00,steps-0:00,cals-0:00,activity-0:00,time_diff,cob,carb_availability,insulin_availability,iob
0,p01_0,p01,06:10:00,15.1,0.0417,,,,,,NaT,0.0,0.0,0.000000,0.000000
1,p01_1,p01,06:25:00,14.4,0.0417,,,,,,0 days 00:15:00,0.0,0.0,0.000646,0.041261
2,p01_2,p01,06:40:00,13.9,0.0417,,,,,,0 days 00:15:00,0.0,0.0,0.002091,0.080352
3,p01_3,p01,06:55:00,13.8,0.0417,,,,,,0 days 00:15:00,0.0,0.0,0.003964,0.115980
4,p01_4,p01,07:10:00,13.4,0.0417,,,,,,0 days 00:15:00,0.0,0.0,0.005981,0.147553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8454,p01_8454,p01,22:50:00,6.2,0.0333,,76.8,70.0,5.00,,0 days 00:15:00,0.0,0.0,0.012917,0.214648
8455,p01_8455,p01,23:05:00,5.8,0.0167,,72.2,13.0,5.00,,0 days 00:15:00,0.0,0.0,0.012917,0.214648
8456,p01_8456,p01,23:20:00,5.4,0.0167,,73.4,18.0,5.10,,0 days 00:15:00,0.0,0.0,0.012659,0.198222
8457,p01_8457,p01,23:35:00,4.8,0.0167,,64.7,0.0,7.89,,0 days 00:15:00,0.0,0.0,0.012084,0.182661


### Reduce the size of the df

In [102]:
# We'll just tune the hyperparams on the first 100 timepoints for patient 1 since we're just testing
# full is 8459
row_to_load = 3000
cols = ["bg-0:00", "insulin-0:00", "iob", "cob"]

patient_1_trimmed = df.iloc[:row_to_load][cols]
print(len(patient_1_trimmed))
patient_1_trimmed

3000


Unnamed: 0,bg-0:00,insulin-0:00,iob,cob
0,15.1,0.0417,0.000000,0.0
1,14.4,0.0417,0.041261,0.0
2,13.9,0.0417,0.080352,0.0
3,13.8,0.0417,0.115980,0.0
4,13.4,0.0417,0.147553,0.0
...,...,...,...,...
2995,3.9,0.0583,1.314260,0.0
2996,6.2,0.0583,0.356437,0.0
2997,9.1,0.0583,0.360643,0.0
2998,10.9,0.0583,0.364364,0.0


### Handle missing values 

In [103]:
# Impute missing values
transformer = Imputer(method="nearest")
transformer.fit(patient_1_trimmed)
y = transformer.transform(patient_1_trimmed)
y

Unnamed: 0,bg-0:00,insulin-0:00,iob,cob
0,15.1,0.0417,0.000000,0.0
1,14.4,0.0417,0.041261,0.0
2,13.9,0.0417,0.080352,0.0
3,13.8,0.0417,0.115980,0.0
4,13.4,0.0417,0.147553,0.0
...,...,...,...,...
2995,3.9,0.0583,1.314260,0.0
2996,6.2,0.0583,0.356437,0.0
2997,9.1,0.0583,0.360643,0.0
2998,10.9,0.0583,0.364364,0.0


In [104]:
y[y.isna()].sum()

bg-0:00         0.0
insulin-0:00    0.0
iob             0.0
cob             0.0
dtype: float64

In [107]:
from sktime.benchmarking.forecasting import ForecastingBenchmark
from sktime.split import ExpandingWindowSplitter
from sktime.performance_metrics.forecasting import MeanSquaredError

x_cols = ["insulin-0:00", "cob", "iob"]


def load_diabetes():
    return (y[["bg-0:00"]], y[x_cols])


y_true, X = load_diabetes()
X

Unnamed: 0,insulin-0:00,cob,iob
0,0.0417,0.0,0.000000
1,0.0417,0.0,0.041261
2,0.0417,0.0,0.080352
3,0.0417,0.0,0.115980
4,0.0417,0.0,0.147553
...,...,...,...
2995,0.0583,0.0,1.314260
2996,0.0583,0.0,0.356437
2997,0.0583,0.0,0.360643
2998,0.0583,0.0,0.364364


### Benchmark

In [108]:
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

benchmark = ForecastingBenchmark()
cv_splitter = ExpandingWindowSplitter(
    initial_window=3,
    step_length=1,
    fh=3,
)
scorers = [MeanSquaredError(square_root=True)]

benchmark.add_task(
    dataset_loader=load_diabetes,
    cv_splitter=cv_splitter,
    scorers=scorers,
)


# estimator = ARIMA()
# estimator = NaiveForecaster()
estimator = ExponentialSmoothing()


estimator_id = estimator.__class__.__name__


benchmark.add_estimator(
    estimator=estimator,
    estimator_id=estimator_id,
)

benchmark.run(f"./{estimator_id}_results.csv")

Unnamed: 0,validation_id,model_id,runtime_secs,MeanSquaredError_fold_0_test,MeanSquaredError_fold_1_test,MeanSquaredError_fold_2_test,MeanSquaredError_fold_3_test,MeanSquaredError_fold_4_test,MeanSquaredError_fold_5_test,MeanSquaredError_fold_6_test,...,MeanSquaredError_fold_7987_test,MeanSquaredError_fold_7988_test,MeanSquaredError_fold_7989_test,MeanSquaredError_fold_7990_test,MeanSquaredError_fold_7991_test,MeanSquaredError_fold_7992_test,MeanSquaredError_fold_7993_test,MeanSquaredError_fold_7994_test,MeanSquaredError_mean,MeanSquaredError_std
0,[dataset=load_diabetes]_[cv_splitter=Expanding...,ExponentialSmoothing,144.027199,1.1,1.7,1.4,0.1,2.728571,2.3125,1.055556,...,0.7,1.0,1.5,1.2,0.7,0.5,2.7,3.5,1.915117,1.782279
