In [1]:
%load_ext autoreload
%autoreload 2

In [4]:
import pandas as pd
import os
import tensorflow as tf
from core.pre_processing import build_ohio_dataset, get_patient_window

In [7]:
# Directory the saved models are loaded from, and directory the prediction
# CSVs are written to.
root_path = "models"
store_to = "preds"
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before any export cell runs (safe to re-run).
os.makedirs(store_to, exist_ok=True)

# Original and working sampling periods of the series
# (presumably minutes between samples -- TODO confirm).
ORIG_FREQ = 5
SAMPL_FREQ = 5
# Integer ratio between the two periods; 1 means no resampling.
FREQ_CORRECTION = ORIG_FREQ // SAMPL_FREQ

# Patient identifiers of the 2018 and 2020 cohorts.
PATIENTS_2018 = ["559", "563", "570", "575", "588", "591"]
PATIENTS_2020 = ["540", "544", "552", "567", "584", "596"]

In [8]:
# Column names passed to get_patient_window as the feature list, in this order.
features = [
    "Time",
    "Glucose",
    "Rapid Insulin sub_tissue",
    "Carbohydrates gut",
    "finger_stick",
]

In [15]:
# Load the "2020" dataset through the project helper. generate_predictions
# later iterates it as (train_df, val_df, test_df, patient_id) tuples.
dataset_2020 = build_ohio_dataset("2020")

Using time to peak: 100 for patient 540
Identified 22 gaps for train set of 540
Identified 8 gaps for val set of 540
Identified 8 gaps for test set of 540
Using time to peak: 100 for patient 544
Identified 15 gaps for train set of 544
Identified 7 gaps for val set of 544
Identified 6 gaps for test set of 544
Using time to peak: 100 for patient 552
Identified 34 gaps for train set of 552
Identified 10 gaps for val set of 552
Identified 10 gaps for test set of 552
Empty carbs
Using time to peak: 100 for patient 567
Identified 46 gaps for train set of 567
Identified 10 gaps for val set of 567
Identified 11 gaps for test set of 567
Using time to peak: 100 for patient 584
Identified 45 gaps for train set of 584
Identified 14 gaps for val set of 584
Identified 15 gaps for test set of 584
Using time to peak: 100 for patient 596
Identified 23 gaps for train set of 596
Identified 2 gaps for val set of 596
Identified 6 gaps for test set of 596


In [32]:
# Load the "2018" dataset through the project helper. generate_predictions
# later iterates it as (train_df, val_df, test_df, patient_id) tuples.
dataset_2018 = build_ohio_dataset("2018")

Using time to peak: 100 for patient 559
Identified 35 gaps for train set of 559
Identified 7 gaps for val set of 559
Identified 11 gaps for test set of 559
Using time to peak: 100 for patient 563
Identified 15 gaps for train set of 563
Identified 6 gaps for val set of 563
Identified 3 gaps for test set of 563
Using time to peak: 100 for patient 570
Identified 15 gaps for train set of 570
Identified 5 gaps for val set of 570
Identified 9 gaps for test set of 570
Using time to peak: 100 for patient 575
Identified 56 gaps for train set of 575
Identified 16 gaps for val set of 575
Identified 10 gaps for test set of 575
Using time to peak: 100 for patient 588
Identified 9 gaps for train set of 588
Identified 1 gaps for val set of 588
Identified 2 gaps for test set of 588
Using time to peak: 100 for patient 591
Identified 23 gaps for train set of 591
Identified 3 gaps for val set of 591
Identified 4 gaps for test set of 591


In [18]:
def generate_predictions(ph, dataset, batch_size, input_len, best_model_name):
    """Run a saved model over every patient's test windows and collect the results.

    Relies on the notebook-level globals ``features``, ``SAMPL_FREQ`` and
    ``root_path``.

    Args:
        ph: Prediction horizon, forwarded unchanged to ``get_patient_window``
            (presumably expressed in sampling steps -- TODO confirm against
            ``core.pre_processing``).
        dataset: Iterable of ``(train_df, val_df, test_df, patient_id)`` tuples.
        batch_size: Batch size forwarded to ``get_patient_window``.
        input_len: Input history length; it is integer-divided by
            ``SAMPL_FREQ`` before being passed to ``get_patient_window``.
        best_model_name: Name of the saved model, resolved relative to
            ``root_path``.

    Returns:
        A ``(patient_ids, all_targets, all_predictions)`` tuple of flat lists.
        ``patient_ids`` holds one entry per target value; predictions are
        appended in the same batch order as the targets.
    """
    window_len = input_len // SAMPL_FREQ

    patient_windows = {}
    for p_tr_df, p_v_df, p_tst_df, patient_id in dataset:
        patient_windows[patient_id] = get_patient_window(
            (p_tr_df, p_v_df, p_tst_df),
            features,
            window_len,
            1,
            ph,
            drop_noisy=False,
            batch_size=batch_size,
            min_max_scale=False
        )

    best_model = tf.keras.models.load_model(os.path.join(root_path, best_model_name))
    all_predictions = []
    all_targets = []
    patient_ids = []
    for patient_id, patient_w in patient_windows.items():
        print(f"Evaluating {patient_id}")
        for inputs, targets in patient_w.test:
            targets = targets.numpy().flatten()
            # Model.predict builds a fresh data pipeline on every call and is
            # not meant to be called per batch inside a loop; predict_on_batch
            # runs a single forward pass on the batch that is already in hand.
            predictions = best_model.predict_on_batch(inputs).flatten()

            all_predictions.extend(predictions)
            all_targets.extend(targets)
            patient_ids.extend([patient_id] * len(targets))
    return patient_ids, all_targets, all_predictions

## 2020 30min predictions

In [19]:
# Score every 2020 patient's test split with the saved 2020 / 30-minute model.
patient_ids, all_targets, all_predictions = generate_predictions(
    ph=6,
    dataset=dataset_2020,
    batch_size=256,
    input_len=30,
    best_model_name="lmu_2020_ph30min_1",
)

Evaluating 540
all rolling: 3066
2881 size before reduction
2881 size after reduction
17 windows with too many nans
157 windows with empty labels
Evaluating 544
all rolling: 3136
2695 size before reduction
2695 size after reduction
19 windows with too many nans
411 windows with empty labels
Evaluating 552
all rolling: 3988
2332 size before reduction
2332 size after reduction
32 windows with too many nans
1613 windows with empty labels
Evaluating 567
all rolling: 2972
2349 size before reduction
2349 size after reduction
43 windows with too many nans
569 windows with empty labels
Evaluating 584
all rolling: 2995
2645 size before reduction
2645 size after reduction
27 windows with too many nans
312 windows with empty labels
Evaluating 596
all rolling: 3003
2712 size before reduction
2712 size after reduction
24 windows with too many nans
256 windows with empty labels


In [20]:
# One row per predicted value: owning patient, ground truth, model output.
df_2020_30min = pd.DataFrame(
    {
        "patient_id": patient_ids,
        "target": all_targets,
        "prediction": all_predictions,
    }
)

In [30]:
# Write the 2020 / 30-minute results into the store_to directory. With
# to_csv's default index=True the row index is written as the first column.
df_2020_30min.to_csv(os.path.join(store_to, "2020_30min_preds.csv"))

## 2020 60min predictions

In [31]:
# Score every 2020 patient's test split with the saved 2020 / 60-minute model.
patient_ids_2020_60min, all_targets_2020_60min, all_predictions_2020_60min = generate_predictions(
    ph=12,
    dataset=dataset_2020,
    batch_size=256,
    input_len=30,
    best_model_name="lmu_2020_ph60min_1",
)

Evaluating 540
all rolling: 3066
2869 size before reduction
2869 size after reduction
23 windows with too many nans
157 windows with empty labels
Evaluating 544
all rolling: 3136
2671 size before reduction
2671 size after reduction
37 windows with too many nans
411 windows with empty labels
Evaluating 552
all rolling: 3988
2296 size before reduction
2296 size after reduction
62 windows with too many nans
1613 windows with empty labels
Evaluating 567
all rolling: 2972
2300 size before reduction
2300 size after reduction
86 windows with too many nans
569 windows with empty labels
Evaluating 584
all rolling: 2995
2624 size before reduction
2624 size after reduction
42 windows with too many nans
312 windows with empty labels
Evaluating 596
all rolling: 3003
2687 size before reduction
2687 size after reduction
43 windows with too many nans
256 windows with empty labels


In [33]:
# One row per predicted value: owning patient, ground truth, model output.
df_2020_60min = pd.DataFrame(
    {
        "patient_id": patient_ids_2020_60min,
        "target": all_targets_2020_60min,
        "prediction": all_predictions_2020_60min,
    }
)

In [35]:
# Write the 2020 / 60-minute results into the store_to directory. With
# to_csv's default index=True the row index is written as the first column.
df_2020_60min.to_csv(os.path.join(store_to, "2020_60min_preds.csv"))

## 2018 30min predictions

In [36]:
# Score every 2018 patient's test split with the saved 2018 / 30-minute model.
patient_ids_2018_30min, all_targets_2018_30min, all_predictions_2018_30min = generate_predictions(
    ph=6,
    dataset=dataset_2018,
    batch_size=256,
    input_len=30,
    best_model_name="lmu_2018_ph30min_0",
)

Evaluating 559
all rolling: 2876
2482 size before reduction
2482 size after reduction
34 windows with too many nans
349 windows with empty labels
Evaluating 563
all rolling: 2814
2559 size before reduction
2559 size after reduction
10 windows with too many nans
234 windows with empty labels
Evaluating 570
all rolling: 2880
2727 size before reduction
2727 size after reduction
20 windows with too many nans
122 windows with empty labels
Evaluating 575
all rolling: 2745
2575 size before reduction
2575 size after reduction
15 windows with too many nans
144 windows with empty labels
Evaluating 588
all rolling: 2880
2777 size before reduction
2777 size after reduction
8 windows with too many nans
84 windows with empty labels
Evaluating 591
all rolling: 2847
2743 size before reduction
2743 size after reduction
10 windows with too many nans
83 windows with empty labels


In [37]:
# One row per predicted value: owning patient, ground truth, model output.
df_2018_30min = pd.DataFrame(
    {
        "patient_id": patient_ids_2018_30min,
        "target": all_targets_2018_30min,
        "prediction": all_predictions_2018_30min,
    }
)

In [39]:
# Write the 2018 / 30-minute results into the store_to directory. With
# to_csv's default index=True the row index is written as the first column.
df_2018_30min.to_csv(os.path.join(store_to, "2018_30min_preds.csv"))

## 2018 60min predictions

In [40]:
# Score every 2018 patient's test split with the saved 2018 / 60-minute model.
patient_ids_2018_60min, all_targets_2018_60min, all_predictions_2018_60min = generate_predictions(
    ph=12,
    dataset=dataset_2018,
    batch_size=256,
    input_len=30,
    best_model_name="lmu_2018_ph60min_3",
)

Evaluating 559
all rolling: 2876
2442 size before reduction
2442 size after reduction
68 windows with too many nans
349 windows with empty labels
Evaluating 563
all rolling: 2814
2550 size before reduction
2550 size after reduction
13 windows with too many nans
234 windows with empty labels
Evaluating 570
all rolling: 2880
2703 size before reduction
2703 size after reduction
38 windows with too many nans
122 windows with empty labels
Evaluating 575
all rolling: 2745
2554 size before reduction
2554 size after reduction
30 windows with too many nans
144 windows with empty labels
Evaluating 588
all rolling: 2880
2762 size before reduction
2762 size after reduction
17 windows with too many nans
84 windows with empty labels
Evaluating 591
all rolling: 2847
2725 size before reduction
2725 size after reduction
22 windows with too many nans
83 windows with empty labels


In [41]:
# One row per predicted value: owning patient, ground truth, model output.
df_2018_60min = pd.DataFrame(
    {
        "patient_id": patient_ids_2018_60min,
        "target": all_targets_2018_60min,
        "prediction": all_predictions_2018_60min,
    }
)

In [43]:
# Write the 2018 / 60-minute results into the store_to directory. With
# to_csv's default index=True the row index is written as the first column.
df_2018_60min.to_csv(os.path.join(store_to, "2018_60min_preds.csv"))