In [7]:
import os, math
import keras

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm, trange
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Input, BatchNormalization
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statistics import pstdev, mean
from tensorflow.keras.models import load_model
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split, KFold
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

In [8]:
# Load the data/visualize
file_path = 'cleaned_file_data\cleaned_training\cleaned_540_t1d.csv'
data = pd.read_csv(file_path, index_col=0)

# Display the DataFrame
data.head()

Unnamed: 0_level_0,CGM,CGM_predict
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2027-05-19 11:30:00,68.8,85.666667
2027-05-19 12:00:00,85.666667,135.333333
2027-05-19 12:30:00,135.333333,131.0
2027-05-19 13:00:00,131.0,126.0
2027-05-19 13:30:00,126.0,124.833333


In [9]:
data.shape

(2004, 2)

In [10]:
def train_model_lstm_cgm(X_train, y_train):
    n_folds = 5
    cross_validation = KFold(n_folds)

    X_data = X_train
    y_data = y_train
    # input_dim = X_data.shape[1]
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_x.fit(X_data)
    scaler_y.fit(y_data)
    scaled_X_train_data = scaler_x.transform(X_data)
    scaled_y_train_data = scaler_y.transform(y_data)
    scaled_X_train_data = np.reshape(scaled_X_train_data, (scaled_X_train_data.shape[0], 1, scaled_X_train_data.shape[1]))

    lstm_best_score = []
    model_check_point_callback = keras.callbacks.ModelCheckpoint(
        filepath = 'new_lstm_cgm.h5',
        save_best_only = True,
        monitor = 'val_loss')
    early_stopping = keras.callbacks.EarlyStopping(patience=100)

    for train_id_x, val_id_x in cross_validation.split(scaled_X_train_data, scaled_y_train_data):
        X_train_fold, X_val_fold = scaled_X_train_data[train_id_x], scaled_X_train_data[val_id_x]
        y_train_fold, y_val_fold = scaled_y_train_data[train_id_x], scaled_y_train_data[val_id_x]

        model = Sequential()
        model.add(LSTM(128, input_shape = (scaled_X_train_data.shape[1], scaled_X_train_data.shape[2])))
        model.add(Dense(150, activation = 'relu'))
        model.add(Dropout(0.20))
        model.add(Dense(100, activation = 'relu'))
        model.add(Dropout(0.15))
        model.add(Dense(50, activation = 'relu'))
        model.add(Dense(20, activation = 'relu'))
        model.add(Dense(1, activation = 'relu'))
        model.compile(loss = 'mse', optimizer = 'adam')
        model.summary()
        model.fit(X_train_fold, y_train_fold,
                  epochs = 200, batch_size = 32, shuffle = False,
                  verbose=1,
                  validation_data = (X_val_fold, y_val_fold),
                  callbacks = [early_stopping, model_check_point_callback])
        lstm_best_score.append(model_check_point_callback.best)

In [11]:
X_train = pd.DataFrame(index = data.index, data = data.CGM, columns = ['CGM'])
print(X_train)

                            CGM
Time                           
2027-05-19 11:30:00   68.800000
2027-05-19 12:00:00   85.666667
2027-05-19 12:30:00  135.333333
2027-05-19 13:00:00  131.000000
2027-05-19 13:30:00  126.000000
...                         ...
2027-07-03 21:00:00  230.000000
2027-07-03 21:30:00  232.166667
2027-07-03 22:00:00  273.666667
2027-07-03 22:30:00  271.833333
2027-07-03 23:00:00  257.500000

[2004 rows x 1 columns]


In [12]:
y_train = pd.DataFrame(index = data.index, data = data.CGM_predict, columns = ['CGM_predict'])
print(y_train)

                     CGM_predict
Time                            
2027-05-19 11:30:00    85.666667
2027-05-19 12:00:00   135.333333
2027-05-19 12:30:00   131.000000
2027-05-19 13:00:00   126.000000
2027-05-19 13:30:00   124.833333
...                          ...
2027-07-03 21:00:00   232.166667
2027-07-03 21:30:00   273.666667
2027-07-03 22:00:00   271.833333
2027-07-03 22:30:00   257.500000
2027-07-03 23:00:00   255.666667

[2004 rows x 1 columns]


In [13]:
train_model_lstm_cgm(X_train, y_train)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               66560     
                                                                 
 dense (Dense)               (None, 150)               19350     
                                                                 
 dropout (Dropout)           (None, 150)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               15100     
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 50)                5050      
                                                                 
 dense_3 (Dense)             (None, 20)                1

In [14]:
def normalized_root_mean_squared_error(true, pred):
    squared_error = np.square((true - pred))
    sum_squared_error = np.sum(squared_error)
    rmse = np.sqrt(sum_squared_error / true.size)
    nrmse_loss = round(rmse/np.std(true),3) # pred or true
    return nrmse_loss

In [15]:
def predict_by_model(model, data, print_individual_metrics):
    test_time = data.index
    test_gl_value = data['CGM']
    X_data = data.drop(columns = ['CGM_predict'])
    y_data = data[['CGM_predict']]
    input_dim = X_data.shape[1]

    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_x.fit(X_data)
    scaler_y.fit(y_data)

    X_test_data = data.drop(columns = ['CGM_predict'])
    y_test_data = data[['CGM_predict']]
    scaled_X_test_data = scaler_x.transform(X_test_data)
    scaled_X_test_data = np.reshape(scaled_X_test_data, (scaled_X_test_data.shape[0], 1, scaled_X_test_data.shape[1]))
    prediction = model.predict(scaled_X_test_data, batch_size = 32)
    scaled_prediction = scaler_y.inverse_transform(prediction)

    mae = mean_absolute_error(scaled_prediction, y_test_data)
    rmse = math.sqrt(mean_squared_error(scaled_prediction, y_test_data))
    nrmse = normalized_root_mean_squared_error(scaled_prediction, y_test_data.values)

    if print_individual_metrics == True:
        print(f"MAE: {round(mae,3)}")
        print(f"RMSE: {round(rmse,3)}")
        print(f"NRMSE: {round(nrmse,3)}")

    return (mae,
            rmse,
            nrmse,
            y_test_data.values,
            scaled_prediction)

In [16]:
# Load the model
from keras.models import load_model
model = load_model('new_lstm_cgm.h5')

predict_by_model(model, data, True)

MAE: 17.712
RMSE: 23.689
NRMSE: 0.487


(17.71221535234923,
 23.688856125226327,
 0.487,
 array([[ 85.66666667],
        [135.33333333],
        [131.        ],
        ...,
        [271.83333333],
        [257.5       ],
        [255.66666667]]),
 array([[ 79.58372 ],
        [ 90.984726],
        [134.19048 ],
        ...,
        [262.59616 ],
        [260.89462 ],
        [247.61195 ]], dtype=float32))