In [40]:
import os, math
import keras

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm, trange
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Input, BatchNormalization
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statistics import pstdev, mean
from tensorflow.keras.models import load_model
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split, KFold
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

In [41]:
# Load the data/visualize
file_path = r'C:\Users\19176\Desktop\Ohio Data Set\data\prediabetic_file\prediabetic_cleaned_training\cleaned_Dexcom_006.csv'
data = pd.read_csv(file_path, index_col=0)

# Display the DataFrame
data.head()

Unnamed: 0_level_0,CGM,CGM_predict
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-28 12:00:00,107.0,106.333333
2020-02-28 12:30:00,106.333333,108.833333
2020-02-28 13:00:00,108.833333,118.5
2020-02-28 13:30:00,118.5,118.666667
2020-02-28 14:00:00,118.666667,114.833333


In [42]:
data.shape

(474, 2)

In [43]:
def train_model_lstm_cgm(X_train, y_train):
    n_folds = 5
    cross_validation = KFold(n_folds)

    X_data = X_train
    y_data = y_train
    # input_dim = X_data.shape[1]
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_x.fit(X_data)
    scaler_y.fit(y_data)
    scaled_X_train_data = scaler_x.transform(X_data)
    scaled_y_train_data = scaler_y.transform(y_data)
    scaled_X_train_data = np.reshape(scaled_X_train_data, (scaled_X_train_data.shape[0], 1, scaled_X_train_data.shape[1]))

    lstm_best_score = []
    model_check_point_callback = keras.callbacks.ModelCheckpoint(
        filepath = 'prediabetic_lstm_cgm.h5',
        save_best_only = True,
        monitor = 'val_loss')
    early_stopping = keras.callbacks.EarlyStopping(patience=100)

    for train_id_x, val_id_x in cross_validation.split(scaled_X_train_data, scaled_y_train_data):
        X_train_fold, X_val_fold = scaled_X_train_data[train_id_x], scaled_X_train_data[val_id_x]
        y_train_fold, y_val_fold = scaled_y_train_data[train_id_x], scaled_y_train_data[val_id_x]

        model = Sequential()
        model.add(LSTM(128, input_shape = (scaled_X_train_data.shape[1], scaled_X_train_data.shape[2])))
        model.add(Dense(150, activation = 'relu'))
        model.add(Dropout(0.20))
        model.add(Dense(100, activation = 'relu'))
        model.add(Dropout(0.15))
        model.add(Dense(50, activation = 'relu'))
        model.add(Dense(20, activation = 'relu'))
        model.add(Dense(1, activation = 'relu'))
        model.compile(loss = 'mse', optimizer = 'adam')
        model.summary()
        model.fit(X_train_fold, y_train_fold,
                  epochs = 200, batch_size = 32, shuffle = False,
                  verbose=1,
                  validation_data = (X_val_fold, y_val_fold),
                  callbacks = [early_stopping, model_check_point_callback])
        lstm_best_score.append(model_check_point_callback.best)

In [44]:
X_train = pd.DataFrame(index = data.index, data = data.CGM, columns = ['CGM'])
print(X_train)

                            CGM
Time                           
2020-02-28 12:00:00  107.000000
2020-02-28 12:30:00  106.333333
2020-02-28 13:00:00  108.833333
2020-02-28 13:30:00  118.500000
2020-02-28 14:00:00  118.666667
...                         ...
2020-03-09 06:30:00  107.500000
2020-03-09 07:00:00  104.833333
2020-03-09 07:30:00  103.000000
2020-03-09 08:00:00  106.333333
2020-03-09 08:30:00  149.833333

[474 rows x 1 columns]


In [45]:
y_train = pd.DataFrame(index = data.index, data = data.CGM_predict, columns = ['CGM_predict'])
print(y_train)

                     CGM_predict
Time                            
2020-02-28 12:00:00   106.333333
2020-02-28 12:30:00   108.833333
2020-02-28 13:00:00   118.500000
2020-02-28 13:30:00   118.666667
2020-02-28 14:00:00   114.833333
...                          ...
2020-03-09 06:30:00   104.833333
2020-03-09 07:00:00   103.000000
2020-03-09 07:30:00   106.333333
2020-03-09 08:00:00   149.833333
2020-03-09 08:30:00   138.333333

[474 rows x 1 columns]


In [46]:
train_model_lstm_cgm(X_train, y_train)

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_10 (LSTM)              (None, 128)               66560     
                                                                 
 dense_50 (Dense)            (None, 150)               19350     
                                                                 
 dropout_20 (Dropout)        (None, 150)               0         
                                                                 
 dense_51 (Dense)            (None, 100)               15100     
                                                                 
 dropout_21 (Dropout)        (None, 100)               0         
                                                                 
 dense_52 (Dense)            (None, 50)                5050      
                                                                 
 dense_53 (Dense)            (None, 20)              

In [47]:
def normalized_root_mean_squared_error(true, pred):
    squared_error = np.square((true - pred))
    sum_squared_error = np.sum(squared_error)
    rmse = np.sqrt(sum_squared_error / true.size)
    nrmse_loss = round(rmse/np.std(true),3) # pred or true
    return nrmse_loss

In [48]:
def predict_by_model(model, data, print_individual_metrics):
    test_time = data.index
    test_gl_value = data['CGM']
    X_data = data.drop(columns = ['CGM_predict'])
    y_data = data[['CGM_predict']]
    input_dim = X_data.shape[1]

    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_x.fit(X_data)
    scaler_y.fit(y_data)

    X_test_data = data.drop(columns = ['CGM_predict'])
    y_test_data = data[['CGM_predict']]
    scaled_X_test_data = scaler_x.transform(X_test_data)
    scaled_X_test_data = np.reshape(scaled_X_test_data, (scaled_X_test_data.shape[0], 1, scaled_X_test_data.shape[1]))
    prediction = model.predict(scaled_X_test_data, batch_size = 32)
    scaled_prediction = scaler_y.inverse_transform(prediction)

    mae = mean_absolute_error(scaled_prediction, y_test_data)
    rmse = math.sqrt(mean_squared_error(scaled_prediction, y_test_data))
    nrmse = normalized_root_mean_squared_error(scaled_prediction, y_test_data.values)

    if print_individual_metrics == True:
        print(f"MAE: {round(mae,3)}")
        print(f"RMSE: {round(rmse,3)}")
        print(f"NRMSE: {round(nrmse,3)}")

    return (mae,
            rmse,
            nrmse,
            y_test_data.values,
            scaled_prediction)

In [49]:
# Load the model
from keras.models import load_model
model = load_model('prediabetic_lstm_cgm.h5')

predict_by_model(model, data, True)

MAE: 9.722
RMSE: 15.107
NRMSE: 0.65


(9.721736789923344,
 15.106688152191985,
 0.65,
 array([[106.33333333],
        [108.83333333],
        [118.5       ],
        [118.66666667],
        [114.83333333],
        [107.        ],
        [103.33333333],
        [100.16666667],
        [109.5       ],
        [122.16666667],
        [114.33333333],
        [105.66666667],
        [109.5       ],
        [109.16666667],
        [111.66666667],
        [136.33333333],
        [132.5       ],
        [122.66666667],
        [131.83333333],
        [126.83333333],
        [129.        ],
        [125.5       ],
        [186.33333333],
        [191.        ],
        [164.16666667],
        [154.        ],
        [155.66666667],
        [165.5       ],
        [164.33333333],
        [156.16666667],
        [141.66666667],
        [117.5       ],
        [129.83333333],
        [156.66666667],
        [133.83333333],
        [115.16666667],
        [111.16666667],
        [115.5       ],
        [116.5       ],
        [115.   