In [32]:
import os, math
import keras

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm, trange
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Input, BatchNormalization
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statistics import pstdev, mean
from tensorflow.keras.models import load_model
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split, KFold
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

In [33]:
# Load the data/visualize
file_path = r'C:\Users\19176\Desktop\Ohio Data Set\data\t2_diabetes\t2_cleaned_training_folder\cleaned_2015_0_20210203.csv'
data = pd.read_csv(file_path, index_col=0)

# Display the DataFrame
data.head()

Unnamed: 0_level_0,CGM,CGM_predict
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-02-03 14:30:00,131.4,125.1
2021-02-03 15:00:00,125.1,120.6
2021-02-03 15:30:00,120.6,117.9
2021-02-03 16:00:00,117.9,129.6
2021-02-03 16:30:00,129.6,136.8


In [34]:
data.shape

(669, 2)

In [35]:
def train_model_lstm_cgm(X_train, y_train):
    n_folds = 5
    cross_validation = KFold(n_folds)

    X_data = X_train
    y_data = y_train
    # input_dim = X_data.shape[1]
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_x.fit(X_data)
    scaler_y.fit(y_data)
    scaled_X_train_data = scaler_x.transform(X_data)
    scaled_y_train_data = scaler_y.transform(y_data)
    scaled_X_train_data = np.reshape(scaled_X_train_data, (scaled_X_train_data.shape[0], 1, scaled_X_train_data.shape[1]))

    lstm_best_score = []
    model_check_point_callback = keras.callbacks.ModelCheckpoint(
        filepath = 't2_lstm_cgm.h5',
        save_best_only = True,
        monitor = 'val_loss')
    early_stopping = keras.callbacks.EarlyStopping(patience=100)

    for train_id_x, val_id_x in cross_validation.split(scaled_X_train_data, scaled_y_train_data):
        X_train_fold, X_val_fold = scaled_X_train_data[train_id_x], scaled_X_train_data[val_id_x]
        y_train_fold, y_val_fold = scaled_y_train_data[train_id_x], scaled_y_train_data[val_id_x]

        model = Sequential()
        model.add(LSTM(128, input_shape = (scaled_X_train_data.shape[1], scaled_X_train_data.shape[2])))
        model.add(Dense(150, activation = 'relu'))
        model.add(Dropout(0.20))
        model.add(Dense(100, activation = 'relu'))
        model.add(Dropout(0.15))
        model.add(Dense(50, activation = 'relu'))
        model.add(Dense(20, activation = 'relu'))
        model.add(Dense(1, activation = 'relu'))
        model.compile(loss = 'mse', optimizer = 'adam')
        model.summary()
        model.fit(X_train_fold, y_train_fold,
                  epochs = 200, batch_size = 32, shuffle = False,
                  verbose=1,
                  validation_data = (X_val_fold, y_val_fold),
                  callbacks = [early_stopping, model_check_point_callback])
        lstm_best_score.append(model_check_point_callback.best)

In [36]:
X_train = pd.DataFrame(index = data.index, data = data.CGM, columns = ['CGM'])
print(X_train)

                       CGM
Time                      
2021-02-03 14:30:00  131.4
2021-02-03 15:00:00  125.1
2021-02-03 15:30:00  120.6
2021-02-03 16:00:00  117.9
2021-02-03 16:30:00  129.6
...                    ...
2021-02-17 10:30:00  175.5
2021-02-17 11:00:00  151.2
2021-02-17 11:30:00  135.9
2021-02-17 12:00:00  136.8
2021-02-17 12:30:00  161.1

[669 rows x 1 columns]


In [37]:
y_train = pd.DataFrame(index = data.index, data = data.CGM_predict, columns = ['CGM_predict'])
print(y_train)

                     CGM_predict
Time                            
2021-02-03 14:30:00        125.1
2021-02-03 15:00:00        120.6
2021-02-03 15:30:00        117.9
2021-02-03 16:00:00        129.6
2021-02-03 16:30:00        136.8
...                          ...
2021-02-17 10:30:00        151.2
2021-02-17 11:00:00        135.9
2021-02-17 11:30:00        136.8
2021-02-17 12:00:00        161.1
2021-02-17 12:30:00        183.6

[669 rows x 1 columns]


In [38]:
train_model_lstm_cgm(X_train, y_train)

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_5 (LSTM)               (None, 128)               66560     
                                                                 
 dense_25 (Dense)            (None, 150)               19350     
                                                                 
 dropout_10 (Dropout)        (None, 150)               0         
                                                                 
 dense_26 (Dense)            (None, 100)               15100     
                                                                 
 dropout_11 (Dropout)        (None, 100)               0         
                                                                 
 dense_27 (Dense)            (None, 50)                5050      
                                                                 
 dense_28 (Dense)            (None, 20)               

In [39]:
def normalized_root_mean_squared_error(true, pred):
    squared_error = np.square((true - pred))
    sum_squared_error = np.sum(squared_error)
    rmse = np.sqrt(sum_squared_error / true.size)
    nrmse_loss = round(rmse/np.std(true),3) # pred or true
    return nrmse_loss

In [40]:
def predict_by_model(model, data, print_individual_metrics):
    test_time = data.index
    test_gl_value = data['CGM']
    X_data = data.drop(columns = ['CGM_predict'])
    y_data = data[['CGM_predict']]
    input_dim = X_data.shape[1]

    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_x.fit(X_data)
    scaler_y.fit(y_data)

    X_test_data = data.drop(columns = ['CGM_predict'])
    y_test_data = data[['CGM_predict']]
    scaled_X_test_data = scaler_x.transform(X_test_data)
    scaled_X_test_data = np.reshape(scaled_X_test_data, (scaled_X_test_data.shape[0], 1, scaled_X_test_data.shape[1]))
    prediction = model.predict(scaled_X_test_data, batch_size = 32)
    scaled_prediction = scaler_y.inverse_transform(prediction)

    mae = mean_absolute_error(scaled_prediction, y_test_data)
    rmse = math.sqrt(mean_squared_error(scaled_prediction, y_test_data))
    nrmse = normalized_root_mean_squared_error(scaled_prediction, y_test_data.values)

    if print_individual_metrics == True:
        print(f"MAE: {round(mae,3)}")
        print(f"RMSE: {round(rmse,3)}")
        print(f"NRMSE: {round(nrmse,3)}")

    return (mae,
            rmse,
            nrmse,
            y_test_data.values,
            scaled_prediction)

In [41]:
# Load the model
from keras.models import load_model
model = load_model('t2_lstm_cgm.h5')

predict_by_model(model, data, True)

MAE: 14.846
RMSE: 20.647
NRMSE: 0.554


(14.846038752215145,
 20.646938391941553,
 0.554,
 array([[125.1],
        [120.6],
        [117.9],
        [129.6],
        [136.8],
        [137.7],
        [163.8],
        [188.1],
        [201.6],
        [225.9],
        [233.1],
        [207.9],
        [203.4],
        [200.7],
        [177.3],
        [139.5],
        [144.9],
        [157.5],
        [151.2],
        [139.5],
        [130.5],
        [144.9],
        [144.9],
        [141.3],
        [147.6],
        [144. ],
        [139.5],
        [146.7],
        [142.2],
        [140.4],
        [147.6],
        [160.2],
        [167.4],
        [162. ],
        [229.5],
        [312.3],
        [330.3],
        [294.3],
        [229.5],
        [172.8],
        [143.1],
        [131.4],
        [160.2],
        [209.7],
        [234. ],
        [250.2],
        [246.6],
        [243. ],
        [220.5],
        [234.9],
        [233.1],
        [225.9],
        [200.7],
        [181.8],
        [177.3],
        [193.5]