In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Bidirectional, BatchNormalization, Conv1D, MaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.optimizers import Adam

In [37]:
data = pd.read_csv("./CSV_Files/glucose_data.csv")
if 'Glucose_time' not in data.columns:
    data["Glucose_time"] = pd.to_datetime(data['reading_time'], unit='ms')
    data['Glucose_time'] = pd.to_datetime(data['Glucose_time'], format='%Y-%m-%d %H:%M:%S')
    data = data.drop(columns=['reading_time'])
    data = data.sort_values(by='Glucose_time')

In [17]:
data = data.loc[:, ~data.columns.str.contains('^Unnamed')]
data["Glucose_time"] = pd.to_datetime(data["Glucose_time"])

In [18]:
df = data[['Glucose_time', 'reading']]
df.set_index('Glucose_time', inplace=True)
df = df.resample('5min').mean().interpolate(method='linear')
df.head()

Unnamed: 0_level_0,reading
Glucose_time,Unnamed: 1_level_1
2019-08-26 19:10:00,108.1092
2019-08-26 19:15:00,200.5546
2019-08-26 19:20:00,293.0
2019-08-26 19:25:00,293.0
2019-08-26 19:30:00,296.0


In [19]:
df.to_csv('./CSV_Files/glucose_data_resampled.csv')

In [20]:
df = pd.read_csv("./CSV_Files/glucose_data_resampled.csv")
# Drop all the columns which have unnamed in them
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df.head()

Unnamed: 0,Glucose_time,reading
0,2019-08-26 19:10:00,108.1092
1,2019-08-26 19:15:00,200.5546
2,2019-08-26 19:20:00,293.0
3,2019-08-26 19:25:00,293.0
4,2019-08-26 19:30:00,296.0


In [21]:
# If reading_time is present in the dataset, then convert it to a proper date time format
if 'Glucose_time' not in df.columns:
    df['reading_time'] = pd.to_datetime(df['reading_time'], unit='ms')
    df['reading_time'] = pd.to_datetime(df['reading_time'], format='%Y-%m-%d %H:%M:%S')
    df = df.rename(columns={'reading_time': 'Glucose_time'})
    df = df.sort_values(by='Glucose_time')
    df.head()

In [22]:
# Set the Glucose_time to datetime format and set it as the index
df['Glucose_time'] = pd.to_datetime(df['Glucose_time'])
df.set_index('Glucose_time', inplace=True)
df.head()

Unnamed: 0_level_0,reading
Glucose_time,Unnamed: 1_level_1
2019-08-26 19:10:00,108.1092
2019-08-26 19:15:00,200.5546
2019-08-26 19:20:00,293.0
2019-08-26 19:25:00,293.0
2019-08-26 19:30:00,296.0


In [23]:
checked_df = df.copy()

In [24]:
scaler = MinMaxScaler(feature_range=(0, 1))
df['reading'] = scaler.fit_transform(df[['reading']])
df.head()

Unnamed: 0_level_0,reading
Glucose_time,Unnamed: 1_level_1
2019-08-26 19:10:00,0.146562
2019-08-26 19:15:00,0.484365
2019-08-26 19:20:00,0.822168
2019-08-26 19:25:00,0.822168
2019-08-26 19:30:00,0.83313


In [25]:
df.shape

(1498, 1)

In [38]:
def prepare_data(time_series_data):
    X, y = [], []
    for i in range(len(time_series_data) - 1):
        X.append(time_series_data[i])
        y.append(time_series_data[i + 1])
    return np.array(X), np.array(y)

In [39]:
time_series_data = df['reading'].values

In [47]:
n_feat = 1
batch_size = 16
patience = 25
best_rmse = float('inf')
best_parameter = {}
best_predictions = None
best_model = None

In [48]:
def lr_schedule(epoch, lr):
    return lr * 0.995

In [49]:
X_train, y_train = prepare_data(time_series_data)
X_train.shape

(1497,)

In [50]:

print(f"Training model with n_features={n_feat}, dropout_rate=0.2, batch_size={batch_size}, patience={patience}")
train_data_scaled = time_series_data[:-20]
test_data_scaled = time_series_data[-(n_feat + 20):]
X_train, y_train = prepare_data(train_data_scaled)
X_test, y_test = prepare_data(test_data_scaled)
X_train = X_train.reshape(X_train.shape[0],1, 1)
X_test = X_test.reshape(X_test.shape[0], 1, 1)
# Define the model
                
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(1, 1)))
model.add(MaxPooling1D(pool_size=1))
model.add(Dropout(0.2))
model.add(LSTM(units=200, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(units=150, activation='relu', return_sequences=True)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(units=100, activation='relu', return_sequences=True)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(units=50, activation='relu', return_sequences=False))  # Set return_sequences to False
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
        # Compile the model
optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
model.compile(optimizer=optimizer, loss='mse')
early_stop = EarlyStopping(monitor='val_loss', patience=25, verbose=1, restore_best_weights=True)
lr_scheduler = LearningRateScheduler(lr_schedule)
        # Train the model
history = model.fit(X_train, y_train, epochs=500, batch_size=batch_size, verbose=1, validation_split=0.2, callbacks=[early_stop, lr_scheduler])
predictions = []
curr_sequence = X_test[0].reshape(1, n_feat, 1)
for i in range(20):
        next_pred = model.predict(curr_sequence)
        predictions.append(next_pred[0, 0])
                # Update the sequence: drop the first value and add the new prediction
        curr_sequence = np.append(curr_sequence[:, 1:, :], next_pred.reshape(1, 1, 1), axis=1)
predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
actual = checked_df['reading'].values[-20:]

rmse = np.sqrt(mean_squared_error(actual, predictions))
print(f"RMSE: {rmse}")

if rmse < best_rmse:
        best_rmse = rmse
        best_parameter = {'n_features': n_feat, 'dropout_rate': 0.2, 'batch_size': batch_size, 'patience': patience}
        best_predictions = predictions
        best_model = model

print(f"Best parameters: {best_parameter}")
print(f"Best RMSE: {best_rmse}")

Training model with n_features=1, dropout_rate=0.2, batch_size=16, patience=25


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 83ms/step - loss: 0.1895 - val_loss: 0.0939 - learning_rate: 9.9500e-04
Epoch 2/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - loss: 0.0376 - val_loss: 0.0637 - learning_rate: 9.9003e-04
Epoch 3/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0224 - val_loss: 0.0505 - learning_rate: 9.8507e-04
Epoch 4/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0196 - val_loss: 0.0492 - learning_rate: 9.8015e-04
Epoch 5/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.0156 - val_loss: 0.0523 - learning_rate: 9.7525e-04
Epoch 6/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0137 - val_loss: 0.0427 - learning_rate: 9.7037e-04
Epoch 7/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0127 - val_loss

In [51]:
best_predictions = scaler.inverse_transform(np.array(best_predictions).reshape(-1, 1)).flatten()
y_test = scaler.inverse_transform(y_test[:20].reshape(-1, 1)).flatten()

# Create a DataFrame for actual vs predicted values
df_1 = pd.DataFrame({'Actual': y_test, 'Predicted': best_predictions})
df_1

Unnamed: 0,Actual,Predicted
0,333.444444,91666.55
1,337.555556,93764.54
2,341.666667,96767.35
3,337.666667,101062.6
4,336.0,107710.0
5,330.0,119650.3
6,324.0,144828.1
7,318.0,200892.2
8,312.5,337410.2
9,307.0,748948.3


In [54]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

def prepare_data(data, n_features):
    X, y = [], []
    for i in range(len(data) - n_features):
        X.append(data[i:i + n_features])
        y.append(data[i + n_features])
    return np.array(X), np.array(y)

def lr_schedule(epoch, lr):
    return lr * 0.995

# Load and scale data
time_series_data = df['reading'].values
scaler = MinMaxScaler()
time_series_data = scaler.fit_transform(time_series_data.reshape(-1, 1)).flatten()

batch_size_lst = [16, 32, 64]
patience_lst = [30]

best_rmse = float('inf')
best_parameters = {}
best_predictions = None
best_model = None

# Train and evaluate model function
def train_and_evaluate_model(batch_size, patience):
    n_feat = 1  # Using only one feature
    train_data_scaled = time_series_data[:-20]
    test_data_scaled = time_series_data[-(n_feat + 20):]

    X_train, y_train = prepare_data(train_data_scaled, n_feat)
    X_test, y_test = prepare_data(test_data_scaled, n_feat)

    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    model = Sequential()
    model.add(LSTM(units=200, activation='tanh', input_shape=(n_feat, 1), return_sequences=True))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    model.add(Bidirectional(LSTM(units=150, activation='relu', return_sequences=True)))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    model.add(Bidirectional(LSTM(units=100, activation='relu', return_sequences=True)))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    model.add(LSTM(units=50, activation='relu', return_sequences=False))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    model.add(Dense(100, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(1))

    optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mse')

    early_stop = EarlyStopping(monitor='val_loss', patience=patience, verbose=1, restore_best_weights=True)
    lr_scheduler = LearningRateScheduler(lr_schedule)

    model.fit(X_train, y_train, epochs=500, batch_size=batch_size, verbose=1, validation_split=0.2, callbacks=[early_stop, lr_scheduler])

    predictions = []
    curr_sequence = X_test[0].reshape(1, n_feat, 1)
    for i in range(20):
        next_pred = model.predict(curr_sequence)
        predictions.append(next_pred[0, 0])
        curr_sequence = np.append(curr_sequence[:, 1:, :], next_pred.reshape(1, 1, 1), axis=1)

    predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
    actual = checked_df['reading'].values[-20:]

    rmse = np.sqrt(mean_squared_error(actual, predictions))
    print(f"RMSE: {rmse}")

    return rmse, predictions, model

for batch_size in batch_size_lst:
    for patience in patience_lst:
        print(f"Training model with n_features=1, dropout_rate=0.2, batch_size={batch_size}, patience={patience}")
        rmse, predictions, model = train_and_evaluate_model(batch_size, patience)
        
        if rmse < best_rmse:
            best_rmse = rmse
            best_parameters = {'batch_size': batch_size, 'patience': patience}
            best_predictions = predictions
            best_model = model

print(f"Best parameters: {best_parameters}")
print(f"Best RMSE: {best_rmse}")

# Print the results
df_results = pd.DataFrame({'Actual': checked_df['reading'].values[-20:], 'Predicted': best_predictions})
print(df_results)

Training model with n_features=1, dropout_rate=0.2, batch_size=16, patience=30


  super().__init__(**kwargs)


Epoch 1/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 39ms/step - loss: 0.1333 - val_loss: 0.0897 - learning_rate: 9.9500e-04
Epoch 2/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0343 - val_loss: 0.0624 - learning_rate: 9.9003e-04
Epoch 3/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0201 - val_loss: 0.0522 - learning_rate: 9.8507e-04
Epoch 4/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 0.0178 - val_loss: 0.0499 - learning_rate: 9.8015e-04
Epoch 5/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0169 - val_loss: 0.0496 - learning_rate: 9.7525e-04
Epoch 6/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0129 - val_loss: 0.0497 - learning_rate: 9.7037e-04
Epoch 7/500
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.0098 - val_loss

  super().__init__(**kwargs)


Epoch 1/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 83ms/step - loss: 0.1530 - val_loss: 0.1032 - learning_rate: 9.9500e-04
Epoch 2/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 0.0313 - val_loss: 0.0756 - learning_rate: 9.9003e-04
Epoch 3/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.0254 - val_loss: 0.0576 - learning_rate: 9.8507e-04
Epoch 4/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0203 - val_loss: 0.0498 - learning_rate: 9.8015e-04
Epoch 5/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.0170 - val_loss: 0.0496 - learning_rate: 9.7525e-04
Epoch 6/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0155 - val_loss: 0.0512 - learning_rate: 9.7037e-04
Epoch 7/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0125 - val_loss

  super().__init__(**kwargs)


Epoch 1/500
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 267ms/step - loss: 0.2856 - val_loss: 0.1179 - learning_rate: 9.9500e-04
Epoch 2/500
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - loss: 0.0740 - val_loss: 0.0956 - learning_rate: 9.9003e-04
Epoch 3/500
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.0420 - val_loss: 0.0811 - learning_rate: 9.8507e-04
Epoch 4/500
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - loss: 0.0336 - val_loss: 0.0675 - learning_rate: 9.8015e-04
Epoch 5/500
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step - loss: 0.0359 - val_loss: 0.0573 - learning_rate: 9.7525e-04
Epoch 6/500
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step - loss: 0.0260 - val_loss: 0.0541 - learning_rate: 9.7037e-04
Epoch 7/500
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - loss: 0.0204 - val_los

In [55]:
df_results

Unnamed: 0,Actual,Predicted
0,333.444444,1.098885
1,337.555556,1.46664
2,341.666667,2.640996
3,337.666667,7.504671
4,336.0,31.163055
5,330.0,79.714661
6,324.0,80.888893
7,318.0,80.708893
8,312.5,80.73481
9,307.0,80.730942


In [59]:
one_value = [259]
# Now using the above trained model I want the next 20 values

# Now using the model.predict() function to get the next 20 values
predictions = []

curr_sequence = one_value
for i in range(20):
    next_pred = best_model.predict(curr_sequence.reshape(1, 1, 1))
    predictions.append(next_pred[0, 0])
    curr_sequence = np.append(curr_sequence[1:], next_pred)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed