In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LSTM
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [3]:
df = pd.read_csv("./CSV_Files/glucose_data_resampled.csv")
# Drop all the columns which have unnamed in them
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df.head()

Unnamed: 0,Glucose_time,reading
0,2023-03-05 00:05:00,90.0
1,2023-03-05 00:10:00,95.0
2,2023-03-05 00:15:00,96.0
3,2023-03-05 00:20:00,96.0
4,2023-03-05 00:25:00,96.0


In [4]:
# Set the Glucose_time to datetime format and set it as the index
df['Glucose_time'] = pd.to_datetime(df['Glucose_time'])
df.set_index('Glucose_time', inplace=True)
df.head()

Unnamed: 0_level_0,reading
Glucose_time,Unnamed: 1_level_1
2023-03-05 00:05:00,90.0
2023-03-05 00:10:00,95.0
2023-03-05 00:15:00,96.0
2023-03-05 00:20:00,96.0
2023-03-05 00:25:00,96.0


In [5]:
checked_df = df.copy()
scaler = MinMaxScaler(feature_range=(0, 1))
df['reading'] = scaler.fit_transform(df[['reading']])
df.head()

Unnamed: 0_level_0,reading
Glucose_time,Unnamed: 1_level_1
2023-03-05 00:05:00,0.521127
2023-03-05 00:10:00,0.591549
2023-03-05 00:15:00,0.605634
2023-03-05 00:20:00,0.605634
2023-03-05 00:25:00,0.605634


In [1]:
def prepare_data(time_series_data, n_features):
    X, y = [], []
    for i in range(len(time_series_data)):
        end_ix = i + n_features
        if end_ix > len(time_series_data)-1:
            break
        seq_x, seq_y = time_series_data[i:end_ix], time_series_data[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [10]:
n_features = 1
patience_lst = [5, 10, 15, 20]


In [7]:
time_series_data = df['reading'].values

In [8]:
best_parameter = {}
best_rmse = float('inf')
best_predictions = None
best_model = None

In [11]:
for pat in patience_lst:
    X, y = prepare_data(time_series_data, n_features)
    X = X.reshape((X.shape[0], X.shape[1], 1))

    test_size = 18
    val_size = 24
    train_size = X.shape[0] + (n_features - 5) - test_size - val_size

    X_train, y_train = X[:train_size], y[:train_size]
    X_val, y_val = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
    X_test, y_test = X[train_size+val_size:], y[train_size+val_size:]

        # Print the shapes of the train, validation and test sets
    print("Train Shape: ", X_train.shape, y_train.shape)
    print("Validation Shape: ", X_val.shape, y_val.shape)
    print("Test Shape: ", X_test.shape, y_test.shape)

        
        # Building the LSTM Model
    model = Sequential()
    model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(n_features, 1)))
    model.add(LSTM(50, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

        # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=pat, verbose=1)

        # Fitting the model
    model.fit(X_train, y_train, epochs = 300, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose=0)

        # Choosing the best model based on the validation loss
    predictions = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, predictions))
    if rmse < best_rmse:
        best_rmse = rmse
        best_parameter['n_features'] = 1
        best_parameter['patience'] = pat
        best_predictions = predictions
        best_model = model

print("*" * 50)
print("Best Parameters: ", best_parameter)
print("Best RMSE: ", best_rmse)
print("Prediction :- ", best_predictions)

Train Shape:  (240, 1, 1) (240,)
Validation Shape:  (24, 1, 1) (24,)
Test Shape:  (22, 1, 1) (22,)


  super().__init__(**kwargs)


Epoch 17: early stopping
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 753ms/step
Train Shape:  (240, 1, 1) (240,)
Validation Shape:  (24, 1, 1) (24,)
Test Shape:  (22, 1, 1) (22,)


  super().__init__(**kwargs)


Epoch 58: early stopping
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step   
Train Shape:  (240, 1, 1) (240,)
Validation Shape:  (24, 1, 1) (24,)
Test Shape:  (22, 1, 1) (22,)


  super().__init__(**kwargs)


Epoch 27: early stopping
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Train Shape:  (240, 1, 1) (240,)
Validation Shape:  (24, 1, 1) (24,)
Test Shape:  (22, 1, 1) (22,)


  super().__init__(**kwargs)


Epoch 36: early stopping
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
**************************************************
Best Parameters:  {'n_features': 1, 'patience': 10}
Best RMSE:  0.02363069333853135
Prediction :-  [[0.6981262 ]
 [0.6704693 ]
 [0.6569299 ]
 [0.64358073]
 [0.5796164 ]
 [0.588921  ]
 [0.59832674]
 [0.6174447 ]
 [0.59832674]
 [0.5796164 ]
 [0.56736594]
 [0.561307  ]
 [0.5552918 ]
 [0.5796164 ]
 [0.58580834]
 [0.592045  ]
 [0.6046537 ]
 [0.6046537 ]
 [0.6046537 ]
 [0.6046537 ]
 [0.5796164 ]
 [0.58580834]
 [0.592045  ]
 [0.6304197 ]]


In [12]:
validation_predictions_in_original_scale = scaler.inverse_transform(best_predictions)
xlst = validation_predictions_in_original_scale.flatten()
xlst

array([102.56696 , 100.603325,  99.64202 ,  98.69423 ,  94.15277 ,
        94.81339 ,  95.4812  ,  96.83857 ,  95.4812  ,  94.15277 ,
        93.28298 ,  92.8528  ,  92.42572 ,  94.15277 ,  94.59239 ,
        95.035194,  95.93042 ,  95.93042 ,  95.93042 ,  95.93042 ,
        94.15277 ,  94.59239 ,  95.035194,  97.759796], dtype=float32)

In [13]:
actual_values = df['reading'].values[train_size:train_size+val_size]
actual_values = actual_values.reshape(-1, 1)
actual_values_in_original_scale = scaler.inverse_transform(actual_values)
actual_values_in_original_scale.flatten()

array([104.  , 102.  , 101.  , 100.  ,  95.  ,  95.75,  96.5 ,  98.  ,
        96.5 ,  95.  ,  94.  ,  93.5 ,  93.  ,  95.  ,  95.5 ,  96.  ,
        97.  ,  97.  ,  97.  ,  97.  ,  95.  ,  95.5 ,  96.  ,  99.  ])

In [16]:
# Now I want it to be printed in the form of a dataframe with the predicted values with a shift of 1 and the actual values

final = pd.DataFrame()
# Append the time of time series from values 240 to 264
final['time'] = df.index[train_size:train_size+val_size]


# Append the precited values with a shift of 1 and the predicted value at 240 being the mean of actual values at 237, 238 and 239. Append the mean first and then the predicted values
final['Shifted_prediction'] = [np.mean(actual_values_in_original_scale[237:240])] + validation_predictions_in_original_scale.flatten().tolist()[:-1]
final['unshifted_prediction'] = xlst.flatten().tolist()

# Append the actual values
final['actual'] = actual_values_in_original_scale.flatten()

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [17]:
final

Unnamed: 0,time,Shifted_prediction,unshifted_prediction,actual
0,2023-03-05 20:05:00,,102.566963,104.0
1,2023-03-05 20:10:00,102.566963,100.603325,102.0
2,2023-03-05 20:15:00,100.603325,99.642021,101.0
3,2023-03-05 20:20:00,99.642021,98.694229,100.0
4,2023-03-05 20:25:00,98.694229,94.152771,95.0
5,2023-03-05 20:30:00,94.152771,94.813393,95.75
6,2023-03-05 20:35:00,94.813393,95.481201,96.5
7,2023-03-05 20:40:00,95.481201,96.83857,98.0
8,2023-03-05 20:45:00,96.83857,95.481201,96.5
9,2023-03-05 20:50:00,95.481201,94.152771,95.0


In [21]:
# Now we have a value of Glucose Reading which is a single value. And we need to predict the next value of Glucose Reading. So, we will use the LSTM model to predict next 10 values of Glucose Reading
Reading_Given = 116.0
Reading_Given_scaled = scaler.transform(np.array([[Reading_Given]]))

n_features = 1

# Now we will predict the next 10 values of Glucose Reading
X_new = Reading_Given_scaled
X_new = X_new.reshape((1, n_features, 1))
predictions = []
for i in range(10):
    pred = best_model.predict(X_new)
    predictions.append(pred)
    X_new = pred
    X_new = X_new.reshape((1, n_features, 1))



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step


In [22]:
predictions = np.array(predictions)
predictions = predictions.reshape(-1, 1)
predictions_in_original_scale = scaler.inverse_transform(predictions)
predictions_in_original_scale.flatten()

array([115.543884, 115.01166 , 114.39469 , 113.68498 , 112.87582 ,
       111.96268 , 110.94413 , 109.822784, 108.605125, 107.301994],
      dtype=float32)