In [39]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam

# Load the dataset
data = pd.read_csv("Bussiness_facility/combined_data.csv")

# Data preprocessing
data['Time'] = pd.to_datetime(data['Time'])
data.set_index('Time', inplace=True)

# Normalize numerical features
scaler = MinMaxScaler()
data[['BG', 'CGM', 'CHO', 'insulin', 'LBGI', 'HBGI', 'Risk']] = scaler.fit_transform(data[['BG', 'CGM', 'CHO', 'insulin', 'LBGI', 'HBGI', 'Risk']])

# Encode categorical variables
encoder = LabelEncoder()
data['Patient'] = encoder.fit_transform(data['Patient'])

print(data.index)

# Ensure the index is sorted in chronological order
if not data.index.is_monotonic_increasing:
    data = data.sort_index()

# Sequence generation
def generate_sequences(data, seq_length):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq_data = data.iloc[i:i+seq_length]
#         print("seq_data: ", seq_data)
        seq_target = data.iloc[i+seq_length]['BG']  # Assuming predicting BG
#         print("seq_target: ", seq_target)
        sequences.append(seq_data.values)
#         print("sequences: ", sequences)
        targets.append(seq_target)
#         print("target: ", targets)
    return np.array(sequences), np.array(targets)

sequence_length = 1
X, y = generate_sequences(data, sequence_length)

print(X)
print(y)

# Reshape X to 3D array (samples, timesteps, features)
X = X.reshape(X.shape[0], sequence_length, -1)
print("Reshaped X: ", X.shape)

# Ensure that the number of features matches the input shape expected by the LSTM layer
num_features = X.shape[2]

# Model definition
model = Sequential()
model.add(LSTM(units=50, input_shape=(sequence_length, num_features)))
model.add(Dropout(0.2))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Model training
model.fit(X, y, epochs=10, batch_size=32)


# Model evaluation
# For simplicity, let's use the last portion of the data as the test set
test_data = data.iloc[-100:]
X_test, y_test = generate_sequences(test_data, sequence_length)
X_test = X_test.reshape(X_test.shape[0], sequence_length, -1)
loss = model.evaluate(X_test, y_test)
print("Test Loss:", loss)

DatetimeIndex(['2023-10-25 06:00:00', '2023-10-25 06:05:00',
               '2023-10-25 06:10:00', '2023-10-25 06:15:00',
               '2023-10-25 06:20:00', '2023-10-25 06:25:00',
               '2023-10-25 06:30:00', '2023-10-25 06:35:00',
               '2023-10-25 06:40:00', '2023-10-25 06:45:00',
               ...
               '2023-10-30 05:10:00', '2023-10-30 05:15:00',
               '2023-10-30 05:20:00', '2023-10-30 05:25:00',
               '2023-10-30 05:30:00', '2023-10-30 05:35:00',
               '2023-10-30 05:40:00', '2023-10-30 05:45:00',
               '2023-10-30 05:50:00', '2023-10-30 05:55:00'],
              dtype='datetime64[ns]', name='Time', length=31680, freq=None)
[[[0.00000000e+00 3.13244862e-01 2.82333568e-01 ... 8.22903513e-03
   1.70632351e-03 0.00000000e+00]]

 [[2.16000000e+04 2.67198642e-01 2.31332559e-01 ... 0.00000000e+00
   1.78810497e-04 1.00000000e+00]]

 [[1.72800000e+04 2.81023100e-01 2.46594088e-01 ... 7.36231309e-05
   1.52660344e-05 1.0

  super().__init__(**kwargs)


[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.0715
Epoch 2/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0202
Epoch 3/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0194
Epoch 4/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0192
Epoch 5/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0193
Epoch 6/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0192
Epoch 7/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0193
Epoch 8/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0192
Epoch 9/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0187
Epoch 10/10
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0193


In [31]:
data.index.min()

Timestamp('2023-10-25 06:00:00')

In [32]:
pd.Timedelta(days=1)

Timedelta('1 days 00:00:00')

In [33]:
specific_date = pd.Timestamp('2024-01-15')
specific_date

Timestamp('2024-01-15 00:00:00')

In [41]:
specific_date = pd.Timestamp('2023-10-30')

# Check if the specific date exists in the dataset
if specific_date in data.index:
    # Extract data for the specific date
    input_data = data.loc[specific_date - pd.Timedelta(days=sequence_length):specific_date]
    print(input_data)

    # Check if there is enough data for inference
    if len(input_data) >= sequence_length:
        try:
            # Preprocess input data
            input_data[['BG', 'CGM', 'CHO', 'insulin', 'LBGI', 'HBGI', 'Risk']] = scaler.transform(input_data[['BG', 'CGM', 'CHO', 'insulin', 'LBGI', 'HBGI', 'Risk']])
            input_data['Patient'] = encoder.fit_transform(input_data['Patient'])
        except KeyError as e:
            print("Error:", e)
            print("Skipping preprocessing for Patient column.")

        # Generate sequence for input data
        X_inference = input_data.values.reshape(1, sequence_length, -1)
        print("Shape of X_inference:", X_inference.shape)


        # Make predictions
        predicted_bg = model.predict(X_inference)

        # Optionally inverse transform the prediction
        predicted_bg = scaler.inverse_transform(predicted_bg)

        print("Predicted BG for {}: {}".format(specific_date, predicted_bg))
    else:
        print("Not enough data available for inference.")
else:
    print("Selected date does not exist in the dataset.")


            Unnamed: 0        BG       CGM  CHO   insulin      LBGI      HBGI  \
Time                                                                            
2023-10-29       19800  0.244653  0.203085  0.0  0.005167  0.001884  0.000000   
2023-10-29       16920  0.185279  0.135784  0.0  0.003795  0.018660  0.000000   
2023-10-29       21240  0.127987  0.073126  0.0  0.002902  0.065764  0.000000   
2023-10-29       12600  0.204869  0.158231  0.0  0.003788  0.010647  0.000000   
2023-10-29       18360  0.181630  0.134075  0.0  0.003051  0.020500  0.000000   
...                ...       ...       ...  ...       ...       ...       ...   
2023-10-30        4248  0.313289  0.288386  0.0  0.001869  0.000000  0.008248   
2023-10-30       20088  0.169969  0.131010  0.0  0.005167  0.027227  0.000000   
2023-10-30       11448  0.207320  0.172888  0.0  0.003391  0.009842  0.000000   
2023-10-30        2808  0.199927  0.163822  0.0  0.000985  0.012395  0.000000   
2023-10-30        7128  0.22

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  input_data[['BG', 'CGM', 'CHO', 'insulin', 'LBGI', 'HBGI', 'Risk']] = scaler.transform(input_data[['BG', 'CGM', 'CHO', 'insulin', 'LBGI', 'HBGI', 'Risk']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  input_data['Patient'] = encoder.fit_transform(input_data['Patient'])


ValueError: Exception encountered when calling LSTMCell.call().

[1mDimensions must be equal, but are 57222 and 9 for '{{node sequential_12_1/lstm_12_1/lstm_cell_1/MatMul}} = MatMul[T=DT_FLOAT, grad_a=false, grad_b=false, transpose_a=false, transpose_b=false](sequential_12_1/lstm_12_1/strided_slice_1, sequential_12_1/lstm_12_1/lstm_cell_1/Cast/ReadVariableOp)' with input shapes: [1,57222], [9,200].[0m

Arguments received by LSTMCell.call():
  • inputs=tf.Tensor(shape=(1, 57222), dtype=float32)
  • states=('tf.Tensor(shape=(1, 50), dtype=float32)', 'tf.Tensor(shape=(1, 50), dtype=float32)')
  • training=False