In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# --- Load and preprocess data ---
# Raw string literal: the Windows path is full of backslashes that a normal
# string would try to interpret as escape sequences (raising a SyntaxWarning).
df = pd.read_csv(r"D:\PROGRAMMING\PROJECTS\PYTHON\TF\RESEARCH\RESEARCH\DATA\datafe.csv")
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date').reset_index(drop=True)

# Smooth the CGM values with a trailing 3-sample mean (optional, helps remove spikes).
# NOTE(review): with min_periods=1 a missing reading is replaced by the mean of
# the other readings in its window, so the dropna below only removes rows whose
# whole window is missing -- confirm this implicit imputation is intended.
df['CGM (mg / dl)'] = df['CGM (mg / dl)'].rolling(window=3, min_periods=1).mean()

# Drop rows with missing CGM values
df = df.dropna(subset=['CGM (mg / dl)'])

# Extract time-based features
df['hour'] = df['Date'].dt.hour
df['minute'] = df['Date'].dt.minute
df['dayofweek'] = df['Date'].dt.dayofweek

# Standard scaling.
# The scalers are fitted on the chronologically first 80% of rows only (the
# same fraction as the train/test split applied to the windows later on) and
# then applied to every row. Fitting on the full frame would leak the mean and
# variance of the held-out period into training and evaluation.
train_fraction = 0.8
n_fit_rows = int(train_fraction * len(df))
fit_rows = df.iloc[:n_fit_rows]

scaler_cgm = StandardScaler()
scaler_cgm.fit(fit_rows[['CGM (mg / dl)']])
# transform() returns an (n, 1) array; flatten it for the single-column assignment.
df['CGM_scaled'] = scaler_cgm.transform(df[['CGM (mg / dl)']]).ravel()

scaler_time = StandardScaler()
scaler_time.fit(fit_rows[['hour', 'minute', 'dayofweek']])
df[['hour_scaled', 'minute_scaled', 'dayofweek_scaled']] = scaler_time.transform(df[['hour', 'minute', 'dayofweek']])

# --- Prepare sequences ---
time_steps = 8  # past 2 hours if 15-min interval
future_steps = [1, 4, 8]  # 15, 60, 120 min ahead

features = ['CGM_scaled', 'hour_scaled', 'minute_scaled', 'dayofweek_scaled']

# Pull the columns out of pandas once; slicing the DataFrame inside the loop
# re-creates the column selection on every iteration and is needlessly slow.
feature_matrix = df[features].to_numpy()
cgm_scaled = df['CGM_scaled'].to_numpy()

# A window starting at row i reads rows i .. i+time_steps-1, and its furthest
# target sits at row i + time_steps + max(future_steps) - 1. The last valid
# start is therefore len(df) - time_steps - max(future_steps), which gives the
# "+ 1" in the window count (the previous bound silently dropped that window).
n_windows = len(df) - time_steps - max(future_steps) + 1
if n_windows <= 0:
    raise ValueError(
        f"Need at least {time_steps + max(future_steps)} rows to build one "
        f"window, got {len(df)}."
    )

# X: (n_windows, time_steps, n_features) -- the model input windows.
X = np.stack([feature_matrix[start:start + time_steps] for start in range(n_windows)])

# y: (n_windows, len(future_steps)) -- scaled CGM at each forecast horizon.
# offset=1 is the row immediately after the input window.
target_rows = np.arange(n_windows)[:, None] + (time_steps - 1 + np.asarray(future_steps))
y = cgm_scaled[target_rows]

# --- Split train and test ---
# Chronological split (no shuffling): the first 80% of windows train the model,
# the remaining 20% are held out for testing.
split_idx = int(0.8 * len(X))
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

# --- Build enhanced Bidirectional LSTM model ---
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer of a Sequential model is deprecated in
# Keras 3 and emits a UserWarning.
model = Sequential([
    Input(shape=(time_steps, X.shape[2])),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(32)),  # last recurrent layer returns only its final state
    Dropout(0.2),
    Dense(3)  # outputs: 15, 60, 120 min ahead
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()

# --- Callbacks ---
# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights; the best model is also persisted to disk.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ModelCheckpoint("best_model.keras", monitor="val_loss", save_best_only=True)
]

# --- Train the model ---
# validation_split holds out the last 20% of the training windows for
# validation; Keras takes that slice before shuffling.
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1
)

  df = pd.read_csv("D:\PROGRAMMING\PROJECTS\PYTHON\TF\RESEARCH\RESEARCH\DATA\datafe.csv")
  df = pd.read_csv("D:\PROGRAMMING\PROJECTS\PYTHON\TF\RESEARCH\RESEARCH\DATA\datafe.csv")
  super().__init__(**kwargs)


Epoch 1/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - loss: 0.5759 - mae: 0.5518 - val_loss: 0.3943 - val_mae: 0.4904
Epoch 2/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.4659 - mae: 0.4948 - val_loss: 0.4207 - val_mae: 0.4988
Epoch 3/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.4518 - mae: 0.4916 - val_loss: 0.3976 - val_mae: 0.4871
Epoch 4/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.4437 - mae: 0.4852 - val_loss: 0.3693 - val_mae: 0.4701
Epoch 5/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.4213 - mae: 0.4790 - val_loss: 0.3966 - val_mae: 0.4841
Epoch 6/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.4298 - mae: 0.4776 - val_loss: 0.3842 - val_mae: 0.4803
Epoch 7/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.4179 

In [None]:
# --- Predict ---
y_pred = model.predict(X_test)

# Inverse transform only CGM values (not time features).
# scaler_cgm was fitted on a single column, so the (n_samples, n_horizons)
# arrays are flattened to one column for the call and reshaped back afterwards
# instead of relying on the scaler broadcasting over a mismatched column count.
y_pred_inv = scaler_cgm.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)
y_test_inv = scaler_cgm.inverse_transform(y_test.reshape(-1, 1)).reshape(y_test.shape)

# --- Evaluation ---
# One label per model output column, in the same order as the Dense(3) outputs.
time_labels = ['15 min', '60 min', '120 min']

for i, label in enumerate(time_labels):
    rmse = np.sqrt(mean_squared_error(y_test_inv[:, i], y_pred_inv[:, i]))
    mae = mean_absolute_error(y_test_inv[:, i], y_pred_inv[:, i])
    print(f"{label} Prediction -> RMSE: {rmse:.2f}, MAE: {mae:.2f}")

# --- Plot predictions ---
# One figure per forecast horizon: actual vs. predicted CGM over the test windows.
for i, label in enumerate(time_labels):
    plt.figure(figsize=(10, 4))
    plt.plot(y_test_inv[:, i], label=f"Actual {label} CGM")
    plt.plot(y_pred_inv[:, i], label=f"Predicted {label} CGM")
    plt.title(f"{label} CGM Prediction")
    plt.xlabel("Sample Index")
    plt.ylabel("CGM (mg / dl)")
    plt.legend()
    plt.grid(True)
    plt.show()