In [14]:
import os
import pandas as pd
import numpy as np

# For time-series modeling
import darts
from darts import TimeSeries

# For ML
import torch
import tensorflow as tf




print("Setup complete!")


Setup complete!


In [15]:
import pandas as pd
import zipfile
import io
import os

# Path to the zip file
zip_path = '/content/Preprocessed.zip'  # Change path if needed

# Open and read zip file
all_data = []

with zipfile.ZipFile(zip_path, 'r') as z:
    # Filter for CSV files in the 'Preprocessed' folder
    csv_files = sorted([f for f in z.namelist() if f.startswith('Preprocessed/') and f.endswith('.csv')])

    for file in csv_files:
        # Extract patient ID from filename
        patient_id = os.path.basename(file).replace(".csv", "")

        # Read CSV from the zip directly using semicolon delimiter
        with z.open(file) as f:
            df = pd.read_csv(f, delimiter=';', parse_dates=['time'])
            df['patient_id'] = patient_id
            all_data.append(df)

# Combine all into one DataFrame
cgm_df = pd.concat(all_data, ignore_index=True)

# Preview
print("Total records:", len(cgm_df))
print("Columns:", cgm_df.columns.tolist())
cgm_df.head()


Total records: 309392
Columns: ['time', 'glucose', 'calories', 'heart_rate', 'steps', 'basal_rate', 'bolus_volume_delivered', 'carb_input', 'patient_id']


Unnamed: 0,time,glucose,calories,heart_rate,steps,basal_rate,bolus_volume_delivered,carb_input,patient_id
0,2018-06-13 18:40:00,332.0,6.3595,82.322835,34.0,0.091667,0.0,0.0,HUPA0001P
1,2018-06-13 18:45:00,326.0,7.728,83.740157,0.0,0.091667,0.0,0.0,HUPA0001P
2,2018-06-13 18:50:00,330.0,4.7495,80.52518,0.0,0.091667,0.0,0.0,HUPA0001P
3,2018-06-13 18:55:00,324.0,6.3595,89.129032,20.0,0.091667,0.0,0.0,HUPA0001P
4,2018-06-13 19:00:00,306.0,5.152,92.495652,0.0,0.075,0.0,0.0,HUPA0001P


In [16]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Parameters
past_window = 72        # 6 hours @ 5-min intervals
future_horizon = 6      # 30 min into the future
step = 1                # sliding window stride

features = ['glucose', 'calories', 'heart_rate', 'steps',
            'basal_rate', 'bolus_volume_delivered', 'carb_input']

X, y = [], []
patient_scalers = {}  # Optional: keep scalers for each patient for later inference

for patient_id, group in cgm_df.groupby('patient_id'):
    group = group.sort_values('time').reset_index(drop=True)

    # Fill missing values
    data = group[features].fillna(method='ffill').fillna(method='bfill')

    # Scale features PER PATIENT (key for generalization and avoiding leakage)
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)

    # Optional: save scaler per patient for inference use later
    patient_scalers[patient_id] = scaler

    # Sliding window
    for i in range(0, len(data_scaled) - past_window - future_horizon + 1, step):
        past_seq = data_scaled[i : i + past_window]
        future_val = data_scaled[i + past_window + future_horizon - 1][0]  # glucose only
        X.append(past_seq)
        y.append(future_val)

X = np.array(X)
y = np.array(y)

print("Shape of X:", X.shape)  # (samples, 72, features)
print("Shape of y:", y.shape)  # (samples,)


  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data

Shape of X: (307467, 72, 7)
Shape of y: (307467,)


In [18]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split

# === 1. Convert to tensors ===
X_tensor = torch.tensor(X, dtype=torch.float32)   # Shape: (N, 72, 7)
y_tensor = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # Shape: (N, 1)

# === 2. Split into train, val, test ===
dataset = TensorDataset(X_tensor, y_tensor)
total_size = len(dataset)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size  # Remaining 15%

# Optional: for reproducibility
torch.manual_seed(42)
train_ds, val_test_ds = random_split(dataset, [train_size, total_size - train_size])
val_ds, test_ds = random_split(val_test_ds, [val_size, test_size])

# === 3. Create DataLoaders ===
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=128)
test_loader = DataLoader(test_ds, batch_size=128)

# === 4. Define the LSTM model ===
class GlucoseLSTM(nn.Module):
    def __init__(self, input_size=7, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)   # hn: (num_layers, batch, hidden_size)
        out = self.fc(hn[-1])       # use output from last LSTM layer
        return out

model = GlucoseLSTM()

# === 5. Define loss and optimizer ===
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# === 4. Define the LSTM model ===
class GlucoseLSTM(nn.Module):
    def __init__(self, input_size=7, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)   # hn: (num_layers, batch, hidden_size)
        out = self.fc(hn[-1])       # use output from last LSTM layer
        return out

model = GlucoseLSTM()

# === 5. Define loss and optimizer ===
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [19]:
pip install torchmetrics




In [12]:
import torch

# Optional: use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)  # Ensure model is moved after initialization

n_epochs = 5

for epoch in range(n_epochs):
    model.train()
    total_loss = 0.0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = loss_fn(pred, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * xb.size(0)  # weighted by batch size

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss = loss_fn(pred, yb)
            val_loss += loss.item() * xb.size(0)

    avg_train_loss = total_loss / len(train_loader.dataset)
    avg_val_loss = val_loss / len(val_loader.dataset)

    print(f"📅 Epoch {epoch+1}/{n_epochs} | 🏋️ Train Loss: {avg_train_loss:.4f} | 🧪 Val Loss: {avg_val_loss:.4f}")

# === Final Test Evaluation ===
model.eval()
test_loss = 0.0
with torch.no_grad():
    for xb, yb in test_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = loss_fn(pred, yb)
        test_loss += loss.item() * xb.size(0)

avg_test_loss = test_loss / len(test_loader.dataset)
print(f"🧪 Final Test Loss: {avg_test_loss:.4f}")


📅 Epoch 1/5 | 🏋️ Train Loss: 0.0047 | 🧪 Val Loss: 0.0028
📅 Epoch 2/5 | 🏋️ Train Loss: 0.0025 | 🧪 Val Loss: 0.0027
📅 Epoch 3/5 | 🏋️ Train Loss: 0.0025 | 🧪 Val Loss: 0.0024
📅 Epoch 4/5 | 🏋️ Train Loss: 0.0024 | 🧪 Val Loss: 0.0024
📅 Epoch 5/5 | 🏋️ Train Loss: 0.0024 | 🧪 Val Loss: 0.0023
🧪 Final Test Loss: 0.0023


In [21]:
from torchmetrics import MeanSquaredError

# Initialize metric and move to device
mse_metric = MeanSquaredError().to(device)

model.eval()
with torch.no_grad():
    for xb, yb in test_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        mse_metric.update(pred, yb)

final_mse = mse_metric.compute().item()
print(f"🧪 Final Test MSE: {final_mse:.4f}")

🧪 Final Test MSE: 0.1365


In [7]:
torch.save(model.state_dict(), "glucose_predictor.pt")
model = GlucoseLSTM()
model.load_state_dict(torch.load("glucose_predictor.pt"))
model.eval()


GlucoseLSTM(
  (lstm): LSTM(7, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)