In [1]:
# Kaggle environment already has major libs. Install extras:
!pip install darts  fastapi uvicorn
!pip install -U pinecone
!pip install -U langchain langchain-core langchain-community langgraph
!pip install langchain langchain-openai

import os
import pandas as pd
import numpy as np

# For time-series modeling
import darts
from darts import TimeSeries

# For ML
import torch
import tensorflow as tf

# For LLM agent framework
from langchain import OpenAI
import langgraph

# For vector retrieval
import pinecone

print("Setup complete!")




2025-07-04 21:30:58.154116: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751664658.178149     307 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751664658.185868     307 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Setup complete!


In [2]:
import pandas as pd
import glob
import os

# Path to all files
data_path = '/kaggle/input/cgm-dataset/Preprocessed'
csv_files = sorted(glob.glob(os.path.join(data_path, '*.csv')))

all_data = []

for file in csv_files:
    patient_id = os.path.basename(file).replace(".csv", "")
    
    # Read with semicolon delimiter
    df = pd.read_csv(file, delimiter=';', parse_dates=['time'])
    
    df['patient_id'] = patient_id
    all_data.append(df)

# Combine all into one DataFrame
cgm_df = pd.concat(all_data, ignore_index=True)

# Preview
print("Total records:", len(cgm_df))
print("Columns:", cgm_df.columns.tolist())
cgm_df.head()


Total records: 309392
Columns: ['time', 'glucose', 'calories', 'heart_rate', 'steps', 'basal_rate', 'bolus_volume_delivered', 'carb_input', 'patient_id']


Unnamed: 0,time,glucose,calories,heart_rate,steps,basal_rate,bolus_volume_delivered,carb_input,patient_id
0,2018-06-13 18:40:00,332.0,6.3595,82.322835,34.0,0.091667,0.0,0.0,HUPA0001P
1,2018-06-13 18:45:00,326.0,7.728,83.740157,0.0,0.091667,0.0,0.0,HUPA0001P
2,2018-06-13 18:50:00,330.0,4.7495,80.52518,0.0,0.091667,0.0,0.0,HUPA0001P
3,2018-06-13 18:55:00,324.0,6.3595,89.129032,20.0,0.091667,0.0,0.0,HUPA0001P
4,2018-06-13 19:00:00,306.0,5.152,92.495652,0.0,0.075,0.0,0.0,HUPA0001P


In [3]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Parameters
past_window = 72        # 6 hours @ 5-min intervals
future_horizon = 6      # 30 min into the future
step = 1                # sliding window stride

features = ['glucose', 'calories', 'heart_rate', 'steps',
            'basal_rate', 'bolus_volume_delivered', 'carb_input']

X, y = [], []
patient_scalers = {}  # Optional: keep scalers for each patient for later inference

for patient_id, group in cgm_df.groupby('patient_id'):
    group = group.sort_values('time').reset_index(drop=True)

    # Fill missing values
    data = group[features].fillna(method='ffill').fillna(method='bfill')

    # Scale features PER PATIENT (key for generalization and avoiding leakage)
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)

    # Optional: save scaler per patient for inference use later
    patient_scalers[patient_id] = scaler

    # Sliding window
    for i in range(0, len(data_scaled) - past_window - future_horizon + 1, step):
        past_seq = data_scaled[i : i + past_window]
        future_val = data_scaled[i + past_window + future_horizon - 1][0]  # glucose only
        X.append(past_seq)
        y.append(future_val)

X = np.array(X)
y = np.array(y)

print("Shape of X:", X.shape)  # (samples, 72, features)
print("Shape of y:", y.shape)  # (samples,)


  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data = group[features].fillna(method='ffill').fillna(method='bfill')
  data

Shape of X: (307467, 72, 7)
Shape of y: (307467,)


In [4]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split

# === 1. Convert to tensors ===
X_tensor = torch.tensor(X, dtype=torch.float32)   # Shape: (N, 72, 7)
y_tensor = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # Shape: (N, 1)

# === 2. Split into training and validation ===
dataset = TensorDataset(X_tensor, y_tensor)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Optional: for reproducibility
torch.manual_seed(42)
train_ds, val_ds = random_split(dataset, [train_size, val_size])

# === 3. Create DataLoaders ===
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=128)

# === 4. Define the LSTM model ===
class GlucoseLSTM(nn.Module):
    def __init__(self, input_size=7, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)   # hn: (num_layers, batch, hidden_size)
        out = self.fc(hn[-1])       # use output from last LSTM layer
        return out

model = GlucoseLSTM()

# === 5. Define loss and optimizer ===
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [5]:
import torch

# Optional: use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

n_epochs = 5

for epoch in range(n_epochs):
    model.train()
    total_loss = 0.0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = loss_fn(pred, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * xb.size(0)  # weighted by batch size

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss = loss_fn(pred, yb)
            val_loss += loss.item() * xb.size(0)

    avg_train_loss = total_loss / len(train_loader.dataset)
    avg_val_loss = val_loss / len(val_loader.dataset)

    print(f"📅 Epoch {epoch+1}/{n_epochs} | 🏋️ Train Loss: {avg_train_loss:.4f} | 🧪 Val Loss: {avg_val_loss:.4f}")


📅 Epoch 1/5 | 🏋️ Train Loss: 0.0042 | 🧪 Val Loss: 0.0027
📅 Epoch 2/5 | 🏋️ Train Loss: 0.0025 | 🧪 Val Loss: 0.0024
📅 Epoch 3/5 | 🏋️ Train Loss: 0.0025 | 🧪 Val Loss: 0.0024
📅 Epoch 4/5 | 🏋️ Train Loss: 0.0024 | 🧪 Val Loss: 0.0023
📅 Epoch 5/5 | 🏋️ Train Loss: 0.0023 | 🧪 Val Loss: 0.0023


In [6]:
torch.save(model.state_dict(), "glucose_predictor.pt")
model = GlucoseLSTM()
model.load_state_dict(torch.load("glucose_predictor.pt"))
model.eval()


GlucoseLSTM(
  (lstm): LSTM(7, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [7]:
from sklearn.preprocessing import MinMaxScaler
import torch
import numpy as np

# Choose a patient ID
#patient_id = 'HUPA0020P'

# Get full sorted data for that patient
patient_data = cgm_df[cgm_df['patient_id'] == patient_id].sort_values('time')

# Select relevant features and fill gaps
features = ['glucose', 'calories', 'heart_rate', 'steps',
            'basal_rate', 'bolus_volume_delivered', 'carb_input']
data = patient_data[features].fillna(method='ffill').fillna(method='bfill')

# Fit scaler on full patient history (same as training logic)
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)

# Take the last 72 time steps
if len(data_scaled) < 72:
    raise ValueError("Not enough data for inference (requires at least 6 hours).")
last_72 = data_scaled[-72:]  # (72, 7)

# Convert to tensor for model
input_tensor = torch.tensor(last_72, dtype=torch.float32).unsqueeze(0)  # (1, 72, 7)


  data = patient_data[features].fillna(method='ffill').fillna(method='bfill')


In [8]:
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Your model definition
class GlucoseLSTM(nn.Module):
    def __init__(self, input_size=7, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(hn[-1])

# Load trained model
model = GlucoseLSTM()
model.load_state_dict(torch.load("glucose_predictor.pt"))
model.eval()


GlucoseLSTM(
  (lstm): LSTM(7, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [21]:
from typing import TypedDict
from langgraph.graph import StateGraph, END
from langchain_core.runnables import RunnableLambda
from langchain_openai import ChatOpenAI

# --- Define state structure ---
class AgentState(TypedDict):
    patient_id: str
    input_tensor: torch.Tensor
    raw_patient_data: pd.DataFrame  # ✅ This was missing before
    predicted_glucose: float
    risk_level: str
    trend_note: str
    advice: str


# --- LangGraph Nodes ---

def predict_node(state: AgentState):
    input_tensor = state["input_tensor"]

    with torch.no_grad():
        pred = model(input_tensor)

    # Denormalize (fit scaler on full history)
    patient_data = state["raw_patient_data"]
    glucose_vals = patient_data['glucose'].fillna(method='ffill').values.reshape(-1, 1)
    scaler = MinMaxScaler()
    scaler.fit(glucose_vals)
    pred_glucose = scaler.inverse_transform([[pred.item()]])[0][0]

    return {"predicted_glucose": pred_glucose}

def classify_risk(state: AgentState):
    glucose = state["predicted_glucose"]
    if glucose < 70:
        level = "Low"
    elif glucose > 180:
        level = "High"
    else:
        level = "Normal"
    return {"risk_level": level}

# GPT-4o Advice Node
llm = ChatOpenAI(model="gpt-4o-mini", api_key="sk-...", temperature=0.2)

def coach_node(state: AgentState):
    glucose = state["predicted_glucose"]
    risk = state["risk_level"]
    prompt = f"Predicted glucose is {glucose:.1f} mg/dL ({risk} risk). Give clinical advice."
    response = llm.invoke(prompt)
    return {"advice": response.content}

# 📈 Trend Analysis Node
def trend_node(state: AgentState):
    patient_data = state["raw_patient_data"].copy()
    patient_data = patient_data.sort_values("time").reset_index(drop=True)

    recent_glucose = patient_data["glucose"].fillna(method='ffill').tail(6).values
    current_avg = recent_glucose.mean()
    predicted = state["predicted_glucose"]

    if predicted > current_avg + 10:
        trend = "rising"
    elif predicted < current_avg - 10:
        trend = "falling"
    else:
        trend = "stable"

    return {"trend_note": trend}


In [22]:
from langgraph.graph import StateGraph, END
from langchain_core.runnables import RunnableLambda

# --- Build LangGraph ---
graph = StateGraph(AgentState)

# Add Nodes
graph.add_node("Predict", RunnableLambda(predict_node))
graph.add_node("Classify", RunnableLambda(classify_risk))
graph.add_node("Trend", RunnableLambda(trend_node))       # ✅ Add trend analysis
graph.add_node("Coach", RunnableLambda(coach_node))

# Set Flow
graph.set_entry_point("Predict")
graph.add_edge("Predict", "Classify")
graph.add_edge("Classify", "Trend")
graph.add_edge("Trend", "Coach")
graph.add_edge("Coach", END)

# Compile the graph
graph = graph.compile()


In [25]:
# Example: pick a patient
patient_id = 'HUPA0015P'
patient_data = cgm_df[cgm_df['patient_id'] == patient_id].sort_values('time')

features = ['glucose', 'calories', 'heart_rate', 'steps',
            'basal_rate', 'bolus_volume_delivered', 'carb_input']
data = patient_data[features].fillna(method='ffill').fillna(method='bfill')

# Fit scaler on full history for normalization
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)

# Get last 72 time steps
window = data_scaled[-72:]
input_tensor = torch.tensor(window, dtype=torch.float32).unsqueeze(0)


  data = patient_data[features].fillna(method='ffill').fillna(method='bfill')


In [26]:
result = graph.invoke({
    "patient_id": patient_id,               # ✅ Include this
    "input_tensor": input_tensor,
    "raw_patient_data": patient_data
})

print("🔮 Predicted Glucose:", result["predicted_glucose"])
print("🚦 Risk Level:", result["risk_level"])
print("📈 Trend Note:", result["trend_note"])           # Optional if you added trend node
print("🧠 Advice from GPT-4o:\n", result["advice"])


  glucose_vals = patient_data['glucose'].fillna(method='ffill').values.reshape(-1, 1)
  recent_glucose = patient_data["glucose"].fillna(method='ffill').tail(6).values


🔮 Predicted Glucose: 230.7909470796585
🚦 Risk Level: High
📈 Trend Note: falling
🧠 Advice from GPT-4o:
 A predicted glucose level of 230.8 mg/dL is considered high and may indicate hyperglycemia, which can be a sign of diabetes or other metabolic disorders. Here are some clinical recommendations:

1. **Immediate Actions**:
   - **Check Blood Glucose**: If not already done, confirm the blood glucose level with a fingerstick or lab test.
   - **Hydration**: Encourage the patient to drink water to help lower blood glucose levels and prevent dehydration.

2. **Dietary Management**:
   - **Avoid Sugary Foods**: Advise the patient to avoid foods and beverages high in sugar and refined carbohydrates.
   - **Balanced Meals**: Recommend a diet rich in whole grains, lean proteins, healthy fats, and plenty of vegetables.
   - **Portion Control**: Encourage smaller, more frequent meals to help manage blood sugar levels.

3. **Physical Activity**:
   - **Exercise**: Suggest incorporating regular phy