In [14]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import pickle
from sklearn.preprocessing import StandardScaler

# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print("Using device:", DEVICE)

# Make sure this matches the model you trained
class CNNLSTMRegressor(nn.Module):
    """Conv1d -> ReLU -> LSTM -> Dropout -> Linear regressor.

    Takes a batch of sequences shaped [B, T, F] and returns one scalar per
    sequence, shaped [B]. Only the LSTM output at the final time step feeds
    the linear head.

    Args:
        n_features: number of input features per time step (F).
        cnn_channels: output channels of the 1-D convolution.
        lstm_hidden: LSTM hidden size.
        lstm_layers: number of stacked LSTM layers.
        dropout: dropout probability applied before the linear head, and
            between LSTM layers when ``lstm_layers > 1``.
    """

    def __init__(self, n_features, cnn_channels=32, lstm_hidden=64,
                 lstm_layers=1, dropout=0.3):
        super().__init__()

        # Inter-layer LSTM dropout is only passed on for a stacked LSTM;
        # a single-layer LSTM gets 0.0.
        if lstm_layers > 1:
            recurrent_dropout = dropout
        else:
            recurrent_dropout = 0.0

        # Attribute names below determine the state_dict keys, so they must
        # stay as they are for saved checkpoints to load.
        self.conv1 = nn.Conv1d(
            in_channels=n_features,
            out_channels=cnn_channels,
            kernel_size=3,
            padding=1,
        )
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(
            input_size=cnn_channels,
            hidden_size=lstm_hidden,
            num_layers=lstm_layers,
            batch_first=True,
            dropout=recurrent_dropout,
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(lstm_hidden, 1)

    def forward(self, x):
        """Map x of shape [B, T, F] to predictions of shape [B]."""
        # Conv1d convolves over the last axis, so move time there first.
        channels_first = x.transpose(1, 2)                   # [B, F, T]
        conv_out = self.relu(self.conv1(channels_first))     # [B, C, T]

        # Back to batch-first sequence layout for the LSTM.
        seq_in = conv_out.transpose(1, 2)                    # [B, T, C]
        seq_out, _ = self.lstm(seq_in)                       # [B, T, H]

        # Keep only the final time step, regularise, and project to a scalar.
        final_step = self.dropout(seq_out[:, -1, :])         # [B, H]
        return self.fc(final_step).squeeze(1)                # [B]


Using device: cpu


In [15]:
# Load the final dataset and order it chronologically within each person
df = pd.read_csv("final_insomnia_dataset.csv")
df["DATE"] = pd.to_datetime(df["DATE"])
df = df.sort_values(["person_id", "DATE"]).reset_index(drop=True)

# Same feature columns used in training (order matters: the scaler and the
# model's first layer are positional)
feature_cols = [
    "bp_systolic", "bp_diastolic",
    "ACTIVITY_steps", "ACTIVITY_distance",
    "ACTIVITY_soft", "ACTIVITY_moderate", "ACTIVITY_intense",
    "HR_bpm",
    "sleep_hours",
    "sleep_efficiency_proxy",
    "awakenings_proxy",
    "stress_level",
    "spo2",
]

# Fill missing feature values with the per-column mean of THIS file.
# NOTE(review): this uses inference-time means, not stored training means --
# confirm it matches the preprocessing applied when the model was trained.
df[feature_cols] = df[feature_cols].fillna(df[feature_cols].mean())

n_features = len(feature_cols)

# Load scaler
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# scaler files that you produced yourself.
with open("insomnia_scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

# Recreate and load model
model = CNNLSTMRegressor(n_features=n_features).to(DEVICE)
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all load_state_dict needs.
state_dict = torch.load(
    "insomnia_cnn_lstm_model.pt",
    map_location=DEVICE,
    weights_only=True,
)
model.load_state_dict(state_dict)
model.eval()  # inference mode: disables dropout

print("Model and scaler loaded.")


Model and scaler loaded.


In [16]:
SEQ_LEN = 21  # number of consecutive daily rows fed to the model

# Find everyone with at least SEQ_LEN days of data.
# groupby sorts its keys, so valid_pids is in ascending person_id order.
days_per_person = df.groupby("person_id").size()
valid_pids = days_per_person[days_per_person >= SEQ_LEN].index.tolist()

print(f"People with at least {SEQ_LEN} days:", valid_pids[:10])

if not valid_pids:
    raise ValueError(f"No person has at least {SEQ_LEN} days of data")

# Choose one person: the LAST id in the sorted list
person_id = valid_pids[-1]
g = df[df["person_id"] == person_id].sort_values("DATE").reset_index(drop=True)

print(f"Using person_id = {person_id}, total days = {len(g)}")

# Take the LAST SEQ_LEN days for this person
window = g.tail(SEQ_LEN)
window_dates = window["DATE"].tolist()
# Ground truth is read from the final day of the window
true_score = float(window["insomnia_score"].iloc[-1])
true_label = int(window["insomnia_label"].iloc[-1])

X_window = window[feature_cols].values.astype("float32")  # [SEQ_LEN, F]


People with at least 21 days: [1487.0, 2201.0, 2210.0, 3379.0, 4891.0, 5359.0, 5544.0, 6008.0, 6777.0, 7359.0]
Using person_id = 9999.0, total days = 80


In [17]:
# Standardise the window with the fitted scaler, then add a leading batch axis
flat_window = X_window.reshape(-1, n_features)
X_scaled = scaler.transform(flat_window).reshape(1, SEQ_LEN, n_features)

# Wrap as a float32 tensor on the inference device
X_tensor = torch.tensor(X_scaled, dtype=torch.float32).to(DEVICE)

# Forward pass with gradient tracking switched off
with torch.no_grad():
    pred = model(X_tensor)
    pred_score = float(pred.item())

first_day, last_day = window_dates[0], window_dates[-1]
print("Window dates:", first_day, "to", last_day)
print(f"True insomnia_score: {true_score:.4f}")
print(f"Pred insomnia_score: {pred_score:.4f}")


Window dates: 2022-06-20 00:00:00 to 2022-07-10 00:00:00
True insomnia_score: 0.4994
Pred insomnia_score: 0.4973


In [18]:
def score_to_label(score, low_cutoff=0.25, high_cutoff=0.55):
    """Bucket a continuous score into one of three integer labels.

    Args:
        score: numeric score to classify.
        low_cutoff: scores strictly below this value map to 0.
        high_cutoff: scores from ``low_cutoff`` up to (but excluding) this
            value map to 1.

    Returns:
        0 if ``score < low_cutoff``, 1 if ``low_cutoff <= score < high_cutoff``,
        otherwise 2.

    Note:
        A NaN score compares false against both cut-offs and therefore falls
        through to 2; check for NaN upstream if that is not wanted.
    """
    if score < low_cutoff:
        return 0
    if score < high_cutoff:
        return 1
    return 2

# Turn the regression output into a discrete label and show it next to the truth
pred_label = score_to_label(pred_score)

print(
    f"True label: {true_label}  |  "
    f"Predicted label: {pred_label}"
)


True label: 1  |  Predicted label: 1


In [21]:
# Run the model on a stand-alone SEQ_LEN-day sample file
sample = pd.read_csv("sample_low_risk_21.csv")
# Parse dates before sorting so the order is chronological, not lexical
sample["DATE"] = pd.to_datetime(sample["DATE"])
sample = sample.sort_values("DATE")

# The reshape below needs exactly one full window; fail with a clear message
if len(sample) != SEQ_LEN:
    raise ValueError(
        f"sample_low_risk_21.csv has {len(sample)} rows; expected {SEQ_LEN}"
    )

X_sample = sample[feature_cols].values.astype("float32")

# Scale & reshape to [1, SEQ_LEN, F]
X_sample_scaled = scaler.transform(X_sample.reshape(-1, n_features)).reshape(1, SEQ_LEN, n_features)
X_sample_tensor = torch.tensor(X_sample_scaled, dtype=torch.float32).to(DEVICE)

with torch.no_grad():
    pred = model(X_sample_tensor)
    pred_score = float(pred.item())

# Ground truth is read from the final day of the window
true_score = float(sample["insomnia_score"].iloc[-1])
true_label = int(sample["insomnia_label"].iloc[-1])

# Tag matches the file being evaluated (the low-risk sample)
print(f"[Sample Low Risk] True score: {true_score:.4f} | Pred score: {pred_score:.4f}")
print(f"[Sample Low Risk] True label: {true_label} | Pred label: {score_to_label(pred_score)}")


[Sample High Risk] True score: 0.2235 | Pred score: 0.2232
[Sample High Risk] True label: 0 | Pred label: 0
