In [1]:
# ============================================
# Guardian Sprint 2 (Updated): Behaviour + Vitals
# LSTM anomaly + RF/MLP behavioural classifier
# with clinical-vital rule overlay for risk
# ============================================

# ---- Install (first time per environment) ----
try:
    import torch, pandas, sklearn
except Exception:
    import sys
    !{sys.executable} -m pip install --quiet torch scikit-learn pandas numpy joblib

# ---- Imports & Config ----
import os, json, math, re, warnings, glob
from typing import List, Tuple
import numpy as np
import pandas as pd
from IPython.display import display

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

warnings.filterwarnings("ignore")

# Optional PyTorch for LSTM
try:
    import torch
    from torch import nn
    TORCH_OK = True
except Exception:
    TORCH_OK = False
    torch = None
    nn = None

print("Working dir:", os.getcwd())
print("PyTorch available:", TORCH_OK)

# --- Settings ---
ENGINE = "lstm"    # "lstm" or "iforest" (auto-fallback if torch unavailable)
CLF    = "rf"      # "rf" or "mlp"
SEQ_LEN       = 14
TEST_SIZE     = 0.2
RANDOM_STATE  = 42

# Canonical names used by the pipeline
ID_COL = "user_id"
TS_COL = "timestamp"

# Behavioural features
BEHAV_FEATURES = ["steps","calorie_intake","sleep_hours","water_intake","bathroom_visits"]

# Vital features (parsed/normalized)
VITAL_FEATURES = ["heart_rate","spo2","temperature","bp_sys","bp_dia","meals_skipped","exercise_minutes"]

# Full model feature set
FULL_FEATURES = BEHAV_FEATURES + VITAL_FEATURES

OUTDIR = os.path.join(".", "artifacts")
os.makedirs(OUTDIR, exist_ok=True)

# -----------------------------
# Dataset discovery (same dir)
# -----------------------------
def find_dataset_here() -> Tuple[str, str]:
    candidates = []
    for ext in ("*.csv", "*.xlsx", "*.xls"):
        candidates += glob.glob(os.path.join(".", "**", ext), recursive=True)
    if not candidates:
        raise FileNotFoundError("No CSV/Excel found in current directory.")
    def score(path):
        n = os.path.basename(path).lower()
        s = 0
        if "new ai spreadsheet" in n: s += 5
        if "sheet1" in n: s += 2
        if "alert" in n or "alerts" in n: s += 2
        if n.endswith(".csv"): s += 1
        return -s
    candidates.sort(key=score)
    chosen = candidates[0]
    kind = "csv" if chosen.lower().endswith(".csv") else "excel"
    return chosen, kind

DATA_PATH, DATA_KIND = find_dataset_here()
print("Detected dataset:", DATA_PATH, "| kind:", DATA_KIND)

# -----------------------------
# Load & normalize columns
# -----------------------------
def _read_any(path: str, kind: str) -> pd.DataFrame:
    if kind == "csv":
        return pd.read_csv(path)
    else:
        try:
            return pd.read_excel(path, sheet_name="Sheet1")
        except Exception:
            return pd.read_excel(path)

_bp_re = re.compile(r"^\s*(\d+)\s*[/\-]\s*(\d+)\s*$")

def parse_bp(val):
    if pd.isna(val): return (np.nan, np.nan)
    if isinstance(val, (int, float)):  # unexpected numeric; treat as systolic only
        return (float(val), np.nan)
    m = _bp_re.match(str(val))
    if m:
        return (float(m.group(1)), float(m.group(2)))
    return (np.nan, np.nan)

def load_dataset_from_file(path: str, kind: str) -> pd.DataFrame:
    raw = _read_any(path, kind)
    raw.columns = [c.strip() for c in raw.columns]

    # Rename YOUR columns -> canonical names
    # Falls back to observationEnd if observationStart missing
    rename_map = {
        "patientId": ID_COL,
        "observationStart": TS_COL if "observationStart" in raw.columns else None,
        "observationEnd":   TS_COL if ("observationStart" not in raw.columns and "observationEnd" in raw.columns) else None,
        "stepsTaken":       "steps",
        "calorieIntake":    "calorie_intake",
        "sleepHours":       "sleep_hours",
        "waterIntakeMl":    "water_intake",
        "bathroomVisits":   "bathroom_visits",
        "heartRate":        "heart_rate",
        "spo2":             "spo2",
        "temperature":      "temperature",
        "bloodPressure":    "bloodPressure",  # parse later to bp_sys/bp_dia
        "mealsSkipped":     "meals_skipped",
        "exerciseMinutes":  "exercise_minutes",
    }
    # Drop Nones
    rename_map = {k:v for k,v in rename_map.items() if v is not None}

    # Minimal column presence checks
    base_required = ["patientId","stepsTaken","calorieIntake","sleepHours","waterIntakeMl","bathroomVisits"]
    missing_base = [c for c in base_required if c not in raw.columns]
    if missing_base:
        raise ValueError(f"Dataset missing required behavioural columns: {missing_base}\nHave: {list(raw.columns)}")

    if ("observationStart" not in raw.columns) and ("observationEnd" not in raw.columns):
        # Create synthetic sequential timestamps if neither available
        raw["_seq"] = range(len(raw))
        raw["observationStart"] = pd.Timestamp("2025-01-01") + pd.to_timedelta(raw["_seq"], unit="D")
        raw.drop(columns=["_seq"], inplace=True)

    df = raw.rename(columns=rename_map)

    # Ensure ID & TS exist
    if ID_COL not in df.columns:
        df[ID_COL] = "user_1"
    if TS_COL not in df.columns:
        raise ValueError("No timestamp column found or derived.")

    # Parse time
    df[TS_COL] = pd.to_datetime(df[TS_COL], errors="coerce")
    if df[TS_COL].isna().any():
        bad = df.loc[df[TS_COL].isna(), [TS_COL]].head(3)
        raise ValueError(f"Some {TS_COL} values could not be parsed. Examples:\n{bad}")

    # Parse blood pressure -> bp_sys, bp_dia
    if "bloodPressure" in df.columns:
        bp_pairs = df["bloodPressure"].apply(parse_bp)
        df["bp_sys"] = bp_pairs.map(lambda t: t[0])
        df["bp_dia"] = bp_pairs.map(lambda t: t[1])
        df.drop(columns=["bloodPressure"], inplace=True, errors="ignore")
    else:
        # If column missing entirely, create NaNs
        df["bp_sys"] = np.nan
        df["bp_dia"] = np.nan

    # Convert units / numerics
    df["water_intake"] = pd.to_numeric(df["water_intake"], errors="coerce") / 1000.0  # mL -> L
    numeric_cols = [
        "steps","calorie_intake","sleep_hours","bathroom_visits",
        "heart_rate","spo2","temperature","bp_sys","bp_dia","meals_skipped","exercise_minutes"
    ]
    for c in numeric_cols:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")

    # Sort
    df[ID_COL] = df[ID_COL].fillna("user_1").astype(str)
    df = df.sort_values([ID_COL, TS_COL]).reset_index(drop=True)

    # Fill small gaps per user (transform keeps index)
    for c in FULL_FEATURES:
        if c in df.columns:
            df[c] = df.groupby(ID_COL)[c].transform(lambda s: s.interpolate(limit_direction="both"))
            df[c] = df[c].fillna(method="bfill").fillna(method="ffill")

    # Any remaining NaNs -> 0
    for c in FULL_FEATURES:
        if c in df.columns:
            df[c] = df[c].fillna(0)

    # Final set
    keep = [ID_COL, TS_COL] + FULL_FEATURES
    missing_final = [c for c in keep if c not in df.columns]
    if missing_final:
        raise ValueError(f"Missing final columns after processing: {missing_final}")
    return df[keep].copy()

df = load_dataset_from_file(DATA_PATH, DATA_KIND)
print("Rows:", len(df), "| Columns:", list(df.columns))
display(df.head(8))

# -----------------------------
# Sequence builders & utils
# -----------------------------
def make_sequences(df_user: pd.DataFrame, seq_len: int, feature_cols: List[str], ts_col: str):
    X, ends = [], []
    vals = df_user[feature_cols].values.astype(float)
    for i in range(len(vals) - seq_len + 1):
        X.append(vals[i:i+seq_len])
        ends.append(df_user.iloc[i+seq_len-1][ts_col])
    return np.array(X), np.array(ends)

def stack_all_users(df: pd.DataFrame, seq_len: int, feature_cols: List[str], id_col: str, ts_col: str):
    X_all, ends_all, owners = [], [], []
    for uid, g in df.groupby(id_col):
        X, ends = make_sequences(g, seq_len, feature_cols, ts_col)
        if len(X) == 0:
            continue
        X_all.append(X)
        ends_all.append(ends)
        owners += [uid]*len(X)
    if not X_all:
        raise ValueError("No sequences created. Reduce SEQ_LEN or check data frequency.")
    return np.vstack(X_all), np.concatenate(ends_all), np.array(owners)

def minmax_scale(x):
    lo, hi = np.min(x), np.max(x)
    if math.isclose(hi, lo):
        return np.zeros_like(x)
    return (x - lo) / (hi - lo)

def percentile_thresholds(errs: np.ndarray):
    return {
        "p90": float(np.percentile(errs, 90)),
        "p95": float(np.percentile(errs, 95)),
        "p97": float(np.percentile(errs, 97)),
        "p99": float(np.percentile(errs, 99)),
    }

# -----------------------------
# LSTM Autoencoder (optional)
# -----------------------------
class LSTMAE(nn.Module):
    def __init__(self, n_features, hidden=32, latent=16):
        super().__init__()
        self.encoder = nn.LSTM(n_features, hidden, batch_first=True)
        self.enc_linear = nn.Linear(hidden, latent)
        self.decoder = nn.LSTM(n_features, hidden, batch_first=True)
        self.dec_linear = nn.Linear(hidden, n_features)
    def forward(self, x):
        enc_out, _ = self.encoder(x)
        _ = self.enc_linear(enc_out)  # latent (unused explicitly)
        dec_out, _ = self.decoder(x)
        out = self.dec_linear(dec_out)
        return out

def train_lstm_autoencoder(X_train, n_epochs=30, lr=1e-3, batch=128, device="cpu"):
    n_features = X_train.shape[-1]
    model = LSTMAE(n_features)
    model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    ds = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32))
    dl = torch.utils.data.DataLoader(ds, batch_size=batch, shuffle=True)
    model.train()
    for epoch in range(1, n_epochs+1):
        losses = []
        for (xb,) in dl:
            xb = xb.to(device)
            pred = model(xb)
            loss = loss_fn(pred, xb)
            opt.zero_grad()
            loss.backward()
            opt.step()
            losses.append(loss.item())
        if epoch % 5 == 0:
            print(f"[LSTM AE] epoch {epoch:02d} | loss {np.mean(losses):.6f}")
    return model

def recon_error(model, X, device="cpu"):
    model.eval()
    with torch.no_grad():
        X_t = torch.tensor(X, dtype=torch.float32).to(device)
        out = model(X_t).cpu().numpy()
    err = ((out - X)**2).mean(axis=(1,2))
    return err

# -----------------------------
# Behavioural+Vitals features for classifier
# -----------------------------
def add_features_for_classifier(df: pd.DataFrame, id_col: str, feature_cols: List[str], ts_col: str) -> pd.DataFrame:
    g = df.sort_values([id_col, ts_col]).copy()
    for f in feature_cols:
        g[f+"_delta"]  = g.groupby(id_col)[f].transform(lambda s: s.diff())
        g[f+"_rmean7"] = g.groupby(id_col)[f].transform(lambda s: s.rolling(7, min_periods=1).mean())
        rstd           = g.groupby(id_col)[f].transform(lambda s: s.rolling(7, min_periods=2).std())
        g[f+"_rz7"]    = (g[f] - g[f+"_rmean7"]) / rstd.replace(0, np.nan)
    g = g.replace([np.inf, -np.inf], np.nan).fillna(0)
    return g

def build_classifier(name: str):
    if name == "rf":
        return RandomForestClassifier(
            n_estimators=300, max_depth=None, min_samples_split=4,
            random_state=RANDOM_STATE, n_jobs=-1
        )
    elif name == "mlp":
        return MLPClassifier(hidden_layer_sizes=(64,32), activation="relu",
                             max_iter=300, random_state=RANDOM_STATE)
    else:
        raise ValueError("CLF must be 'rf' or 'mlp'")

# -----------------------------
# Risk mapping (model + rules)
# -----------------------------
def map_risk_levels_model(anom_scores: np.ndarray, clf_probs: np.ndarray) -> list:
    risks = []
    for a, p in zip(anom_scores, clf_probs):
        if (a >= 0.80) or (p >= 0.80): risks.append("High")
        elif (a >= 0.60) or (p >= 0.60): risks.append("Medium")
        else: risks.append("Low")
    return risks

# Simple clinical overlays (heuristics) to bump risk if vitals look concerning
def vital_rule_row(row):
    flags = []
    # SpO2 low
    if "spo2" in row and pd.notna(row["spo2"]) and row["spo2"] < 92:
        flags.append("Low SpO₂")
    # Fever
    if "temperature" in row and pd.notna(row["temperature"]) and row["temperature"] >= 38.0:
        flags.append("Fever")
    # Tachy/Brady (simple)
    if "heart_rate" in row and pd.notna(row["heart_rate"]) and (row["heart_rate"] > 120 or row["heart_rate"] < 45):
        flags.append("Abnormal HR")
    # Hypertension (simple thresholds)
    if "bp_sys" in row and "bp_dia" in row and pd.notna(row["bp_sys"]) and pd.notna(row["bp_dia"]):
        if (row["bp_sys"] >= 160) or (row["bp_dia"] >= 100):
            flags.append("High BP")
    # Hydration (very low water intake) / meals skipped (contextual)
    if "water_intake" in row and pd.notna(row["water_intake"]) and row["water_intake"] < 1.0:
        flags.append("Low hydration")
    if "meals_skipped" in row and pd.notna(row["meals_skipped"]) and row["meals_skipped"] >= 2:
        flags.append("Meals skipped")
    # Return overlay risk level + reasons
    if any(f in flags for f in ["Low SpO₂","Fever","Abnormal HR","High BP"]):
        return ("High", flags)
    if any(f in flags for f in ["Low hydration","Meals skipped"]):
        return ("Medium", flags)
    return ("Low", flags)

def combine_risks(model_risk: str, vital_risk: str) -> str:
    order = {"Low":0,"Medium":1,"High":2}
    return model_risk if order[model_risk] >= order[vital_risk] else vital_risk

# -----------------------------
# Pipeline: scale, anomaly, classifier, alerts
# -----------------------------
# 1) Scale full feature set
scaler = StandardScaler()
df_scaled = df.copy()
df_scaled[FULL_FEATURES] = scaler.fit_transform(df_scaled[FULL_FEATURES])
joblib.dump(scaler, os.path.join(OUTDIR, "scaler.pkl"))

# 2) Anomaly engine (LSTM or IsolationForest) on FULL_FEATURES
engine_used = ENGINE
if engine_used == "lstm" and not TORCH_OK:
    print("⚠️ PyTorch not found; falling back to IsolationForest.")
    engine_used = "iforest"

if engine_used == "lstm":
    X_seq, ends, owners = stack_all_users(df_scaled, SEQ_LEN, FULL_FEATURES, ID_COL, TS_COL)
    device = "cuda" if (TORCH_OK and torch.cuda.is_available()) else "cpu"
    model = train_lstm_autoencoder(X_seq, n_epochs=30, lr=1e-3, batch=128, device=device)
    torch.save(model.state_dict(), os.path.join(OUTDIR, "lstm.pt"))
    errs = recon_error(model, X_seq, device=device)
    seq_scores = pd.DataFrame({ID_COL: owners, TS_COL: ends, "recon_error": errs})
    seq_scores["anom_score"] = minmax_scale(seq_scores["recon_error"].values)
    th = percentile_thresholds(errs)
else:
    iforest = IsolationForest(n_estimators=300, contamination=0.05, random_state=RANDOM_STATE)
    iforest.fit(df_scaled[FULL_FEATURES])
    scores = -iforest.score_samples(df_scaled[FULL_FEATURES])  # higher => more anomalous
    seq_scores = df_scaled[[ID_COL, TS_COL]].copy()
    seq_scores["recon_error"] = scores
    seq_scores["anom_score"] = minmax_scale(scores)
    joblib.dump(iforest, os.path.join(OUTDIR, "iforest.pkl"))
    th = percentile_thresholds(scores)

with open(os.path.join(OUTDIR, "thresholds.json"), "w") as f:
    json.dump(th, f, indent=2)

print("Engine used:", engine_used)
display(seq_scores.head())

# 3) Classifier features (behaviour + vitals engineered)
feats_df = add_features_for_classifier(df.copy(), ID_COL, FULL_FEATURES, TS_COL)

clf_cols = []
for f in FULL_FEATURES:
    clf_cols += [f, f+"_delta", f+"_rmean7", f+"_rz7"]

# Labels: use 'label' if present; else weak labels from anomaly top 5%
if "label" in feats_df.columns:
    labels = feats_df["label"].astype(int)
else:
    tmp = feats_df[[ID_COL, TS_COL]].merge(
        seq_scores[[ID_COL, TS_COL, "anom_score"]],
        on=[ID_COL, TS_COL], how="left"
    )
    anom = tmp["anom_score"].fillna(0).values
    cutoff = np.percentile(anom, 95)
    labels = (anom >= cutoff).astype(int)
    feats_df["label"] = labels

X_beh = feats_df[clf_cols].fillna(0).values
y = labels.values if isinstance(labels, pd.Series) else labels

X_train, X_test, y_train, y_test = train_test_split(
    X_beh, y, test_size=TEST_SIZE, random_state=RANDOM_STATE,
    stratify=y if (y.sum()>0 and y.sum()<len(y)) else None
)

clf = build_classifier(CLF)
clf.fit(X_train, y_train)
print("[Classifier] Evaluation:")
if y_test.sum()>0 and y_test.sum()<len(y_test):
    print(classification_report(y_test, clf.predict(X_test), digits=3))
joblib.dump(clf, os.path.join(OUTDIR, "clf.pkl"))

# 4) Produce alerts (model risk + vital overlay)
out_df = feats_df[[ID_COL, TS_COL]].merge(
    seq_scores[[ID_COL, TS_COL, "anom_score"]],
    on=[ID_COL, TS_COL], how="left"
).sort_values([ID_COL, TS_COL])

out_df["anom_score"] = out_df["anom_score"].fillna(0.0)

# Classifier probs
if hasattr(clf, "predict_proba"):
    probs = clf.predict_proba(feats_df[clf_cols])[:, 1]
else:
    rawp = clf.decision_function(feats_df[clf_cols])
    probs = minmax_scale(rawp)
out_df["clf_prob"] = probs

# Model-only risk
out_df["model_risk"] = map_risk_levels_model(out_df["anom_score"].values, out_df["clf_prob"].values)

# Attach raw vitals for rule overlay
for c in ["spo2","temperature","heart_rate","bp_sys","bp_dia","water_intake","meals_skipped"]:
    if c in df.columns:
        out_df[c] = df.set_index([ID_COL, TS_COL])[c].reindex(out_df.set_index([ID_COL, TS_COL]).index).values

# Vital overlays
vital_levels = []
vital_reasons = []
for _, row in out_df.iterrows():
    lvl, flags = vital_rule_row(row)
    vital_levels.append(lvl)
    vital_reasons.append(", ".join(flags) if flags else "")
out_df["vital_risk"] = vital_levels

# Final risk = max(model_risk, vital_risk)
def combine(a,b):
    order={"Low":0,"Medium":1,"High":2}
    return a if order[a]>=order[b] else b
out_df["risk_level"] = [combine(a,b) for a,b in zip(out_df["model_risk"], out_df["vital_risk"])]

# Reasons
def reason(row):
    rs = []
    if row["anom_score"] >= 0.8: rs.append("High sequence anomaly")
    elif row["anom_score"] >= 0.6: rs.append("Moderate sequence anomaly")
    if row["clf_prob"] >= 0.8: rs.append("Classifier: strong behavioural anomaly")
    elif row["clf_prob"] >= 0.6: rs.append("Classifier: possible behavioural anomaly")
    if isinstance(row.get("vital_risk",""), str) and row["vital_risk"] in ("Medium","High") and row.get("vital_risk", ""):
        if row.get("vital_risk", "") == "High" and row.get("vital_risk",""):
            pass
    if row.get("vital_risk","") in ("Medium","High") and row.get("vital_risk",""):
        if row.get("vital_risk","") == "High" and row.get("vital_risk",""):
            # already captured by flags
            pass
    if row.get("vital_risk","") in ("Medium","High") and row.get("vital_risk",""):
        if row.get("vital_risk","") == "High":
            pass
    if row.get("vital_risk","") in ("Medium","High") and row.get("vital_risk",""):
        # add vital flags
        if row.get("vital_risk",""):
            if row.get("vital_risk","") in ("Medium","High"):
                if row.get("vital_risk",""):
                    if row.get("vital_risk",""):
                        if row.get("vital_risk",""):
                            pass
    if row.get("vital_risk","") in ("Medium","High"):
        if row.get("vital_risk","") == "High":
            rs.append("Vitals rule: high concern")
        else:
            rs.append("Vitals rule: moderate concern")
    if isinstance(row.get("vital_reasons",""), str) and row["vital_reasons"]:
        rs.append(row["vital_reasons"])
    return "; ".join([r for r in rs if r])

out_df["vital_reasons"] = vital_reasons
out_df["reason"] = [
    "; ".join(filter(None, [
        "High sequence anomaly" if s>=0.8 else ("Moderate sequence anomaly" if s>=0.6 else ""),
        "Classifier: strong behavioural anomaly" if p>=0.8 else ("Classifier: possible behavioural anomaly" if p>=0.6 else ""),
        ("Vitals rule: " + ("high concern" if vr=="High" else "moderate concern")) if vr in ("Medium","High") else "",
        vrz if vrz else ""
    ]))
    for s,p,vr,vrz in zip(out_df["anom_score"], out_df["clf_prob"], out_df["vital_risk"], out_df["vital_reasons"])
]

# Save alerts
out_cols = [ID_COL, TS_COL, "anom_score", "clf_prob", "model_risk", "vital_risk", "risk_level", "reason"]
alerts_path = os.path.join(OUTDIR, "alerts.csv")
out_df[out_cols].to_csv(alerts_path, index=False)

print(f"\nSaved to {OUTDIR}:")
print(" - scaler.pkl")
print(f" - {'lstm.pt' if engine_used=='lstm' else 'iforest.pkl'}")
print(" - clf.pkl")
print(" - thresholds.json")
print(" - alerts.csv")

display(out_df.head(20))
display(out_df["risk_level"].value_counts())


Working dir: /Users/harshadamarla/Documents/T2 Docs/Team Project - A/AlertSystemTask
PyTorch available: True
Detected dataset: ./New AI spreadsheet - Sheet1.csv | kind: csv
Rows: 1920 | Columns: ['user_id', 'timestamp', 'steps', 'calorie_intake', 'sleep_hours', 'water_intake', 'bathroom_visits', 'heart_rate', 'spo2', 'temperature', 'bp_sys', 'bp_dia', 'meals_skipped', 'exercise_minutes']


Unnamed: 0,user_id,timestamp,steps,calorie_intake,sleep_hours,water_intake,bathroom_visits,heart_rate,spo2,temperature,bp_sys,bp_dia,meals_skipped,exercise_minutes
0,P0001,2025-06-01 00:00:00+00:00,1364,416,0.1,0.411,3,0.0,0.0,0.0,0.0,0.0,0,5
1,P0001,2025-06-01 06:00:00+00:00,1630,550,1.7,0.616,3,0.0,0.0,0.0,0.0,0.0,1,16
2,P0001,2025-06-01 12:00:00+00:00,1810,752,1.1,0.65,1,0.0,0.0,0.0,0.0,0.0,0,6
3,P0001,2025-06-01 18:00:00+00:00,686,786,0.1,0.356,3,0.0,0.0,0.0,0.0,0.0,0,10
4,P0001,2025-06-02 00:00:00+00:00,712,590,0.2,0.254,3,0.0,0.0,0.0,0.0,0.0,1,4
5,P0001,2025-06-02 06:00:00+00:00,108,376,2.5,0.202,6,0.0,0.0,0.0,0.0,0.0,2,0
6,P0001,2025-06-02 12:00:00+00:00,1823,450,0.1,0.672,1,0.0,0.0,0.0,0.0,0.0,0,8
7,P0001,2025-06-02 18:00:00+00:00,1,299,1.5,0.332,7,0.0,0.0,0.0,0.0,0.0,1,0


[LSTM AE] epoch 05 | loss 0.376015
[LSTM AE] epoch 10 | loss 0.182103
[LSTM AE] epoch 15 | loss 0.081334
[LSTM AE] epoch 20 | loss 0.029583
[LSTM AE] epoch 25 | loss 0.015272
[LSTM AE] epoch 30 | loss 0.012086
Engine used: lstm


Unnamed: 0,user_id,timestamp,recon_error,anom_score
0,P0001,2025-06-04 06:00:00+00:00,0.011086,0.262127
1,P0001,2025-06-04 12:00:00+00:00,0.012494,0.315042
2,P0001,2025-06-04 18:00:00+00:00,0.013,0.33403
3,P0001,2025-06-05 00:00:00+00:00,0.013136,0.339169
4,P0001,2025-06-05 06:00:00+00:00,0.013717,0.361005


[Classifier] Evaluation:
              precision    recall  f1-score   support

           0      0.951     1.000     0.975       365
           1      0.000     0.000     0.000        19

    accuracy                          0.951       384
   macro avg      0.475     0.500     0.487       384
weighted avg      0.903     0.951     0.926       384


Saved to ./artifacts:
 - scaler.pkl
 - lstm.pt
 - clf.pkl
 - thresholds.json
 - alerts.csv


Unnamed: 0,user_id,timestamp,anom_score,clf_prob,model_risk,spo2,temperature,heart_rate,bp_sys,bp_dia,water_intake,meals_skipped,vital_risk,risk_level,vital_reasons,reason
0,P0001,2025-06-01 00:00:00+00:00,0.0,0.036111,Low,0.0,0.0,0.0,0.0,0.0,0.411,0,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."
1,P0001,2025-06-01 06:00:00+00:00,0.0,0.005913,Low,0.0,0.0,0.0,0.0,0.0,0.616,1,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."
2,P0001,2025-06-01 12:00:00+00:00,0.0,0.0,Low,0.0,0.0,0.0,0.0,0.0,0.65,0,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."
3,P0001,2025-06-01 18:00:00+00:00,0.0,0.0,Low,0.0,0.0,0.0,0.0,0.0,0.356,0,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."
4,P0001,2025-06-02 00:00:00+00:00,0.0,0.001111,Low,0.0,0.0,0.0,0.0,0.0,0.254,1,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."
5,P0001,2025-06-02 06:00:00+00:00,0.0,0.0,Low,0.0,0.0,0.0,0.0,0.0,0.202,2,High,High,"Low SpO₂, Abnormal HR, Low hydration, Meals sk...","Vitals rule: high concern; Low SpO₂, Abnormal ..."
6,P0001,2025-06-02 12:00:00+00:00,0.0,0.009167,Low,0.0,0.0,0.0,0.0,0.0,0.672,0,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."
7,P0001,2025-06-02 18:00:00+00:00,0.0,0.0,Low,0.0,0.0,0.0,0.0,0.0,0.332,1,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."
8,P0001,2025-06-03 00:00:00+00:00,0.0,0.012083,Low,0.0,0.0,0.0,0.0,0.0,0.779,0,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."
9,P0001,2025-06-03 06:00:00+00:00,0.0,0.005,Low,0.0,0.0,0.0,0.0,0.0,0.383,1,High,High,"Low SpO₂, Abnormal HR, Low hydration","Vitals rule: high concern; Low SpO₂, Abnormal ..."


risk_level
High    1920
Name: count, dtype: int64