In [1]:
# ============================
# train_model.py
# ============================
# pip install pandas numpy scikit-learn joblib

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib
from pathlib import Path

# ----------------------------
# 1A) (Optional) Generate synthetic training data if you don't have any
#     This will create 'health_data.csv' with engineered features per window
# ----------------------------

def make_synthetic_dataset(n_rows=1500, seed=42, out_path="health_data.csv"):
    """Generate a synthetic multi-label vital-sign dataset, one row per window.

    For every row, five latent boolean states (fever, hypoxemia, tachycardia,
    bradycardia, abnormal ECG) are sampled first; the window statistics are
    then drawn from distributions that depend on those states, clipped to
    fixed ranges, and finally turned into 0/1 labels.

    Parameters
    ----------
    n_rows : int
        Number of rows to generate. ``0`` yields an empty frame that still
        carries the expected columns.
    seed : int
        Seed passed to ``np.random.default_rng``; the same seed reproduces
        the same frame.
    out_path : str, path-like or None
        Destination of the CSV (written without the index). Defaults to
        ``"health_data.csv"``. Pass ``None`` to skip writing.

    Returns
    -------
    pandas.DataFrame
        Feature columns ``HR_mean, HR_std, RR_std, SpO2_min, SpO2_mean,
        Temp_max, ECG_abn_frac`` followed by label columns ``Tachycardia,
        Bradycardia, Fever, Hypoxemia, Abnormal_ECG``.
    """
    columns = [
        "HR_mean", "HR_std", "RR_std", "SpO2_min", "SpO2_mean",
        "Temp_max", "ECG_abn_frac",
        "Tachycardia", "Bradycardia", "Fever", "Hypoxemia", "Abnormal_ECG",
    ]
    # Saturation bounds shared by SpO2_min and SpO2_mean so that
    # SpO2_min <= SpO2_mean still holds after clipping.
    spo2_lo, spo2_hi = 80.0, 100.0

    rng = np.random.default_rng(seed)
    rows = []

    # NOTE: the order of the rng calls below is significant; changing it
    # changes the data produced for a given seed.
    for _ in range(n_rows):
        # Latent "true" state
        has_fever       = rng.random() < 0.25
        has_hypoxemia   = rng.random() < 0.18
        has_tachy       = rng.random() < 0.22
        # Bradycardia is only possible when tachycardia is absent.
        has_brady       = (not has_tachy) and (rng.random() < 0.10)
        has_ecg_abn     = rng.random() < 0.20

        # Generate window statistics consistent with states
        hr_base = 75 + rng.normal(0, 6)
        if has_tachy:
            hr_base = 105 + rng.normal(0, 8)
        if has_brady:
            hr_base = 52 + rng.normal(0, 5)

        hr_std = abs(rng.normal(4, 2))
        if has_ecg_abn:
            hr_std += rng.uniform(2, 8)

        rr_std = abs(rng.normal(0.05, 0.03))
        if has_ecg_abn:
            rr_std += rng.uniform(0.05, 0.15)

        spo2_mean = 97 + rng.normal(0, 1)
        if has_hypoxemia:
            spo2_mean = 90 + rng.normal(0, 2)
        # The window minimum sits at or below the (unclipped) mean.
        spo2_min = spo2_mean - abs(rng.normal(0.8, 0.7))

        temp_max = 36.7 + abs(rng.normal(0.2, 0.2))
        if has_fever:
            temp_max = 38.1 + abs(rng.normal(0.4, 0.3))

        ecg_abn_frac = rng.beta(1 + (3 if has_ecg_abn else 0),
                                6 + (0 if has_ecg_abn else 3))

        # Clip features to physiological ranges
        hr_base = float(np.clip(hr_base, 35, 180))
        hr_std = float(np.clip(hr_std, 0.1, 30))
        rr_std = float(np.clip(rr_std, 0.005, 0.5))
        temp_max = float(np.clip(temp_max, 35.0, 40.5))
        spo2_min = float(np.clip(spo2_min, spo2_lo, spo2_hi))
        # SpO2_mean used to be left unclipped and could exceed 100 %.
        spo2_mean = float(np.clip(spo2_mean, spo2_lo, spo2_hi))
        ecg_abn_frac = float(np.clip(ecg_abn_frac, 0, 1))

        # Build row; labels require both the latent state and a clipped
        # feature value on the matching side of its threshold.
        rows.append({
            "HR_mean": hr_base,
            "HR_std": hr_std,
            "RR_std": rr_std,
            "SpO2_min": spo2_min,
            "SpO2_mean": spo2_mean,
            "Temp_max": temp_max,
            "ECG_abn_frac": ecg_abn_frac,
            # Labels (multi-label)
            "Tachycardia": int(has_tachy and hr_base >= 100),
            "Bradycardia": int(has_brady and hr_base <= 55),
            "Fever": int(has_fever and temp_max >= 37.5),
            "Hypoxemia": int(has_hypoxemia and spo2_min <= 92),
            "Abnormal_ECG": int(has_ecg_abn or (rr_std >= 0.10) or (ecg_abn_frac >= 0.2)),
        })

    # Passing the column list keeps the schema intact even when rows is empty.
    df = pd.DataFrame(rows, columns=columns)
    if out_path is not None:
        df.to_csv(out_path, index=False)
    return df

# Reuse the CSV when it is already on disk; otherwise synthesize one
# (make_synthetic_dataset also writes it out with its default arguments).
csv_path = Path("health_data.csv")
if csv_path.exists():
    df = pd.read_csv(csv_path)
else:
    print("[i] No dataset found. Generating synthetic 'health_data.csv'...")
    df = make_synthetic_dataset()

# ----------------------------
# 1B) Train/Test split
# ----------------------------
# Model inputs (per-window statistics) and the multi-label targets.
FEATURES = [
    "HR_mean",
    "HR_std",
    "RR_std",
    "SpO2_min",
    "SpO2_mean",
    "Temp_max",
    "ECG_abn_frac",
]
LABELS = [
    "Tachycardia",
    "Fever",
    "Hypoxemia",
    "Bradycardia",
    "Abnormal_ECG",
]

X = df[FEATURES]
y = df[LABELS]

# Stratify on a single boolean -- "does this row carry at least one positive
# label?" -- rather than on the full label combination.
has_any_label = y.sum(axis=1) > 0

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=17,
    stratify=has_any_label,
)

# ----------------------------
# 1C) Build a pipeline: scale -> RandomForest (wrapped for multi-label)
#     (RF doesn't need scaling, but we add it for future model swaps)
# ----------------------------
# Base estimator; MultiOutputClassifier fits one copy of it per label column.
rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    min_samples_leaf=2,
    class_weight=None,
    random_state=17,
    n_jobs=-1,
)

# Named pipeline stages: standardize the features, then the multi-label forest.
scaler = StandardScaler(with_mean=True, with_std=True)
multi_label_rf = MultiOutputClassifier(rf)

pipe = Pipeline(steps=[
    ("scaler", scaler),
    ("clf", multi_label_rf),
])

pipe.fit(X_train, y_train)

# ----------------------------
# 1D) Evaluate & Save
# ----------------------------
# Score the held-out split and print a per-label precision/recall table.
y_pred = pipe.predict(X_test)
print("\n=== Evaluation (per label) ===")
report = classification_report(
    y_test, y_pred, target_names=LABELS, zero_division=0
)
print(report)

# Persist the fitted pipeline together with the metadata needed at inference.
model_bundle = {
    "pipeline": pipe,
    "features": FEATURES,
    "labels": LABELS,
    "window_size": 10  # expected window length for live inference (configurable)
}
joblib.dump(model_bundle, "multi_label_health_model.pkl")

print("\n[i] Saved model to 'multi_label_health_model.pkl'")
print("[i] Training complete.")



=== Evaluation (per label) ===
              precision    recall  f1-score   support

 Tachycardia       1.00      1.00      1.00        52
       Fever       1.00      1.00      1.00        89
   Hypoxemia       0.97      1.00      0.98        31
 Bradycardia       1.00      1.00      1.00        15
Abnormal_ECG       1.00      1.00      1.00        98

   micro avg       1.00      1.00      1.00       285
   macro avg       0.99      1.00      1.00       285
weighted avg       1.00      1.00      1.00       285
 samples avg       0.68      0.68      0.68       285


[i] Saved model to 'multi_label_health_model.pkl'
[i] Training complete.


In [5]:
# Example exporter snippet
# pip install micromlgen
from micromlgen import port
from sklearn.tree import DecisionTreeClassifier
import joblib, pandas as pd

# joblib.load unpickles the file, which can execute arbitrary code: only load
# a bundle that was produced by the training cell of this notebook.
bundle = joblib.load("multi_label_health_model.pkl")

# Take the feature/label names from the saved bundle instead of re-typing
# them, so the exported trees always match what the model was trained on.
FEATURES = list(bundle["features"])
LABELS   = list(bundle["labels"])

# Train small per-label trees on your original training set (X_train, y_train).
# X_train, y_train should be your window-level features & label columns.
# They are NOT stored in the bundle: this cell relies on the training cell
# having been run in the same kernel (recreate them or save during training).
for label in LABELS:
    dt = DecisionTreeClassifier(max_depth=6, min_samples_leaf=5, random_state=0)
    dt.fit(X_train[FEATURES], y_train[label])
    # Emit the tree as C source; classmap names the 0/1 classes in that code.
    code = port(dt, classmap={0:"NO", 1:"YES"})
    with open(f"model_{label}.h", "w") as f:
        f.write(code)