In [1]:
import numpy as np
import pandas as pd

# Fixed seed so a fresh kernel regenerates exactly the same cohort.
np.random.seed(42)
n = 7000  # number of synthetic patient rows

# Base physiology
# Every variable is an independent normal draw. The draws are made in the
# same order and with the same parameters as before; bounds are applied
# after each draw, so the random stream is untouched and only values that
# fall outside the allowed range are moved onto the boundary.
# RR, HR and MAP are left unbounded: zero lies at least four standard
# deviations below each of their means.
RR = np.random.normal(24, 6, n)
# SpO2 is treated as a percentage, so it is capped at 100.
SpO2 = np.clip(np.random.normal(92, 4, n), None, 100.0)
# FiO2 is treated as a percentage between 21 (room air) and 100. Without
# the bounds, N(45, 20) produces values near or below zero, which makes the
# SpO2 / FiO2 ratio computed later explode or turn negative.
FiO2 = np.clip(np.random.normal(45, 20, n), 21.0, 100.0)
HR = np.random.normal(100, 20, n)
MAP = np.random.normal(75, 12, n)
# A concentration cannot be negative.
Lactate = np.clip(np.random.normal(2.5, 1.5, n), 0.0, None)
# Glasgow Coma Scale totals range from 3 to 15.
# NOTE(review): values stay continuous here; round them if integer scores
# are required downstream.
GCS = np.clip(np.random.normal(13, 2.5, n), 3.0, 15.0)

# Trends
# Changes over time may legitimately take either sign, so they are not
# bounded.
RR_slope = np.random.normal(1.0, 2.0, n)
SpO2_slope = np.random.normal(-1.0, 2.0, n)
FiO2_slope = np.random.normal(2.0, 4.0, n)
Lactate_change = np.random.normal(0.3, 0.7, n)

# Oxygen device (ordinal severity)
# Drawn uniformly over the four levels, independently of the variables above.
device = np.random.choice([0,1,2,3], n)  # 0=RA,1=NC,2=HFNC,3=NIV

# Risk model
# Linear score on the logit scale. Each entry pairs a signed weight with a
# predictor; the products are added in the listed order. A negative weight
# means larger values of that predictor lower the score. HR and FiO2_slope
# are not part of the score.
risk_terms = [
    (0.05, RR),
    (-0.07, SpO2),
    (0.04, FiO2),
    (0.9, Lactate),
    (-0.04, MAP),
    (-0.25, GCS),
    (0.4, RR_slope),
    (-0.5, SpO2_slope),
    (0.4, Lactate_change),
    (0.6, device),
]
risk = sum(weight * values for weight, values in risk_terms)

# Logistic link: map the score onto a probability in (0, 1).
prob = 1.0 / (1.0 + np.exp(-risk))

# NOTE(review): this Bernoulli draw is never read -- `y` is reassigned a few
# lines below. It does advance NumPy's global random state, so it affects
# which rows the label-noise step picks; it is kept so the generated labels
# stay the same.
y = np.random.binomial(1, prob)

# Force ~30% positive
# Rows whose probability lies above the 70th percentile become positives.
threshold = np.percentile(prob, 70)
is_high_risk = prob > threshold
y = is_high_risk.astype(int)

# Add 5% label noise
# Pick 5% of the rows without repetition and invert their 0/1 label.
n_flips = int(0.05 * n)
flip_idx = np.random.choice(n, n_flips, replace=False)
y[flip_idx] = 1 - y[flip_idx]

# Derived feature: element-wise ratio of the two oxygenation arrays.
spo2_fio2_ratio = SpO2 / FiO2

# Column name -> values, in the order the columns appear in the final table.
column_data = {
    # Point-in-time measurements (plus the derived ratio)
    "RR": RR,
    "SpO2": SpO2,
    "FiO2": FiO2,
    "SpO2_FiO2_ratio": spo2_fio2_ratio,
    "HR": HR,
    "MAP": MAP,
    "Lactate": Lactate,
    "GCS": GCS,
    # Trend features
    "RR_slope": RR_slope,
    "SpO2_slope": SpO2_slope,
    "FiO2_slope": FiO2_slope,
    "Lactate_change": Lactate_change,
    # Oxygen device level and the 0/1 outcome label
    "Oxygen_Device": device,
    "Intubation_6h": y,
}

# One row per simulated patient.
df = pd.DataFrame(column_data)

In [2]:
# Write the synthetic table to a CSV file in the working directory; the
# row index is left out so the file holds only the named columns.
output_csv = "synthetic_ulmis_data.csv"
df.to_csv(output_csv, index=False)