### Data generation code

In [None]:
import numpy as np, pandas as pd, os
from datetime import datetime, timedelta

# --- tiny helpers ---
def _ar1(n, mu, phi, sigma, rng):
    """Simulate an AR(1) series of length ``n`` that reverts toward ``mu``.

    The first sample is drawn from Normal(mu, sigma). Every later sample is
    ``mu + phi * (previous - mu)`` plus a fresh Normal(0, sigma) shock.

    Args:
        n: Number of samples. The first slot is always written, so ``n``
            must be at least 1.
        mu: Level the series reverts to.
        phi: Weight applied to the previous deviation from ``mu``.
        sigma: Standard deviation of the initial draw and of each shock.
        rng: Object exposing ``normal(loc, scale)`` (e.g. a NumPy Generator).

    Returns:
        NumPy float array of length ``n``.
    """
    series = np.empty(n)
    prev = rng.normal(mu, sigma)
    series[0] = prev
    for t in range(1, n):
        # One shock per step, drawn in time order.
        prev = mu + phi * (prev - mu) + rng.normal(0, sigma)
        series[t] = prev
    return series

def _meal_plan(n, start_time, interval_min, rng):
    """Place simulated meals on a regular sampling grid.

    The grid has ``n`` timestamps starting at ``start_time`` and spaced
    ``interval_min`` minutes apart. Samples are grouped by the number of
    whole days elapsed since the first timestamp. Each group gets breakfast,
    lunch and dinner, plus a snack with probability 0.25. A meal is assigned
    to the sample in its group whose clock minute-of-day is closest to the
    meal's target minute; if two meals pick the same sample, the later one
    overwrites the earlier one.

    Args:
        n: Number of grid samples.
        start_time: ``datetime`` of the first sample.
        interval_min: Spacing between samples, in minutes.
        rng: NumPy Generator used for every random choice.

    Returns:
        Tuple ``(meal_event, portion_size, meal_kcal)`` of length-``n``
        arrays. ``meal_event`` holds ``"<label>:<type>"`` or ``"none"``,
        ``portion_size`` holds the portion label or ``"none"``, and
        ``meal_kcal`` holds the energy value or NaN where there is no meal.
    """
    stamps = [start_time + timedelta(minutes=k * interval_min) for k in range(n)]
    origin = stamps[0]
    day_idx = np.array([(s - origin).days for s in stamps])
    minute_of_day = np.array([s.hour * 60 + s.minute for s in stamps])

    anchor_min = {"breakfast": 8 * 60, "lunch": 13 * 60, "dinner": 19 * 60}
    portion_opts = ["small", "normal", "large"]
    type_opts = ["balanced", "high_carb", "high_protein"]
    kcal_by_portion = {"small": 350, "normal": 650, "large": 950, "snack": 220}
    # (label, probabilities over type_opts, probabilities over portion_opts)
    main_meals = (
        ("breakfast", [0.6, 0.25, 0.15], [0.3, 0.6, 0.1]),
        ("lunch", [0.5, 0.35, 0.15], [0.2, 0.6, 0.2]),
        ("dinner", [0.4, 0.35, 0.25], [0.2, 0.5, 0.3]),
    )

    meal_event = np.full(n, "none", dtype=object)
    portion_size = np.full(n, "none", dtype=object)
    meal_kcal = np.full(n, np.nan)

    for d in np.unique(day_idx):
        # Decide today's menu first: type, then portion, for each main meal.
        todays = []
        for label, type_p, portion_p in main_meals:
            kind = rng.choice(type_opts, p=type_p)
            size = rng.choice(portion_opts, p=portion_p)
            todays.append((label, kind, size))
        if rng.random() < 0.25:
            snack_kind = rng.choice(["balanced", "high_carb"], p=[0.4, 0.6])
            todays.append(("snack", snack_kind, "small"))

        slots = np.where(day_idx == d)[0]
        slot_minutes = minute_of_day[slots]
        for label, kind, size in todays:
            # The fallback minute is drawn for every meal (only the snack has
            # no anchor and actually uses it), which keeps the random stream
            # in a fixed order.
            fallback = rng.integers(10 * 60, 17 * 60)
            jitter = rng.integers(-20, 21)
            target = anchor_min.get(label, fallback) + jitter
            hit = slots[np.argmin(np.abs(slot_minutes - target))]

            if label == "snack":
                mean_kcal = kcal_by_portion["snack"]
            else:
                mean_kcal = kcal_by_portion[size]
            # Energy is normally distributed around the base, floored at 100.
            energy = float(max(100, rng.normal(mean_kcal, mean_kcal * 0.12)))

            meal_event[hit] = f"{label}:{kind}"
            portion_size[hit] = size
            meal_kcal[hit] = energy
    return meal_event, portion_size, meal_kcal

def _apply_meal_effects(n, interval_min, meal_kcal, meal_event):
    """Turn meal events into additive, exponentially decaying vital offsets.

    Each non-NaN entry of ``meal_kcal`` starts a response at its own index.
    The response is scaled by ``kcal / 600`` times a per-meal-type factor,
    decays as ``exp(-k / tau)`` with ``tau`` equal to 90 minutes in samples,
    and is cut off after 180 minutes in samples (both at least one sample).
    Overlapping responses add up.

    Args:
        n: Length of the output arrays.
        interval_min: Sampling interval in minutes.
        meal_kcal: Per-sample energy values, NaN where there is no meal.
        meal_event: Per-sample ``"<label>:<type>"`` strings; only read where
            ``meal_kcal`` is not NaN.

    Returns:
        Tuple ``(eff_hr, eff_rr, eff_sys, eff_tmp, eff_spo)`` of length-``n``
        float arrays.
    """
    decay_steps = max(1, int(round(90 / interval_min)))
    window_steps = max(1, int(round(180 / interval_min)))
    type_gain = {"balanced": 1.0, "high_carb": 1.25, "high_protein": 0.8}
    # Peak offset per unit of scale, in return order: hr, rr, sys, tmp, spo.
    peaks = (3.0, 0.6, -4.0, 0.03, -0.10)
    effects = [np.zeros(n) for _ in peaks]

    for start, kcal in enumerate(meal_kcal):
        if np.isnan(kcal):
            continue
        kind = meal_event[start].split(":")[1]
        scale = (kcal / 600.0) * type_gain[kind]
        stop = min(n, start + window_steps)  # truncate at the end of the grid
        decay = np.exp(-np.arange(0, stop - start) / decay_steps)
        for channel, peak in zip(effects, peaks):
            channel[start:stop] += peak * scale * decay
    return tuple(effects)

# --- simulate one patient over multiple days, 10-min cadence, smooth vitals ---
def simulate_patient_period(patient_id, start_time, days=7, interval_min=10, seed=None):
    """Simulate one patient's vitals, activity, emotion and meals on a fixed grid.

    Every random quantity is drawn from a single generator seeded with
    ``seed``, so the same arguments and the same non-None seed yield the same
    frame. (Activity and emotion were previously drawn from NumPy's global
    random state, which ignored ``seed``.)

    Args:
        patient_id: Value stored in the ``patient_id`` column of every row.
        start_time: ``datetime`` of the first sample.
        days: Number of days to simulate.
        interval_min: Minutes between consecutive samples.
        seed: Seed passed to ``np.random.default_rng``; ``None`` gives
            non-reproducible output.

    Returns:
        ``pandas.DataFrame`` with ``days * (1440 // interval_min)`` rows and
        the columns patient_id, timestamp, heart_rate, spo2, bp_sys, bp_dia,
        resp_rate, temperature, steps, acc_x, acc_y, acc_z, activity_level,
        emotion, meal_event, portion_size, meal_kcal and risk_alert.
    """
    rng = np.random.default_rng(seed)
    n = days * (24*60 // interval_min)
    ts = [start_time + timedelta(minutes=i*interval_min) for i in range(n)]

    # Per-patient baselines (drawn once) feeding smooth AR(1) cores.
    hr = _ar1(n, rng.normal(75, 6), 0.95, 1.2, rng)
    rr = _ar1(n, rng.normal(16, 2), 0.95, 0.3, rng)
    sbp = _ar1(n, rng.normal(115, 8), 0.97, 1.5, rng)
    dbp = _ar1(n, rng.normal(75, 6), 0.97, 1.2, rng)
    spo = _ar1(n, rng.normal(97.2, 1), 0.98, 0.15, rng)
    tmp = _ar1(n, rng.normal(36.75, 0.15), 0.98, 0.03, rng)

    # Circadian component: one 24 h sine with a random per-patient phase.
    phase = rng.integers(0, 1440)
    mins = np.arange(n)*interval_min + phase
    circadian = np.sin(2*np.pi*mins/1440)
    hr += 2.0*circadian
    rr += 0.6*circadian
    tmp += 0.12*circadian

    # Activity level, then step counts and accelerometer noise scaled by it.
    # Drawn from `rng` (not the global np.random state) so `seed` is honoured.
    acts = rng.choice(['sedentary', 'light', 'moderate', 'vigorous'], size=n, p=[0.45, 0.30, 0.18, 0.07])
    step_rate = {'sedentary': 1, 'light': 6, 'moderate': 15, 'vigorous': 30}
    steps = rng.poisson([step_rate[a] for a in acts])
    scale = {'sedentary': 0.8, 'light': 1.0, 'moderate': 1.4, 'vigorous': 1.8}
    acc_sd = np.array([scale[a] for a in acts])
    acc_x = rng.normal(0, acc_sd)
    acc_y = rng.normal(0, acc_sd)
    acc_z = rng.normal(0, acc_sd)

    # Emotion: independent draw per sample, also from the seeded generator.
    emos = rng.choice(['neutral', 'happy', 'sad', 'angry'], size=n, p=[0.55, 0.2, 0.17, 0.08])
    emo_risk = np.isin(emos, ['sad', 'angry'])
    hr += np.where(emo_risk, 3.0, 0)
    rr += np.where(emo_risk, 1.2, 0)
    spo += np.where(emo_risk, -0.5, 0)

    # Meals (type/portion/kcal) and their decaying effect on the vitals.
    meal_event, portion_size, meal_kcal = _meal_plan(n, start_time, interval_min, rng)
    e_hr, e_rr, e_sys, e_tmp, e_spo = _apply_meal_effects(n, interval_min, meal_kcal, meal_event)
    hr += e_hr
    rr += e_rr
    sbp += e_sys
    tmp += e_tmp
    spo += e_spo

    def capdiff(x, m):
        """Return a copy of ``x`` whose step-to-step change never exceeds ``m``."""
        x = x.copy()
        for i in range(1, len(x)):
            # Compare against the already-capped previous value so the limit
            # propagates forward through the series.
            dx = x[i] - x[i-1]
            if dx > m:
                x[i] = x[i-1] + m
            elif dx < -m:
                x[i] = x[i-1] - m
        return x

    # Per-step change caps.
    hr = capdiff(hr, 8)
    rr = capdiff(rr, 2)
    sbp = capdiff(sbp, 6)
    dbp = capdiff(dbp, 5)
    spo = capdiff(spo, 1)
    tmp = capdiff(tmp, 0.15)

    # Hard limits on each vital.
    hr = np.clip(hr, 35, 200)
    rr = np.clip(rr, 5, 45)
    sbp = np.clip(sbp, 80, 220)
    dbp = np.clip(dbp, 40, 130)
    spo = np.clip(spo, 85, 100)
    tmp = np.clip(tmp, 35, 41.5)

    # Risk flag is computed after all effects, caps and clips: any vital past
    # its threshold, or a sad/angry emotion, raises it.
    risk = ((hr > 100) | (spo < 92) | (sbp > 140) | (dbp < 60) | (rr > 25) | (tmp > 38)
            | (emos == 'angry') | (emos == 'sad')).astype(int)

    return pd.DataFrame({
        'patient_id': patient_id, 'timestamp': ts, 'heart_rate': hr, 'spo2': spo, 'bp_sys': sbp, 'bp_dia': dbp,
        'resp_rate': rr, 'temperature': tmp, 'steps': steps, 'acc_x': acc_x, 'acc_y': acc_y, 'acc_z': acc_z,
        'activity_level': acts, 'emotion': emos, 'meal_event': meal_event, 'portion_size': portion_size,
        'meal_kcal': meal_kcal, 'risk_alert': risk
    })

# --- batch (fewer patients, longer time, 10-min sampling) ---
def simulate_patients_batch(total_patients=20, batch_size=10, days=7, interval_min=10, save_path="/content/guardian_long10"):
    """Simulate patients in batches and write one CSV file per batch.

    Patient ids run from 0 to ``total_patients - 1``. All patients start at
    2025-01-01 00:00, and each gets its own seed drawn from a generator
    seeded with 123. Every batch is concatenated and saved as
    ``guardian_data_patients_<first>_<last>.csv`` under ``save_path``, which
    is created if missing. A line is printed for each file written.

    Args:
        total_patients: Number of patients to simulate.
        batch_size: Maximum number of patients per CSV file.
        days: Days simulated per patient.
        interval_min: Minutes between samples.
        save_path: Output directory.
    """
    os.makedirs(save_path, exist_ok=True)
    start_time = datetime(2025, 1, 1, 0, 0)
    seed_rng = np.random.default_rng(123)

    for first in range(0, total_patients, batch_size):
        stop = min(first + batch_size, total_patients)
        # Seeds are drawn in patient-id order, one per simulated patient.
        batch = [
            simulate_patient_period(
                pid,
                start_time,
                days=days,
                interval_min=interval_min,
                seed=int(seed_rng.integers(0, 2**31 - 1)),
            )
            for pid in range(first, stop)
        ]
        combined = pd.concat(batch, ignore_index=True)
        out_file = f"{save_path}/guardian_data_patients_{first}_{stop-1}.csv"
        combined.to_csv(out_file, index=False)
        print(f"Saved: {out_file} ({len(combined):,} rows)")

# =========================
# RUN + VERIFY (saves CSVs)
# =========================
# Run the generator: this call is what actually writes the CSV batches.
simulate_patients_batch(total_patients=20, batch_size=10, days=7, interval_min=10, save_path="/content/guardian_long10")

# Verify: list the saved files, then load the first batch back and peek at it.
saved_files = os.listdir("/content/guardian_long10")
print(saved_files)
df = pd.read_csv(
    "/content/guardian_long10/guardian_data_patients_0_9.csv",
    parse_dates=["timestamp"],
)
print(df.shape)
print(df.head(3))


Saved: /content/guardian_long10/guardian_data_patients_0_9.csv (10,080 rows)
Saved: /content/guardian_long10/guardian_data_patients_10_19.csv (10,080 rows)
['guardian_data_patients_10_19.csv', 'guardian_data_patients_0_9.csv']
(10080, 18)
   patient_id           timestamp  heart_rate       spo2      bp_sys  \
0           0 2025-01-01 00:00:00   66.687255  93.387315  123.557887   
1           0 2025-01-01 00:10:00   67.950527  93.567264  126.491388   
2           0 2025-01-01 00:20:00   67.336967  93.547759  123.428209   

      bp_dia  resp_rate  temperature  steps     acc_x     acc_y     acc_z  \
0  70.546474  14.768082    36.658913     17 -2.350771  1.120603  2.676971   
1  71.052605  15.060559    36.647068      5  0.755058  0.282750  1.805386   
2  70.869006  14.806552    36.662748     34  2.307669  0.193827 -2.959593   

  activity_level emotion meal_event portion_size  meal_kcal  risk_alert  
0       moderate   angry       none         none        NaN           1  
1          ligh