# In [None]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import os

# Function to simulate one day of vitals, wearable, and emotion data for one patient
def simulate_patient_day(patient_id, start_time, minutes=1440, rng=None):
    """Simulate per-minute vitals, wearable and emotion data for one patient.

    Every signal is drawn independently for each minute; no correlation is
    modelled between signals or between consecutive minutes.

    Args:
        patient_id: Identifier copied into every row of the result.
        start_time: ``datetime`` of the first sample; each following sample
            is one minute later.
        minutes: Number of one-minute samples to generate. The default of
            1440 covers 24 hours.
        rng: Optional random source exposing ``normal``, ``poisson``,
            ``choice`` and ``uniform`` (for example
            ``np.random.default_rng(seed)``). When omitted, the module-level
            ``np.random`` functions (NumPy's global random state) are used.

    Returns:
        A ``pandas.DataFrame`` with one row per minute and the columns
        ``patient_id``, ``timestamp``, ``heart_rate``, ``spo2``, ``bp_sys``,
        ``bp_dia``, ``resp_rate``, ``temperature``, ``steps``, ``acc_x``,
        ``acc_y``, ``acc_z``, ``activity_level``, ``emotion``,
        ``voice_pitch``, ``facial_intensity`` and ``risk_alert`` (0 or 1).
    """
    if rng is None:
        # The np.random module offers the same sampling functions as a
        # Generator, so callers that seed the global state keep working.
        rng = np.random

    timestamps = [start_time + timedelta(minutes=i) for i in range(minutes)]

    # Vitals. The draw order below is kept fixed so that a given seed always
    # produces the same stream of values.
    heart_rate = rng.normal(loc=75, scale=10, size=minutes)
    # SpO2 is a percentage: an unbounded normal around 97 regularly
    # overshoots 100, so the samples are capped at full saturation.
    spo2 = np.minimum(rng.normal(loc=97, scale=1.5, size=minutes), 100.0)
    bp_sys = rng.normal(loc=115, scale=10, size=minutes)
    bp_dia = rng.normal(loc=75, scale=8, size=minutes)
    resp_rate = rng.normal(loc=16, scale=3, size=minutes)
    temp = rng.normal(loc=36.8, scale=0.3, size=minutes)

    # Wearables
    steps = rng.poisson(lam=12, size=minutes)
    acc_x = rng.normal(0, 1, size=minutes)
    acc_y = rng.normal(0, 1, size=minutes)
    acc_z = rng.normal(0, 1, size=minutes)
    activity_levels = rng.choice(['sedentary', 'light', 'moderate', 'vigorous'],
                                 size=minutes, p=[0.4, 0.3, 0.2, 0.1])

    # Emotion
    emotions = rng.choice(['neutral', 'happy', 'sad', 'angry'],
                          size=minutes, p=[0.5, 0.2, 0.2, 0.1])
    voice_pitch = rng.normal(loc=150, scale=30, size=minutes)
    facial_intensity = rng.uniform(0.2, 0.9, size=minutes)

    # Rule-based risk flag: 1 when any vital crosses its threshold or the
    # sampled emotion is 'angry' or 'sad', otherwise 0.
    risk_alert = (
        (heart_rate > 100) |
        (spo2 < 92) |
        (bp_sys > 140) |
        (bp_dia < 60) |
        (resp_rate > 25) |
        (temp > 38) |
        (emotions == 'angry') |
        (emotions == 'sad')
    ).astype(int)

    df = pd.DataFrame({
        'patient_id': patient_id,
        'timestamp': timestamps,
        'heart_rate': heart_rate,
        'spo2': spo2,
        'bp_sys': bp_sys,
        'bp_dia': bp_dia,
        'resp_rate': resp_rate,
        'temperature': temp,
        'steps': steps,
        'acc_x': acc_x,
        'acc_y': acc_y,
        'acc_z': acc_z,
        'activity_level': activity_levels,
        'emotion': emotions,
        'voice_pitch': voice_pitch,
        'facial_intensity': facial_intensity,
        'risk_alert': risk_alert
    })

    return df

# Simulate and save data in batches
def simulate_patients_batch(total_patients=10000, batch_size=500, save_path="/content/guardian_data"):
    """Simulate one day of data per patient and write it to CSV in batches.

    Patient ids run from 0 to ``total_patients - 1``. Each batch of up to
    ``batch_size`` patients is concatenated and saved as
    ``guardian_data_patients_<first_id>_<last_id>.csv`` inside ``save_path``.
    Every simulated day starts at 2025-01-01 00:00.

    Args:
        total_patients: Number of patients to simulate.
        batch_size: Maximum number of patients written to a single CSV file.
        save_path: Output directory; created if it does not exist.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    os.makedirs(save_path, exist_ok=True)
    start_time = datetime(2025, 1, 1, 0, 0)

    for first_id in range(0, total_patients, batch_size):
        # The last batch may be shorter than batch_size: stop at
        # total_patients so no extra patients are generated and the file
        # name reflects the ids it actually contains.
        stop_id = min(first_id + batch_size, total_patients)
        frames = [simulate_patient_day(pid, start_time) for pid in range(first_id, stop_id)]
        combined_df = pd.concat(frames, ignore_index=True)
        file_path = os.path.join(save_path, f"guardian_data_patients_{first_id}_{stop_id - 1}.csv")
        combined_df.to_csv(file_path, index=False)
        print(f"Saved batch: {file_path}")

# Example run: 10000 patients in total, written out as 2 CSV batches of 5000 patients each
simulate_patients_batch(
    save_path="/content/guardian_data",
    batch_size=5000,
    total_patients=10000,
)
