In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# --- Dataset dimensions ---------------------------------------------------
N_PATIENTS = 20
RECORDS_PER_PATIENT = 4

# Naive local midnight of the day the script is run; every observation
# timestamp is an offset from this moment.
START_DATE = datetime.combine(datetime.now().date(), datetime.min.time())

# Hour-of-day at which each observation window opens.
TIMES = list(range(0, 24, 6))

# Risk tier per patient index: the first third is "low", the second third is
# "medium", and "high" absorbs the remainder when N_PATIENTS is not a
# multiple of three.
_tier_sizes = {
    "low": N_PATIENTS // 3,
    "medium": N_PATIENTS // 3,
    "high": N_PATIENTS - 2 * (N_PATIENTS // 3),
}
RISK_ORDER = [tier for tier, size in _tier_sizes.items() for _ in range(size)]

# Seed the legacy global NumPy generator and also create an independent
# Generator instance with the same seed.
np.random.seed(42)
rng = np.random.default_rng(42)

# Column order of the exported DataFrame / CSV.
columns = [
    # identity and demographics
    'patientId',
    'age',
    'gender',
    # observation window
    'observationStart',
    'observationEnd',
    # free-text note and medication
    'nursingNote',
    'medications',
    # vitals
    'heartRate',
    'spo2',
    'temperature',
    'bloodPressure',
    # activity and intake
    'stepsTaken',
    'calorieIntake',
    'sleepHours',
    'waterIntakeMl',
    'mealsSkipped',
    'exerciseMinutes',
    'bathroomVisits',
    # tags and derived text
    'behaviourTags',
    'emotionTags',
    'clinicalSummary',
    'entitiesExtracted',
    'baselineStats',
    'state',
]

# Value pools for the categorical / free-text columns.  Each record samples
# one entry per pool; every pool holds more than 20 options.

# Candidate values for the "nursingNote" column.
nursingNote_pool = [
    "Vitals stable, no complaints.",
    "Patient reports mild pain.",
    "Assisted with mobility, resting comfortably.",
    "Patient refused meal.",
    "No new findings.",
    "No fever, taking fluids well.",
    "Reports dizziness on standing.",
    "Dressing changed, wound healing well.",
    "Noted cough, dry in nature.",
    "Requested family visit.",
    "Mild nausea, antiemetic given.",
    "Oriented to time/place.",
    "No abnormal behaviour.",
    "Pain scale 3/10.",
    "Tolerating oral medications.",
    "Ambulating with walker.",
    "Slept 6 hours overnight.",
    "Bladder scan done, WNL.",
    "Observed mild anxiety.",
    "Allergies: None reported.",
    "Bed rails up for safety.",
    "Encouraged deep breathing.",
    "BP elevated in AM.",
    "Reports improved appetite.",
]

# Candidate values for the "medications" column.
medications_pool = [
    "None",
    "Aspirin",
    "Antibiotic",
    "Paracetamol",
    "Ibuprofen",
    "Metformin",
    "Atorvastatin",
    "Amoxicillin",
    "Lisinopril",
    "Amlodipine",
    "Simvastatin",
    "Omeprazole",
    "Losartan",
    "Metoprolol",
    "Doxycycline",
    "Vitamin D",
    "Furosemide",
    "Gabapentin",
    "Levothyroxine",
    "Prednisone",
    "Albuterol",
    "Hydrochlorothiazide",
    "Clopidogrel",
    "Sertraline",
]

# Candidate values for the "behaviourTags" column.
behaviourTags_pool = [
    "calm",
    "active",
    "restless",
    "anxious",
    "withdrawn",
    "cooperative",
    "agitated",
    "hostile",
    "apathetic",
    "confused",
    "alert",
    "euphoric",
    "irritable",
    "tearful",
    "delirious",
    "cheerful",
    "impulsive",
    "disoriented",
    "suspicious",
    "inattentive",
    "responsive",
    "distractible",
    "argumentative",
]

# Candidate values for the "emotionTags" column.
emotionTags_pool = [
    "happy",
    "sad",
    "frustrated",
    "neutral",
    "calm",
    "tense",
    "worried",
    "relaxed",
    "cheerful",
    "angry",
    "hopeful",
    "disappointed",
    "excited",
    "fearful",
    "confident",
    "embarrassed",
    "lonely",
    "bored",
    "content",
    "guilty",
    "ashamed",
    "optimistic",
]

# Candidate values for the "clinicalSummary" column.
clinicalSummary_pool = [
    "No acute distress. Continue monitoring.",
    "Reports mild discomfort, analgesia given.",
    "Restless at night, reassurance provided.",
    "Blood pressure slightly elevated, no intervention needed.",
    "No new symptoms. Encouraged fluid intake.",
    "Stable post-procedure, no complications observed.",
    "Eating well, tolerates solid foods.",
    "Needs minimal assistance with ADLs.",
    "Labs within normal limits, review in 1 week.",
    "No medication side effects noted.",
    "Urine output satisfactory.",
    "Wound site clean, minimal drainage.",
    "Patient anxious about upcoming tests, reassurance provided.",
    "Oxygen discontinued, breathing comfortably.",
    "Physical therapy session completed, gait improving.",
    "Reported sleep interrupted by pain.",
    "Allergies reviewed and updated.",
    "Vital signs within patient baseline.",
    "Mood appears improved, socializing with others.",
    "Instructed on home exercise plan.",
    "Pain controlled with oral analgesics.",
    "Skin assessment completed, no breakdown.",
    "Discussed care plan with family.",
    "Awaiting specialist consult.",
]

# Candidate values for the "entitiesExtracted" column.
entitiesExtracted_pool = [
    "none",
    "hypertension",
    "fatigue",
    "pain",
    "nausea",
    "cough",
    "fever",
    "dizziness",
    "anxiety",
    "headache",
    "infection",
    "wound",
    "medication reaction",
    "fall",
    "confusion",
    "shortness of breath",
    "UTI",
    "mobility issue",
    "rash",
    "dehydration",
    "sleep disturbance",
    "constipation",
    "urinary retention",
]

# Candidate values for the "baselineStats" column.
baselineStats_pool = [
    "baseline normal",
    "elevated BP",
    "reduced sleep",
    "increased steps",
    "reduced appetite",
    "improved mobility",
    "fluctuating HR",
    "decreased spo2",
    "stable labs",
    "improved diet",
    "reduced anxiety",
    "weight loss",
    "weight gain",
    "improved strength",
    "worsening pain",
    "resolved cough",
    "new rash",
    "stable ADLs",
    "improved balance",
    "reduced confusion",
    "improved hydration",
    "stable glucose",
]

# Candidate values for the "state" column.
state_pool = [
    "active",
    "resting",
    "asleep",
    "alert",
    "discharged",
    "transferred",
    "awaiting test",
    "in procedure",
    "recovering",
    "under observation",
    "stable",
    "critical",
    "pending consult",
    "home",
    "admitted",
    "pre-op",
    "post-op",
    "rehab",
    "isolation",
    "fall risk",
    "end of life",
]

# Sampling parameters for each risk tier.  For the normally distributed
# fields the tuple is (mean, standard deviation); for the uniformly drawn
# integer fields it is (low, high) with the upper bound exclusive, matching
# np.random.randint.
RISK_PROFILES = {
    "low": {
        "heartRate": (75, 6),
        "spo2": (98, 2),
        "temperature": (36.7, 0.5),
        "systolic": (120, 10),
        "diastolic": (80, 5),
        "stepsTaken": (7000, 12000),
        "calorieIntake": (1800, 2600),
        "sleepHours": (7.5, 1),
        "waterIntakeMl": (1500, 3000),
        "mealsSkipped": (0, 2),
        "exerciseMinutes": (30, 60),
    },
    "medium": {
        "heartRate": (90, 6),
        "spo2": (95, 2),
        "temperature": (37.5, 0.5),
        "systolic": (135, 10),
        "diastolic": (90, 5),
        "stepsTaken": (3500, 7000),
        "calorieIntake": (1300, 1800),
        "sleepHours": (6, 1),
        "waterIntakeMl": (1000, 1800),
        "mealsSkipped": (1, 3),
        "exerciseMinutes": (10, 30),
    },
    "high": {
        "heartRate": (110, 6),
        "spo2": (90, 2),
        "temperature": (38.3, 0.5),
        "systolic": (150, 10),
        "diastolic": (100, 5),
        "stepsTaken": (500, 3500),
        "calorieIntake": (800, 1300),
        "sleepHours": (4.5, 1),
        "waterIntakeMl": (400, 1000),
        "mealsSkipped": (2, 4),
        "exerciseMinutes": (0, 15),
    },
}

# Oxygen saturation is a percentage, so sampled values are capped here.
SPO2_MAX = 100


def _normal_int(mean, std):
    """Draw one value from N(mean, std) and truncate it to an int."""
    return int(np.random.normal(mean, std))


def _uniform_int(low, high):
    """Draw one integer uniformly from [low, high)."""
    return int(np.random.randint(low, high))


rows = []

for i in range(N_PATIENTS):
    pid = f"P{i+1:04d}"
    # Raises KeyError for a tier that has no profile instead of silently
    # treating it as "high".
    profile = RISK_PROFILES[RISK_ORDER[i]]

    # Per-patient attributes: drawn once and repeated on every record.
    age = int(np.random.randint(25, 85))
    gender = np.random.choice(["Male", "Female"])
    bathroomVisits = int(np.random.randint(2, 8))

    # Each patient is observed on its own calendar day (patient i -> day i).
    day0 = START_DATE + timedelta(days=i)

    for hour in TIMES:
        start_ts = day0 + timedelta(hours=hour)
        end_ts = start_ts + timedelta(hours=6)

        # NOTE: the values below are evaluated top to bottom, so the order of
        # the keys fixes the order of the random draws.  Reordering them
        # changes the generated data for a given seed.
        row = {
            "patientId": pid,
            "age": age,
            "gender": gender,
            "observationStart": start_ts.strftime("%Y-%m-%d %H:%M"),
            "observationEnd": end_ts.strftime("%Y-%m-%d %H:%M"),
            "nursingNote": np.random.choice(nursingNote_pool),
            "medications": np.random.choice(medications_pool),
            "heartRate": _normal_int(*profile["heartRate"]),
            # A normal draw around 98 regularly lands above 100, which is not
            # a valid saturation percentage, so clamp it.
            "spo2": min(SPO2_MAX, _normal_int(*profile["spo2"])),
            "temperature": round(np.random.normal(*profile["temperature"]), 1),
            "bloodPressure": (
                f"{_normal_int(*profile['systolic'])}/"
                f"{_normal_int(*profile['diastolic'])}"
            ),
            "stepsTaken": _uniform_int(*profile["stepsTaken"]),
            "calorieIntake": _uniform_int(*profile["calorieIntake"]),
            "sleepHours": round(np.random.normal(*profile["sleepHours"]), 1),
            "waterIntakeMl": _uniform_int(*profile["waterIntakeMl"]),
            "mealsSkipped": _uniform_int(*profile["mealsSkipped"]),
            "exerciseMinutes": _uniform_int(*profile["exerciseMinutes"]),
            "bathroomVisits": bathroomVisits,
            "behaviourTags": np.random.choice(behaviourTags_pool),
            "emotionTags": np.random.choice(emotionTags_pool),
            "clinicalSummary": np.random.choice(clinicalSummary_pool),
            "entitiesExtracted": np.random.choice(entitiesExtracted_pool),
            "baselineStats": np.random.choice(baselineStats_pool),
            "state": np.random.choice(state_pool),
        }
        rows.append(row)

# Build the frame in the declared column order and write it out.
df = pd.DataFrame(rows, columns=columns)
df.to_csv("Synthetic_20_patients.csv", index=False)
print("Saved Synthetic_20_patients.csv")


Saved Synthetic_20_patients.csv
