In [4]:
import pandas as pd
import random
import numpy as np

# Seed both generators: the helpers below draw from the stdlib `random`
# module as well as from NumPy's legacy global RNG.
random.seed(42)
np.random.seed(42)

# Categorical value pools used when sampling patients.
mode_of_arrival = [
    "Walk-in",
    "Private vehicle",
    "Ambulance",
]
chief_complaints = [
    "Chest pain",
    "Difficulty breathing",
    "Fever",
    "Seizure or loss of consciousness",
    "Abdominal pain",
    "Injury",
    "Headache",
    "Vomiting or diarrhea",
    "Weakness or fatigue",
    "Pregnancy-related complication",
    "Psychiatric/behavioral emergency",
    "Other",
]

# AVPU responsiveness levels: Alert, Voice, Pain, Unresponsive.
avpu_scale = [
    "Alert",
    "Voice",
    "Pain",
    "Unresponsive",
]
# Target labels; patients are generated separately for each one.
triage_category = [
    "emergency",
    "urgent",
    "non-urgent",
]

# Number of synthetic patients generated for every triage category.
n_per_class = 200

# Helper functions
def generate_logical_avpu(chief_complaint):
    """Sample an AVPU level conditioned on the chief complaint.

    For "Seizure or loss of consciousness" the level is drawn from
    Voice / Pain / Unresponsive only (never "Alert"). Any other complaint
    gets a uniform draw from the module-level ``avpu_scale`` list.

    Args:
        chief_complaint: Chief-complaint string for the patient.

    Returns:
        One AVPU level as a string.
    """
    if chief_complaint != "Seizure or loss of consciousness":
        return random.choice(avpu_scale)

    impaired_levels = ["Voice", "Pain", "Unresponsive"]
    # These are relative weights (they add up to 1.1, not 1.0);
    # random.choices only uses their ratios.
    relative_weights = [0.1, 0.3, 0.7]
    (level,) = random.choices(impaired_levels, weights=relative_weights)
    return level

def generate_logical_chief_complaint(sex, age):
    """Pick a chief complaint that is plausible for the patient's sex and age.

    "Pregnancy-related complication" is only eligible when ``sex`` is
    "Female" and ``age`` is at most 50; everyone else draws uniformly from
    the remaining entries of the module-level ``chief_complaints`` list.

    Args:
        sex: "Male" or "Female".
        age: Patient age in years.

    Returns:
        One chief-complaint string.
    """
    pregnancy_complaint = "Pregnancy-related complication"
    pregnancy_possible = sex == "Female" and age <= 50

    if pregnancy_possible:
        eligible = chief_complaints
    else:
        eligible = [cc for cc in chief_complaints if cc != pregnancy_complaint]
    return random.choice(eligible)

def generate_logical_active_bleeding(chief_complaint):
    """Sample the active-bleeding flag given the chief complaint.

    The chance of "Yes" is 0.8 for "Injury", 0.6 for
    "Pregnancy-related complication" and 0.1 for every other complaint.

    Args:
        chief_complaint: Chief-complaint string for the patient.

    Returns:
        "Yes" or "No".
    """
    # [weight of "Yes", weight of "No"] for the complaints with elevated risk.
    elevated_risk_weights = {
        "Injury": [0.8, 0.2],
        "Pregnancy-related complication": [0.6, 0.4],
    }
    baseline_weights = [0.1, 0.9]

    yes_no_weights = elevated_risk_weights.get(chief_complaint, baseline_weights)
    return random.choices(["Yes", "No"], weights=yes_no_weights)[0]

# Sampling parameters per triage class, consumed by generate_patient.
#   vitals:          (mean, standard deviation) passed to np.random.normal
#   *_clip:          (low, high) inclusive bounds passed to np.clip
#   pulse_pressure:  (min, max) accepted systolic - diastolic gap; outside this
#                    window both pressures are clipped to their *_clip bounds
#   *_weights:       relative weights aligned with avpu_scale / mode_of_arrival
_TRIAGE_PROFILES = {
    "emergency": {
        "resp_rate": (23, 5),
        "heart_rate": (100, 10),
        "systolic_bp": (130, 10),
        "diastolic_bp": (90, 6),
        "temperature": (38, 0.4),
        "oxygen_sat": (90, 3),
        "avpu_weights": [0.1, 0.2, 0.3, 0.4],
        "arrival_weights": [0.2, 0.3, 0.5],
        "resp_clip": (10, 25),
        "heart_clip": (50, 110),
        "pulse_pressure": (25, 60),
        "systolic_clip": (135, 155),
        "diastolic_clip": (80, 120),
        "oxygen_clip": (50, 100),
    },
    "urgent": {
        "resp_rate": (18, 3),
        "heart_rate": (80, 10),
        "systolic_bp": (120, 8),
        "diastolic_bp": (80, 6),
        "temperature": (37.5, 0.4),
        "oxygen_sat": (95, 2),
        "avpu_weights": [0.6, 0.2, 0.2, 0.0],
        "arrival_weights": [0.3, 0.5, 0.2],
        "resp_clip": (12, 21),
        "heart_clip": (70, 100),
        "pulse_pressure": (27, 50),
        "systolic_clip": (120, 130),
        "diastolic_clip": (80, 100),
        "oxygen_clip": (93, 100),
    },
    "non-urgent": {
        "resp_rate": (16, 2),
        "heart_rate": (75, 8),
        "systolic_bp": (120, 5),
        "diastolic_bp": (75, 6),
        "temperature": (37.0, 0.2),
        "oxygen_sat": (98, 1),
        "avpu_weights": [0.8, 0.2, 0.0, 0.0],
        "arrival_weights": [0.6, 0.4, 0.0],
        "resp_clip": (12, 20),
        "heart_clip": (60, 100),
        "pulse_pressure": (30, 45),
        "systolic_clip": (110, 115),
        "diastolic_clip": (70, 80),
        "oxygen_clip": (95, 100),
    },
}


def generate_patient(triage_class):
    """Generate one synthetic patient record for the given triage class.

    Demographics and complaint are sampled first, then vitals are drawn from
    normal distributions whose parameters depend on ``triage_class`` (see
    ``_TRIAGE_PROFILES``), clipped to class-specific ranges, and finally
    nudged according to pregnancy status and chief complaint. The nudges are
    applied after clipping, so final vitals may sit slightly outside the
    clip ranges.

    Args:
        triage_class: "emergency", "urgent" or "non-urgent". Any other value
            is treated as "non-urgent".

    Returns:
        A 13-tuple in this order: age, sex, mode of arrival, chief complaint,
        active bleeding, respiratory rate, heart rate, systolic BP,
        diastolic BP, temperature, oxygen saturation, AVPU level, pregnancy.
        The triage label itself is not part of the tuple; the caller attaches
        it. Clipped vitals are NumPy scalars.
    """
    profile = _TRIAGE_PROFILES.get(triage_class, _TRIAGE_PROFILES["non-urgent"])

    # --- Demographics and presenting complaint -----------------------------
    age = random.randint(18, 80)
    sex = random.choice(["Male", "Female"])
    chief_complaint = generate_logical_chief_complaint(sex, age)
    active_bleeding = generate_logical_active_bleeding(chief_complaint)
    pregnancy = random.choice(["Yes", "No"]) if sex == "Female" and 12 <= age <= 50 else "No"
    # A pregnancy-related complaint implies pregnancy. The random draw above
    # is still performed first so the RNG stream is unaffected by this rule.
    if chief_complaint == "Pregnancy-related complication":
        pregnancy = "Yes"

    # --- Raw draws (order matters for seed reproducibility) ----------------
    respiratory_rate = int(np.random.normal(*profile["resp_rate"]))
    heart_rate = int(np.random.normal(*profile["heart_rate"]))
    systolic_bp = int(np.random.normal(*profile["systolic_bp"]))
    diastolic_bp = int(np.random.normal(*profile["diastolic_bp"]))
    temperature = round(np.random.normal(*profile["temperature"]), 1)
    oxygen_saturation = int(np.random.normal(*profile["oxygen_sat"]))
    avpu = random.choices(avpu_scale, weights=profile["avpu_weights"])[0]
    arrival = random.choices(mode_of_arrival, weights=profile["arrival_weights"])[0]

    # --- Clip to the ranges considered realistic for this triage class -----
    respiratory_rate = np.clip(respiratory_rate, *profile["resp_clip"])
    heart_rate = np.clip(heart_rate, *profile["heart_clip"])
    pulse_pressure = systolic_bp - diastolic_bp
    min_pulse_pressure, max_pulse_pressure = profile["pulse_pressure"]
    # Blood pressures are only clipped when their gap is implausible.
    if pulse_pressure > max_pulse_pressure or pulse_pressure < min_pulse_pressure:
        systolic_bp = np.clip(systolic_bp, *profile["systolic_clip"])
        diastolic_bp = np.clip(diastolic_bp, *profile["diastolic_clip"])
    temperature = np.clip(temperature, 30.0, 43.0)
    oxygen_saturation = np.clip(oxygen_saturation, *profile["oxygen_clip"])

    # "Unresponsive" has zero weight outside the emergency profile, so no
    # relabelling is needed here; the label is owned by the caller anyway.

    # Defensive guard: systolic must stay above diastolic.
    if diastolic_bp >= systolic_bp:
        systolic_bp = diastolic_bp + 30

    # --- Condition-specific adjustments ------------------------------------
    if pregnancy == "Yes":
        heart_rate += 3
        systolic_bp -= 5

    if chief_complaint == "Chest pain":
        heart_rate += 5
    elif chief_complaint == "Difficulty breathing":
        respiratory_rate += 2
    elif chief_complaint == "Injury":
        if active_bleeding == "Yes":
            heart_rate += 3
    elif chief_complaint == "Fever":
        # Re-round so the value keeps one decimal place after the float add.
        temperature = round(temperature + 0.5, 1)

    return (
        age, sex, arrival, chief_complaint, active_bleeding,
        respiratory_rate, heart_rate, systolic_bp, diastolic_bp,
        temperature, oxygen_saturation, avpu, pregnancy,
    )

In [5]:
# Column order mirrors the tuple returned by generate_patient, followed by
# the triage label that was requested for that patient.
column_names = [
    "age",
    "sex",
    "mode_of_arrival",
    "chief_complaint",
    "active_bleeding",
    "resp_rate",
    "heart_rate",
    "systolic_bp",
    "diastolic_bp",
    "temperature",
    "oxygen_sat",
    "AVPU_scale",
    "pregnancy",
    "Triage_Category",
]

# Generate n_per_class patients for every triage category, class by class.
data = []
for cls in triage_category:
    for _ in range(n_per_class):
        data.append([*generate_patient(cls), cls])

df = pd.DataFrame(data, columns=column_names)

In [6]:
# Write the synthetic dataset to an Excel workbook in the working directory
# (row index omitted), then echo the path as the cell's output.
file_path = "Synthetic_Nigerian_ED_Triage_Data.xlsx"
df.to_excel(excel_writer=file_path, index=False)

file_path

'Synthetic_Nigerian_ED_Triage_Data.xlsx'