In [2]:
import pandas as pd
import numpy as np
import random

# --- Synthetic ER triage dataset ---------------------------------------------
# Draws N patients with random vitals, assigns a triage level with the
# rule-based logic below, and writes the result to er_triage_dataset.csv.

SEED = 42

# Every draw in this cell goes through the stdlib `random` module, so that is
# the generator that has to be seeded for the CSV to be reproducible. Seeding
# NumPy alone does not affect `random.randint` / `random.uniform` /
# `random.choice`.
random.seed(SEED)
np.random.seed(SEED)  # kept so any NumPy-based sampling is deterministic too

N = 10000  # number of patients

data = []

injuries = [
    "Head Injury", "Chest Pain", "Fracture", "Burn Injury",
    "Abdominal Pain", "Respiratory Distress",
    "Stroke Suspected", "Cut Injury", "Fever", "Sprain"
]

for i in range(N):
    # Each vital is drawn independently and uniformly from a fixed range.
    age = random.randint(1, 90)
    heart_rate = random.randint(60, 140)
    systolic_bp = random.randint(80, 150)
    diastolic_bp = random.randint(50, 100)
    spo2 = random.randint(85, 100)
    temperature = round(random.uniform(36.0, 40.0), 1)
    gcs = random.randint(3, 15)
    injury = random.choice(injuries)

    # --- Triage Logic ---
    # Branches are checked from most to least severe; the first match wins.
    # priority_score is drawn from a band that depends only on the level.
    if spo2 < 90 or gcs <= 8 or systolic_bp < 90:
        triage = "RED"
        priority = random.randint(90, 100)
    elif spo2 < 92 or heart_rate > 120 or temperature > 38.5:
        triage = "ORANGE"
        priority = random.randint(70, 89)
    elif heart_rate > 100 or temperature > 37.5:
        triage = "YELLOW"
        priority = random.randint(40, 69)
    else:
        triage = "GREEN"
        priority = random.randint(10, 39)

    notes = f"{injury} with SpO2 {spo2}%, HR {heart_rate}"

    # NOTE: `:04` pads to four digits, so with N = 10000 the last id is
    # "P10000" (five digits). The format is left as-is to keep existing ids.
    data.append([
        f"P{i+1:04}", age, heart_rate,
        systolic_bp, diastolic_bp, spo2,
        temperature, gcs, injury,
        notes, triage, priority
    ])

# Column order must match the order of the values appended per row above.
columns = [
    "patient_id", "age", "heart_rate",
    "systolic_bp", "diastolic_bp",
    "spo2", "temperature", "gcs_score",
    "injury_type", "doctor_notes",
    "triage_level", "priority_score"
]

df = pd.DataFrame(data, columns=columns)

df.to_csv("er_triage_dataset.csv", index=False)

print("✅ Dataset generated: er_triage_dataset.csv")
print(df.head())

✅ Dataset generated: er_triage_dataset.csv
  patient_id  age  heart_rate  systolic_bp  diastolic_bp  spo2  temperature  \
0      P0001   68         115          147            91    98         38.9   
1      P0002   85          87          115            95    87         39.6   
2      P0003   72         130           80            87    93         36.7   
3      P0004   18          84           99            57    89         40.0   
4      P0005   59         117          143            95    90         37.8   

   gcs_score       injury_type                           doctor_notes  \
0          3          Fracture         Fracture with SpO2 98%, HR 115   
1          9       Head Injury       Head Injury with SpO2 87%, HR 87   
2          7       Head Injury      Head Injury with SpO2 93%, HR 130   
3          3  Stroke Suspected  Stroke Suspected with SpO2 89%, HR 84   
4          4        Cut Injury       Cut Injury with SpO2 90%, HR 117   

  triage_level  priority_score  
0         

In [3]:
import pandas as pd
import numpy as np

# Reload the dataset from the CSV written by the generator cell, so the rest
# of the notebook works from the on-disk file rather than the in-memory frame.
df = pd.read_csv(filepath_or_buffer="er_triage_dataset.csv")

In [4]:
import random

# Label noise: pick roughly 10% of rows at random and overwrite their triage
# level with a label drawn uniformly from the four levels.
np.random.seed(42)

NOISE_RATE = 0.1  # 10% label noise
TRIAGE_LABELS = ["RED", "ORANGE", "YELLOW", "GREEN"]

# One uniform draw per row; rows whose draw falls below the rate are selected.
mask = np.random.rand(len(df)) < NOISE_RATE
n_selected = mask.sum()

# The replacement is drawn from all four labels, so a selected row can be
# assigned the label it already had.
df.loc[mask, "triage_level"] = np.random.choice(TRIAGE_LABELS, size=n_selected)


In [11]:
from sklearn.preprocessing import LabelEncoder

# Encode the string triage levels as integer class ids. The fitted encoder is
# kept in `le` so predictions can be mapped back to level names later.
triage_labels = df["triage_level"]
le = LabelEncoder().fit(triage_labels)
df["triage_level_encoded"] = le.transform(triage_labels)

In [12]:
# Columns that are not model inputs: the identifier, both forms of the target,
# the free-text notes, the categorical injury type, and the priority score.
NON_FEATURE_COLUMNS = [
    "patient_id",
    "triage_level",
    "triage_level_encoded",
    "doctor_notes",
    "injury_type",
    "priority_score",
]

# Features are whatever remains; the target is the integer-encoded level.
X = df.drop(columns=NON_FEATURE_COLUMNS)
y = df["triage_level_encoded"]

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on the target
# and uses a fixed random_state so it is repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split

In [14]:
from xgboost import XGBClassifier

# Hyperparameters are collected in one place so they are easy to read and tune.
xgb_params = {
    "n_estimators": 120,
    "max_depth": 3,
    "learning_rate": 0.05,
    "subsample": 0.7,
    "colsample_bytree": 0.7,
    "reg_alpha": 1.0,
    "reg_lambda": 2.0,
    "objective": "multi:softmax",
    "num_class": 4,  # one class per triage level
    "random_state": 42,
}

model = XGBClassifier(**xgb_params)

# Left as the last expression so the fitted estimator is shown as cell output.
model.fit(X_train, y_train)

In [16]:
from sklearn.metrics import accuracy_score, classification_report

# Score the held-out split. The label noise was injected before the split, so
# y_test contains noisy labels as well.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
# target_names maps the integer classes back to the original level names.
report = classification_report(y_test, y_pred, target_names=le.classes_)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n")
print(report)

Accuracy: 0.9195

Classification Report:

              precision    recall  f1-score   support

       GREEN       0.90      0.56      0.69       142
      ORANGE       0.94      0.90      0.92       388
         RED       0.92      0.99      0.95      1282
      YELLOW       0.90      0.78      0.84       188

    accuracy                           0.92      2000
   macro avg       0.91      0.81      0.85      2000
weighted avg       0.92      0.92      0.92      2000



In [19]:
# Example new patient after first aid
# Keys are listed in the same order as the feature columns used for training.
patient_vitals = {
    "age": 62,
    "heart_rate": 128,
    "systolic_bp": 88,
    "diastolic_bp": 60,
    "spo2": 89,
    "temperature": 38.9,
    "gcs_score": 7,
}
new_patient = pd.DataFrame([patient_vitals])

# The model predicts an encoded class id; decode it back to the level name.
prediction = model.predict(new_patient)
triage_result = le.inverse_transform(prediction)

print("Predicted Triage Level:", triage_result[0])



Predicted Triage Level: RED
